UFO
TemporalThinning.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2019 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <cmath>
12 #include <functional>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 
17 #include "eckit/config/Configuration.h"
18 #include "ioda/ObsDataVector.h"
19 #include "ioda/ObsSpace.h"
20 #include "oops/base/Variables.h"
21 #include "oops/util/DateTime.h"
22 #include "oops/util/Duration.h"
23 #include "oops/util/Logger.h"
27 
28 namespace ufo {
29 
30 namespace {
31 
32 struct Observation {
33  size_t id;
34  util::DateTime time;
35  int priority = 0;
36 };
37 
38 /// \brief Responsible for the selection of observations to retain.
40  public:
41  TemporalThinner(const std::vector<size_t> &validObsIds,
42  const std::vector<util::DateTime> &times,
43  const std::vector<int> *priorities,
44  const RecursiveSplitter &splitter,
45  const TemporalThinningParameters &options);
46 
47  std::vector<bool> identifyThinnedObservations(size_t totalNumObservations) const;
48 
49  private:
50  typedef std::vector<size_t>::const_iterator ForwardValidObsIndexIterator;
51  typedef std::vector<size_t>::const_reverse_iterator BackwardValidObsIndexIterator;
52 
53  Observation getObservation(size_t validObsIndex) const {
54  Observation obs;
55  obs.id = validObsIds_[validObsIndex];
56  obs.time = times_[obs.id];
57  if (priorities_)
58  obs.priority = (*priorities_)[obs.id];
59  return obs;
60  }
61 
62  util::DateTime getTime(size_t validObsIndex) const {
63  return times_[validObsIds_[validObsIndex]];
64  }
65 
66  int getPriority(size_t validObsIndex) const {
67  ASSERT(priorities_ != nullptr);
68  return (*priorities_)[validObsIds_[validObsIndex]];
69  }
70 
71  bool hasPriorities() const {
72  return priorities_ != nullptr;
73  }
74 
75  /// Thin the valid observations with indices from the specified range. The first observation
76  /// to retain must be taken at or after \p deadline.
77  void thinRangeForwards(ForwardValidObsIndexIterator validIndicesBegin,
78  ForwardValidObsIndexIterator validIndicesEnd,
79  util::DateTime deadline,
80  std::vector<bool> &isThinned) const;
81 
82  /// Thin the valid observations with indices from the specified range. The first observation
83  /// to retain must be taken at or before \p deadline.
84  void thinRangeBackwards(BackwardValidObsIndexIterator validIndicesBegin,
85  BackwardValidObsIndexIterator validIndicesEnd,
86  util::DateTime deadline,
87  std::vector<bool> &isThinned) const;
88 
89  /// \brief Common implementation shared by thinRangeForwards() and thinRangeBackwards().
90  ///
91  /// \tparam Iterator
92  /// Iterator visiting indices of observations in chronological (reverse chronological) order
93  /// when thinning forwards (backwards).
94  /// \tparam IsPast
95  /// Binary functor taking two parameters of type util::DateTime and returning true if and only
96  /// if the first argument is later (earlier) than the second when thinning forwards (backwards).
97  /// \tparam IsAtOrPast
98  /// Binary functor taking two parameters of type util::DateTime and returning true if and only
99  /// if the first argument is later (earlier) than or equal to the second when thinning forwards
100  /// (backwards).
101  /// \tparam Advance
102  /// Binary functor taking two parameters of types util::DateTime and util::Duration and
103  /// returning the datetime obtained by adding (subtracting) the second argument to (from) the
104  /// first argument when thinning forwards (backwards).
105  template <typename Iterator, typename IsPast, typename IsAtOrPast, typename Advance>
106  void thinRange(Iterator validIndicesBegin,
107  Iterator validIndicesEnd,
108  util::DateTime deadline,
109  IsPast isPast,
110  IsAtOrPast isAtOrPast,
111  Advance advance,
112  std::vector<bool> &isThinned) const;
113 
114  /// Return an iterator to the first observation to be retained.
115  ForwardValidObsIndexIterator findSeed(
116  ForwardValidObsIndexIterator validObsIndicesBegin,
117  ForwardValidObsIndexIterator validObsIndicesEnd,
118  util::DateTime seedTime) const;
119 
120  /// Return an iterator to the valid observation taken at a time closest to \p targetTime.
121  /// In case of a tie, the later (more recent) observation is selected.
122  ForwardValidObsIndexIterator findNearest(
123  ForwardValidObsIndexIterator validObsIndicesBegin,
124  ForwardValidObsIndexIterator validObsIndicesEnd,
125  util::DateTime targetTime) const;
126 
127  private:
128  const std::vector<size_t> &validObsIds_;
129  const std::vector<util::DateTime> &times_;
130  const std::vector<int> *priorities_;
133 };
134 
135 TemporalThinner::TemporalThinner(const std::vector<size_t> &validObsIds,
136  const std::vector<util::DateTime> &times,
137  const std::vector<int> *priorities,
138  const RecursiveSplitter &splitter,
139  const TemporalThinningParameters &options) :
140  validObsIds_(validObsIds),
141  times_(times),
142  priorities_(priorities),
143  splitter_(splitter),
144  options_(options)
145 {}
146 
147 std::vector<bool> TemporalThinner::identifyThinnedObservations(size_t totalNumObservations) const {
148  std::vector<bool> isThinned(totalNumObservations, false);
149 
151  if (options_.seedTime.value() == boost::none) {
152  util::DateTime deadline = getTime(*group.begin());
153  thinRangeForwards(group.begin(), group.end(), deadline, isThinned);
154  } else {
155  const ForwardValidObsIndexIterator seedIt = findSeed(
156  group.begin(), group.end(), *options_.seedTime.value());
157  Observation seed = getObservation(*seedIt);
158  {
159  util::DateTime deadline = seed.time + options_.minSpacing;
160  thinRangeForwards(seedIt + 1, group.end(), deadline, isThinned);
161  }
162  {
163  const BackwardValidObsIndexIterator seedRevIt(seedIt + 1);
164  const BackwardValidObsIndexIterator revEnd(group.begin());
165  util::DateTime deadline = seed.time - options_.minSpacing;
166  thinRangeBackwards(seedRevIt, revEnd, seed.time, isThinned);
167  }
168  }
169  }
170 
171  return isThinned;
172 }
173 
175  ForwardValidObsIndexIterator validIndicesEnd,
176  util::DateTime deadline,
177  std::vector<bool> &isThinned) const {
178  thinRange(validIndicesBegin, validIndicesEnd, deadline,
179  std::greater<util::DateTime>(),
180  std::greater_equal<util::DateTime>(),
181  // Unfortunately in C++11 std::plus requires both arguments to be of the same type
182  [](const util::DateTime &time, const util::Duration &offset) { return time + offset; },
183  isThinned);
184 }
185 
187  BackwardValidObsIndexIterator validIndicesBegin,
188  BackwardValidObsIndexIterator validIndicesEnd,
189  util::DateTime deadline,
190  std::vector<bool> &isThinned) const {
191  thinRange(validIndicesBegin, validIndicesEnd, deadline,
192  std::less<util::DateTime>(),
193  std::less_equal<util::DateTime>(),
194  [](const util::DateTime &time, const util::Duration &offset) { return time - offset; },
195  isThinned);
196 }
197 
198 template <typename Iterator, typename IsPast, typename IsAtOrPast, typename Advance>
199 void TemporalThinner::thinRange(Iterator validIndicesBegin,
200  Iterator validIndicesEnd,
201  util::DateTime deadline,
202  IsPast isPast,
203  IsAtOrPast isAtOrPast,
204  Advance advance,
205  std::vector<bool> &isThinned) const {
206  boost::optional<Observation> best;
207  for (Iterator it = validIndicesBegin; it != validIndicesEnd; ++it) {
208  const size_t validObsIndex = *it;
209  Observation current = getObservation(validObsIndex);
210  if (best != boost::none) {
211  // We're looking for a higher-priority observation at or before the deadline
212  if (isPast(current.time, deadline)) {
213  // We haven't found one
214  deadline = advance(best->time, options_.minSpacing);
215  best = boost::none;
216  // The decision whether to thin 'current' will be taken in the next if statement
217  } else {
218  if (current.priority > best->priority) {
219  isThinned[best->id] = true;
220  best = current;
221  } else {
222  isThinned[current.id] = true;
223  }
224  }
225  }
226 
227  if (best == boost::none) {
228  // We're looking for an observation at or after the deadline
229  if (isAtOrPast(current.time, deadline)) {
230  best = current;
231  deadline = advance(best->time, options_.tolerance);
232  } else {
233  isThinned[current.id] = true;
234  }
235  }
236  }
237 }
238 
240  ForwardValidObsIndexIterator validObsIndicesBegin,
241  ForwardValidObsIndexIterator validObsIndicesEnd,
242  util::DateTime seedTime) const {
243  const ForwardValidObsIndexIterator nearestToSeedIt = findNearest(
244  validObsIndicesBegin, validObsIndicesEnd, seedTime);
245  if (!hasPriorities()) {
246  return nearestToSeedIt;
247  }
248 
249  util::DateTime nearestToSeedTime = getObservation(*nearestToSeedIt).time;
250 
251  ForwardValidObsIndexIterator acceptableBegin = std::lower_bound(
252  validObsIndicesBegin, nearestToSeedIt,
253  nearestToSeedTime - options_.tolerance,
254  [&](size_t validObsIndexA, const util::DateTime &timeB)
255  { return getTime(validObsIndexA) < timeB; });
256  ForwardValidObsIndexIterator acceptableEnd = std::upper_bound(
257  acceptableBegin, validObsIndicesEnd,
258  nearestToSeedTime + options_.tolerance,
259  [&](const util::DateTime &timeA, size_t validObsIndexB)
260  { return timeA < getTime(validObsIndexB); });
261 
262  // Find the element with highest priority in the acceptable range.
263  return std::max_element(acceptableBegin, acceptableEnd,
264  [&](size_t validObsIndexA, size_t validObsIndexB)
265  { return getPriority(validObsIndexA) < getPriority(validObsIndexB); });
266 }
267 
269  ForwardValidObsIndexIterator validObsIndicesBegin,
270  ForwardValidObsIndexIterator validObsIndicesEnd,
271  util::DateTime targetTime) const {
272  ASSERT_MSG(validObsIndicesEnd - validObsIndicesBegin != 0,
273  "The range of observation indices must not be empty");
274 
275  auto isEarlierThan = [&](size_t validObsIndexA, const util::DateTime &timeB) {
276  return getTime(validObsIndexA) < timeB;
277  };
278  const ForwardValidObsIndexIterator firstGreaterOrEqualToTargetIt =
279  std::lower_bound(validObsIndicesBegin, validObsIndicesEnd, targetTime, isEarlierThan);
280  if (firstGreaterOrEqualToTargetIt == validObsIndicesBegin) {
281  return firstGreaterOrEqualToTargetIt;
282  }
283  if (firstGreaterOrEqualToTargetIt == validObsIndicesEnd) {
284  // All observations were taken before targetTime. Return the iterator pointing to the
285  // last valid element of the range.
286  return validObsIndicesEnd - 1;
287  }
288 
289  const ForwardValidObsIndexIterator lastLessThanTargetIt =
290  firstGreaterOrEqualToTargetIt - 1;
291 
292  Observation firstGreaterOrEqualToTarget = getObservation(*firstGreaterOrEqualToTargetIt);
293  Observation lastLessThanTarget = getObservation(*lastLessThanTargetIt);
294 
295  // Prefer the later observation if there's a tie
296  if ((firstGreaterOrEqualToTarget.time - targetTime).toSeconds() <=
297  (targetTime - lastLessThanTarget.time).toSeconds()) {
298  return firstGreaterOrEqualToTargetIt;
299  } else {
300  return lastLessThanTargetIt;
301  }
302 }
303 
304 } // namespace
305 
306 TemporalThinning::TemporalThinning(ioda::ObsSpace & obsdb, const Parameters_ & parameters,
307  std::shared_ptr<ioda::ObsDataVector<int> > flags,
308  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
309  : FilterBase(obsdb, parameters, flags, obserr), options_(parameters)
310 {
311  oops::Log::debug() << "TemporalThinning: config = " << options_ << std::endl;
312 }
313 
314 // Required for the correct destruction of options_.
316 {}
317 
318 void TemporalThinning::applyFilter(const std::vector<bool> & apply,
319  const Variables & filtervars,
320  std::vector<std::vector<bool>> & flagged) const {
321  ObsAccessor obsAccessor = createObsAccessor();
322 
323  const std::vector<bool> isThinned = identifyThinnedObservations(apply, filtervars, obsAccessor);
324 
325  obsAccessor.flagRejectedObservations(isThinned, flagged);
326 }
327 
329  if (options_.categoryVariable.value() != boost::none) {
331  obsdb_, *options_.categoryVariable.value() );
332  } else if (!obsdb_.obs_group_vars().empty()) {
333  // Records exist. Thin each record separately.
335  } else {
336  // Records don't exist. Thin all observations together.
338  }
339 }
340 
342  const std::vector<bool> & apply,
343  const Variables & filtervars,
344  const ObsAccessor &obsAccessor) const {
345  const std::vector<size_t> validObsIds
346  = obsAccessor.getValidObservationIds(apply, *flags_, filtervars);
347 
348  RecursiveSplitter splitter = obsAccessor.splitObservationsIntoIndependentGroups(validObsIds);
349 
350  std::vector<util::DateTime> times = obsAccessor.getDateTimeVariableFromObsSpace(
351  "MetaData", "datetime");
352  splitter.sortGroupsBy([&times, &validObsIds](size_t obsIndex)
353  { return times[validObsIds[obsIndex]]; });
354 
355  boost::optional<std::vector<int>> priorities = getObservationPriorities(obsAccessor);
356 
357  TemporalThinner thinner(validObsIds, times, priorities.get_ptr(), splitter, options_);
358  return thinner.identifyThinnedObservations(times.size());
359 }
360 
361 boost::optional<std::vector<int>> TemporalThinning::getObservationPriorities(
362  const ObsAccessor &obsAccessor) const {
363  boost::optional<std::vector<int>> priorities;
364  if (options_.priorityVariable.value() != boost::none) {
365  const ufo::Variable priorityVariable = options_.priorityVariable.value().get();
366  priorities = obsAccessor.getIntVariableFromObsSpace(priorityVariable.group(),
367  priorityVariable.variable());
368  }
369  return priorities;
370 }
371 
372 void TemporalThinning::print(std::ostream & os) const {
373  os << "TemporalThinning: config = " << options_ << std::endl;
374 }
375 
376 } // namespace ufo
Base class for UFO QC filters.
Definition: FilterBase.h:45
This class provides access to observations that may be held on multiple MPI ranks.
Definition: ObsAccessor.h:56
RecursiveSplitter splitObservationsIntoIndependentGroups(const std::vector< size_t > &validObsIds, bool opsCompatibilityMode=false) const
Definition: ObsAccessor.cc:197
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const ioda::ObsDataVector< int > &flags, const Variables &filtervars, bool validIfAnyFilterVariablePassedQC=true) const
Return the IDs of observation locations that should be treated as valid by a filter.
Definition: ObsAccessor.cc:99
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
Definition: ObsAccessor.cc:139
static ObsAccessor toObservationsSplitIntoIndependentGroupsByRecordId(const ioda::ObsSpace &obsdb)
Create an accessor to the collection of observations held in obsdb, assuming that each record can be ...
Definition: ObsAccessor.cc:89
static ObsAccessor toAllObservations(const ioda::ObsSpace &obsdb)
Create an accessor to observations from the observation space obsdb, assuming that the whole set of o...
Definition: ObsAccessor.cc:84
static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(const ioda::ObsSpace &obsdb, const Variable &variable)
Create an accessor to the collection of observations held in obsdb, assuming that observations with d...
Definition: ObsAccessor.cc:94
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:159
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
Definition: ObsAccessor.cc:243
ioda::ObsSpace & obsdb_
std::shared_ptr< ioda::ObsDataVector< int > > flags_
A range of indices of all array elements belonging to a particular equivalence class.
Partitions an array into groups of elements equivalent according to certain criteria.
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
void sortGroupsBy(const UnaryOperation &key)
Sort the elements in each equivalence class in ascending order.
void applyFilter(const std::vector< bool > &, const Variables &, std::vector< std::vector< bool >> &) const override
TemporalThinning(ioda::ObsSpace &obsdb, const Parameters_ &parameters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
ObsAccessor createObsAccessor() const
std::vector< bool > identifyThinnedObservations(const std::vector< bool > &apply, const Variables &filtervars, const ObsAccessor &obsAccessor) const
boost::optional< std::vector< int > > getObservationPriorities(const ObsAccessor &obsAccessor) const
void print(std::ostream &) const override
Options controlling the operation of the TemporalThinning filter.
oops::Parameter< util::Duration > minSpacing
Minimum spacing between two successive retained observations.
oops::OptionalParameter< Variable > categoryVariable
oops::OptionalParameter< Variable > priorityVariable
oops::OptionalParameter< util::DateTime > seedTime
oops::Parameter< util::Duration > tolerance
const std::string & variable() const
Definition: Variable.cc:99
const std::string & group() const
Definition: Variable.cc:116
Responsible for the selection of observations to retain.
std::vector< size_t >::const_iterator ForwardValidObsIndexIterator
ForwardValidObsIndexIterator findSeed(ForwardValidObsIndexIterator validObsIndicesBegin, ForwardValidObsIndexIterator validObsIndicesEnd, util::DateTime seedTime) const
Return an iterator to the first observation to be retained.
void thinRangeBackwards(BackwardValidObsIndexIterator validIndicesBegin, BackwardValidObsIndexIterator validIndicesEnd, util::DateTime deadline, std::vector< bool > &isThinned) const
ForwardValidObsIndexIterator findNearest(ForwardValidObsIndexIterator validObsIndicesBegin, ForwardValidObsIndexIterator validObsIndicesEnd, util::DateTime targetTime) const
std::vector< bool > identifyThinnedObservations(size_t totalNumObservations) const
void thinRange(Iterator validIndicesBegin, Iterator validIndicesEnd, util::DateTime deadline, IsPast isPast, IsAtOrPast isAtOrPast, Advance advance, std::vector< bool > &isThinned) const
Common implementation shared by thinRangeForwards() and thinRangeBackwards().
util::DateTime getTime(size_t validObsIndex) const
void thinRangeForwards(ForwardValidObsIndexIterator validIndicesBegin, ForwardValidObsIndexIterator validIndicesEnd, util::DateTime deadline, std::vector< bool > &isThinned) const
std::vector< size_t >::const_reverse_iterator BackwardValidObsIndexIterator
Definition: RunCRTM.h:27