17 #include "eckit/config/Configuration.h"
18 #include "ioda/ObsDataVector.h"
19 #include "ioda/ObsSpace.h"
20 #include "oops/base/Variables.h"
21 #include "oops/util/DateTime.h"
22 #include "oops/util/Duration.h"
23 #include "oops/util/Logger.h"
41 const std::vector<util::DateTime> ×,
42 const std::vector<int> *priorities,
46 std::vector<bool> identifyThinnedObservations(
size_t totalNumObservations)
const;
54 obs.
id = validObsIds_[validObsIndex];
61 util::DateTime
getTime(
size_t validObsIndex)
const {
62 return times_[validObsIds_[validObsIndex]];
66 ASSERT(priorities_ !=
nullptr);
67 return (*priorities_)[validObsIds_[validObsIndex]];
71 return priorities_ !=
nullptr;
76 void thinRangeForwards(ForwardValidObsIndexIterator validIndicesBegin,
77 ForwardValidObsIndexIterator validIndicesEnd,
78 util::DateTime deadline,
79 std::vector<bool> &isThinned)
const;
83 void thinRangeBackwards(BackwardValidObsIndexIterator validIndicesBegin,
84 BackwardValidObsIndexIterator validIndicesEnd,
85 util::DateTime deadline,
86 std::vector<bool> &isThinned)
const;
104 template <
typename Iterator,
typename IsPast,
typename IsAtOrPast,
typename Advance>
105 void thinRange(Iterator validIndicesBegin,
106 Iterator validIndicesEnd,
107 util::DateTime deadline,
109 IsAtOrPast isAtOrPast,
111 std::vector<bool> &isThinned)
const;
114 ForwardValidObsIndexIterator findSeed(
115 ForwardValidObsIndexIterator validObsIndicesBegin,
116 ForwardValidObsIndexIterator validObsIndicesEnd,
117 util::DateTime seedTime)
const;
121 ForwardValidObsIndexIterator findNearest(
122 ForwardValidObsIndexIterator validObsIndicesBegin,
123 ForwardValidObsIndexIterator validObsIndicesEnd,
124 util::DateTime targetTime)
const;
128 const std::vector<util::DateTime> &
times_;
134 TemporalThinner::TemporalThinner(
const std::vector<size_t> &validObsIds,
135 const std::vector<util::DateTime> ×,
136 const std::vector<int> *priorities,
139 validObsIds_(validObsIds),
141 priorities_(priorities),
147 std::vector<bool> isThinned(totalNumObservations,
false);
151 util::DateTime deadline =
getTime(*group.begin());
175 util::DateTime deadline,
176 std::vector<bool> &isThinned)
const {
177 thinRange(validIndicesBegin, validIndicesEnd, deadline,
178 std::greater<util::DateTime>(),
179 std::greater_equal<util::DateTime>(),
181 [](
const util::DateTime &time,
const util::Duration &offset) {
return time + offset; },
188 util::DateTime deadline,
189 std::vector<bool> &isThinned)
const {
190 thinRange(validIndicesBegin, validIndicesEnd, deadline,
191 std::less<util::DateTime>(),
192 std::less_equal<util::DateTime>(),
193 [](
const util::DateTime &time,
const util::Duration &offset) {
return time - offset; },
197 template <
typename Iterator,
typename IsPast,
typename IsAtOrPast,
typename Advance>
199 Iterator validIndicesEnd,
200 util::DateTime deadline,
202 IsAtOrPast isAtOrPast,
204 std::vector<bool> &isThinned)
const {
205 boost::optional<Observation> best;
206 for (Iterator it = validIndicesBegin; it != validIndicesEnd; ++it) {
207 const size_t validObsIndex = *it;
209 if (best != boost::none) {
211 if (isPast(current.
time, deadline)) {
217 if (current.
priority > best->priority) {
218 isThinned[best->id] =
true;
221 isThinned[current.
id] =
true;
226 if (best == boost::none) {
228 if (isAtOrPast(current.
time, deadline)) {
232 isThinned[current.
id] =
true;
241 util::DateTime seedTime)
const {
243 validObsIndicesBegin, validObsIndicesEnd, seedTime);
245 return nearestToSeedIt;
251 validObsIndicesBegin, nearestToSeedIt,
253 [&](
size_t validObsIndexA,
const util::DateTime &timeB)
254 { return getTime(validObsIndexA) < timeB; });
256 acceptableBegin, validObsIndicesEnd,
258 [&](
const util::DateTime &timeA,
size_t validObsIndexB)
259 { return timeA < getTime(validObsIndexB); });
262 return std::max_element(acceptableBegin, acceptableEnd,
263 [&](
size_t validObsIndexA,
size_t validObsIndexB)
270 util::DateTime targetTime)
const {
271 ASSERT_MSG(validObsIndicesEnd - validObsIndicesBegin != 0,
272 "The range of observation indices must not be empty");
274 auto isEarlierThan = [&](
size_t validObsIndexA,
const util::DateTime &timeB) {
275 return getTime(validObsIndexA) < timeB;
278 std::lower_bound(validObsIndicesBegin, validObsIndicesEnd, targetTime, isEarlierThan);
279 if (firstGreaterOrEqualToTargetIt == validObsIndicesBegin) {
280 return firstGreaterOrEqualToTargetIt;
282 if (firstGreaterOrEqualToTargetIt == validObsIndicesEnd) {
285 return validObsIndicesEnd - 1;
289 firstGreaterOrEqualToTargetIt - 1;
295 if ((firstGreaterOrEqualToTarget.
time - targetTime).toSeconds() <=
296 (targetTime - lastLessThanTarget.
time).toSeconds()) {
297 return firstGreaterOrEqualToTargetIt;
299 return lastLessThanTargetIt;
322 std::vector<std::vector<bool>> & flagged)
const {
327 if (filtervars.
size() != 0) {
328 oops::Log::trace() <<
"TemporalThinning: flagged? = " << flagged[0] << std::endl;
333 const std::vector<bool> & apply)
const {
339 std::vector<util::DateTime> times(
obsdb_.nlocs());
340 obsdb_.get_db(
"MetaData",
"datetime", times);
341 splitter.
sortGroupsBy([×, &validObsIds](
size_t obsIndexA,
size_t obsIndexB)
342 {
return times[validObsIds[obsIndexA]] < times[validObsIds[obsIndexB]]; });
345 const ioda::ObsDataRow<int> *priorities =
346 prioritiesDataVector ? &(*prioritiesDataVector)[0] :
nullptr;
348 TemporalThinner thinner(validObsIds, times, priorities, splitter, *
options_);
349 return thinner.identifyThinnedObservations(apply.size());
353 const std::vector<bool> & apply)
const {
354 std::vector<size_t> validObsIds;
355 for (
size_t obsId = 0; obsId < apply.size(); ++obsId)
357 validObsIds.push_back(obsId);
363 boost::optional<Variable> categoryVariable =
options_->categoryVariable;
364 if (categoryVariable == boost::none)
368 categoryVariable.get().group());
369 ioda::ObsDataRow<int> &category = obsDataVector[0];
371 std::vector<int> validObsCategories(validObsIds.size());
372 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex)
373 validObsCategories[validObsIndex] = category[validObsIds[validObsIndex]];
374 splitter.
groupBy(validObsCategories);
378 std::unique_ptr<ioda::ObsDataVector<int>> priorities;
379 if (
options_->priorityVariable.value() != boost::none) {
388 const std::vector<bool> & isThinned,
389 std::vector<std::vector<bool>> & flagged)
const {
390 for (std::vector<bool> & variableFlagged : flagged)
391 for (
size_t obsId = 0; obsId < isThinned.size(); ++obsId)
392 if (isThinned[obsId])
393 variableFlagged[obsId] =
true;
397 os <<
"TemporalThinning: config = " <<
config_ << std::endl;