16 #include "eckit/config/LocalConfiguration.h"
17 #include "eckit/types/FloatCompare.h"
18 #include "oops/util/IntSetParser.h"
19 #include "oops/util/Logger.h"
20 #include "oops/util/missingValues.h"
21 #include "oops/util/wildcard.h"
41 const T & vmin,
const T & vmax,
42 std::vector<bool> & mask) {
43 const T not_set_value = util::missingValue(not_set_value);
44 const size_t n = data.size();
46 if (vmin != not_set_value || vmax != not_set_value) {
47 for (
size_t jj = 0; jj < n; ++jj) {
48 if (vmin != not_set_value && data[jj] < vmin) mask[jj] =
false;
49 if (vmax != not_set_value && data[jj] > vmax) mask[jj] =
false;
57 const util::PartialDateTime & vmin,
const util::PartialDateTime & vmax,
58 std::vector<bool> & mask) {
59 const util::PartialDateTime not_set_value {};
61 if (vmin != not_set_value || vmax != not_set_value) {
62 for (
size_t jj = 0; jj < data.size(); ++jj) {
63 if (vmin != not_set_value && vmin > data[jj]) mask[jj] =
false;
64 if (vmax != not_set_value && vmax < data[jj]) mask[jj] =
false;
72 std::vector<bool> & mask) {
74 const size_t n = data.size();
75 for (
size_t jj = 0; jj < n; ++jj) {
76 if (data[jj] ==
missing) mask[jj] =
false;
83 std::vector<bool> & mask) {
85 const size_t n = data.size();
86 for (
size_t jj = 0; jj < n; ++jj) {
87 if (data[jj] !=
missing) mask[jj] =
false;
94 const std::set<T> & whitelist,
95 std::vector<bool> & mask) {
96 for (
size_t jj = 0; jj < data.size(); ++jj) {
103 const float tolerance,
const bool relative,
104 const std::vector<float> & whitelist,
105 std::vector<bool> & mask) {
106 for (
size_t jj = 0; jj < data.size(); ++jj) {
108 for (
auto testvalue : whitelist) {
110 float relativetolerance = testvalue * tolerance;
111 if (eckit::types::is_approximately_equal(data[jj], testvalue, relativetolerance)) {
116 if (eckit::types::is_approximately_equal(data[jj], testvalue, tolerance)) {
122 if (!inlist) mask[jj] =
false;
129 const std::set<T> & blacklist,
130 std::vector<bool> & mask) {
132 for (
size_t jj = 0; jj < data.size(); ++jj) {
139 const std::set<std::string> & blacklist,
140 std::vector<bool> & mask) {
141 for (
size_t jj = 0; jj < data.size(); ++jj) {
148 const float tolerance,
const bool relative,
149 const std::vector<float> & blacklist,
150 std::vector<bool> & mask) {
152 for (
size_t jj = 0; jj < data.size(); ++jj) {
153 for (
auto testvalue : blacklist) {
155 float relativetolerance = testvalue * tolerance;
157 eckit::types::is_approximately_equal(data[jj], testvalue, relativetolerance)) {
163 eckit::types::is_approximately_equal(data[jj], testvalue, tolerance)) {
173 template <
typename T>
176 const T not_set_value = util::missingValue(not_set_value);
179 T vmin = not_set_value;
180 if (parameters.
minvalue.value() != boost::none)
181 vmin = parameters.
minvalue.value()->as<T>();
183 T vmax = not_set_value;
184 if (parameters.
maxvalue.value() != boost::none)
185 vmax = parameters.
maxvalue.value()->as<T>();
188 if (vmin != not_set_value || vmax != not_set_value) {
190 filterdata.
get(varname, data);
197 void applyMinMax<util::DateTime>(std::vector<bool> & where, WhereParameters
const & parameters,
198 ObsFilterData
const & filterdata, Variable
const & varname) {
199 util::PartialDateTime vmin {}, vmax {}, not_set_value {};
200 if (parameters.minvalue.value() != boost::none)
201 vmin = parameters.minvalue.value()->as<util::PartialDateTime>();
202 if (parameters.maxvalue.value() != boost::none)
203 vmax = parameters.maxvalue.value()->as<util::PartialDateTime>();
206 if (vmin != not_set_value || vmax != not_set_value) {
207 std::vector<util::DateTime> data;
208 filterdata.get(varname, data);
226 const std::set<int> & bitIndices,
227 std::vector<bool> & where) {
228 std::bitset<32> mask_bs;
229 for (
const int &bitIndex : bitIndices) {
230 mask_bs[bitIndex] = 1;
232 const int mask = mask_bs.to_ulong();
234 for (
size_t jj = 0; jj < data.size(); ++jj) {
235 if ((data[jj] & mask) == 0) {
255 const std::set<int> & bitIndices,
256 std::vector<bool> & where) {
257 std::bitset<32> mask_bs;
258 for (
const int &bitIndex : bitIndices) {
259 mask_bs[bitIndex] = 1;
261 const int mask = mask_bs.to_ulong();
263 for (
size_t jj = 0; jj < data.size(); ++jj) {
264 if ((data[jj] & mask) == mask) {
278 const std::string & pattern,
279 std::vector<bool> & where) {
280 std::regex regex(pattern);
281 for (
size_t jj = 0; jj < data.size(); ++jj) {
282 if (where[jj] && !std::regex_match(data[jj], regex))
293 const std::string & pattern,
294 std::vector<bool> & where) {
295 std::regex regex(pattern);
296 for (
size_t jj = 0; jj < data.size(); ++jj) {
297 if (where[jj] && !std::regex_match(std::to_string(data[jj]), regex))
308 const std::vector<std::string> & patterns) {
309 return std::any_of(patterns.begin(),
311 [&
string] (
const std::string &pattern)
312 { return util::matchesWildcardPattern(string, pattern); });
323 const std::vector<std::string> & patterns,
324 std::vector<bool> & where) {
325 for (
size_t jj = 0; jj < data.size(); ++jj) {
334 const std::vector<std::string> & patterns,
335 std::vector<bool> & where) {
336 for (
size_t jj = 0; jj < data.size(); ++jj) {
343 void isInString(std::vector<bool> & where, std::vector<std::string>
const & allowedValues,
345 std::vector<std::string> data;
346 std::set<std::string> whitelist(allowedValues.begin(), allowedValues.end());
347 filterdata.
get(varname, data);
352 void isInInteger(std::vector<bool> & where, std::set<int>
const & allowedValues,
354 std::vector<int> data;
355 filterdata.
get(varname, data);
360 void isNotInString(std::vector<bool> & where, std::vector<std::string>
const & forbiddenValues,
362 std::vector<std::string> data;
363 std::set<std::string> blacklist(forbiddenValues.begin(), forbiddenValues.end());
364 filterdata.
get(varname, data);
369 void isNotInInteger(std::vector<bool> & where, std::set<int>
const & forbiddenValues,
371 std::vector<int> data;
372 filterdata.
get(varname, data);
377 std::vector<bool>
processWhere(
const std::vector<WhereParameters> & params,
382 std::vector<bool> where(
nlocs,
true);
386 for (
size_t jvar = 0; jvar < var.
size(); ++jvar) {
387 if (var.
group() !=
"VarMetaData") {
389 ioda::ObsDtype dtype = filterdata.
dtype(varname);
391 if (dtype == ioda::ObsDtype::DateTime) {
392 applyMinMax<util::DateTime>(where, currentParams, filterdata, varname);
393 }
else if (dtype == ioda::ObsDtype::Integer) {
394 applyMinMax<int>(where, currentParams, filterdata, varname);
396 applyMinMax<float>(where, currentParams, filterdata, varname);
400 if (currentParams.isDefined.value()) {
401 if (filterdata.
has(varname)) {
402 std::vector<float> data;
403 filterdata.
get(varname, data);
406 std::fill(where.begin(), where.end(),
false);
411 if (currentParams.isNotDefined.value()) {
412 std::vector<float> data;
413 filterdata.
get(varname, data);
418 if (currentParams.isIn.value() != boost::none) {
419 if (dtype == ioda::ObsDtype::String) {
420 isInString(where, currentParams.isIn.value()->as<std::vector<std::string>>(),
421 filterdata, varname);
422 }
else if (dtype == ioda::ObsDtype::Integer) {
423 isInInteger(where, currentParams.isIn.value()->as<std::set<int>>(),
424 filterdata, varname);
426 throw eckit::UserError(
427 "Only integer and string variables may be used for processWhere 'is_in'",
433 if (currentParams.isClose.value() != boost::none) {
434 if (dtype == ioda::ObsDtype::Float) {
435 std::vector<float> data;
436 filterdata.
get(varname, data);
437 if (currentParams.relativetolerance.value() == boost::none &&
438 currentParams.absolutetolerance.value() != boost::none) {
440 false, currentParams.isClose.value().get(), where);
441 }
else if (currentParams.relativetolerance.value() != boost::none &&
442 currentParams.absolutetolerance.value() == boost::none) {
444 true, currentParams.isClose.value().get(), where);
446 throw eckit::UserError(
447 "For 'is_close' one (and only one) tolerance is needed.",
451 throw eckit::UserError(
452 "Only float variables may be used for processWhere 'is_close'",
458 if (currentParams.isNotIn.value() != boost::none) {
459 if (dtype == ioda::ObsDtype::String) {
460 isNotInString(where, currentParams.isNotIn.value()->as<std::vector<std::string>>(),
461 filterdata, varname);
462 }
else if (dtype == ioda::ObsDtype::Integer) {
463 isNotInInteger(where, currentParams.isNotIn.value()->as<std::set<int>>(),
464 filterdata, varname);
466 throw eckit::UserError(
467 "Only integer and string variables may be used for processWhere 'is_not_in'",
473 if (currentParams.isNotClose.value() != boost::none) {
474 if (dtype == ioda::ObsDtype::Float) {
475 std::vector<float> data;
476 filterdata.
get(varname, data);
477 if (currentParams.relativetolerance.value() == boost::none &&
478 currentParams.absolutetolerance.value() != boost::none) {
480 false, currentParams.isNotClose.value().get(), where);
481 }
else if (currentParams.relativetolerance.value() != boost::none &&
482 currentParams.absolutetolerance.value() == boost::none) {
484 true, currentParams.isNotClose.value().get(), where);
486 throw eckit::UserError(
487 "For 'is_close' one (and only one) tolerance is needed.",
491 throw eckit::UserError(
492 "Only float variables may be used for processWhere 'is_not_close'",
498 if (currentParams.anyBitSetOf.value() != boost::none) {
499 if (dtype == ioda::ObsDtype::Integer) {
500 std::vector<int> data;
501 const std::set<int> &bitIndices = *currentParams.anyBitSetOf.value();
502 filterdata.
get(varname, data);
505 throw eckit::UserError(
506 "Only integer variables may be used for processWhere 'any_bit_set_of'",
512 if (currentParams.anyBitUnsetOf.value() != boost::none) {
513 if (dtype == ioda::ObsDtype::Integer) {
514 std::vector<int> data;
515 const std::set<int> &bitIndices = *currentParams.anyBitUnsetOf.value();
516 filterdata.
get(varname, data);
519 throw eckit::UserError(
520 "Only integer variables may be used for processWhere 'any_bit_unset_of'",
526 if (currentParams.matchesRegex.value() != boost::none) {
527 const std::string pattern = *currentParams.matchesRegex.value();
530 if (dtype == ioda::ObsDtype::Integer) {
531 std::vector<int> data;
532 filterdata.
get(varname, data);
534 }
else if (dtype == ioda::ObsDtype::String) {
535 std::vector<std::string> data;
536 filterdata.
get(varname, data);
539 throw eckit::UserError(
540 "Only string and integer variables may be used for processWhere 'matches_regex'",
546 if (currentParams.matchesWildcard.value() != boost::none) {
547 const std::string &pattern = *currentParams.matchesWildcard.value();
550 if (dtype == ioda::ObsDtype::Integer) {
551 std::vector<int> data;
552 filterdata.
get(varname, data);
554 }
else if (dtype == ioda::ObsDtype::String) {
555 std::vector<std::string> data;
556 filterdata.
get(varname, data);
559 throw eckit::UserError(
560 "Only string and integer variables may be used for processWhere 'matches_wildcard'",
566 if (currentParams.matchesAnyWildcard.value() != boost::none) {
567 const std::vector<std::string> &patterns = *currentParams.matchesAnyWildcard.value();
570 if (dtype == ioda::ObsDtype::Integer) {
571 std::vector<int> data;
572 filterdata.
get(varname, data);
574 }
else if (dtype == ioda::ObsDtype::String) {
575 std::vector<std::string> data;
576 filterdata.
get(varname, data);
579 throw eckit::UserError(
580 "Only string and integer variables may be used for processWhere "
581 "'matches_any_wildcard'",
590 for (
size_t jj = 0; jj <
nlocs; ++jj) {
591 if (where[jj] ==
false) ++ii;
594 oops::Log::debug() <<
"processWhere: selected " << ii <<
" obs." << std::endl;
ObsFilterData provides access to all data related to an ObsFilter.
size_t nlocs() const
Returns the number of locations in the associated ObsSpace.
ioda::ObsDtype dtype(const Variable &) const
Determines dtype of the provided variable.
bool has(const Variable &varname) const
Returns true if variable varname is known to ObsFilterData, false otherwise.
void get(const Variable &varname, std::vector< float > &values) const
Fills a std::vector with values of the specified variable.
const std::string & variable() const
const std::string & group() const
Variable variable(const size_t) const
Return a given constituent "primitive" (single-channel) variable.
Contents of a single element of the list taken by the where option of each filter,...
oops::OptionalParameter< util::AnyOf< int, float, util::PartialDateTime > > minvalue
oops::OptionalParameter< util::AnyOf< int, float, util::PartialDateTime > > maxvalue
bool contains(const std::set< T > &set, const T &element)
integer function nlocs(this)
Return the number of observational locations in this Locations object.
void processWhereIsNotIn(const std::vector< T > &data, const std::set< T > &blacklist, std::vector< bool > &mask)
void processWhereIsDefined(const std::vector< float > &data, std::vector< bool > &mask)
void isNotInString(std::vector< bool > &where, std::vector< std::string > const &forbiddenValues, ObsFilterData const &filterdata, Variable const &varname)
bool stringMatchesAnyWildcardPattern(const std::string &string, const std::vector< std::string > &patterns)
void processWhereIsNotDefined(const std::vector< float > &data, std::vector< bool > &mask)
ufo::Variables getAllWhereVariables(const std::vector< WhereParameters > &params)
std::vector< bool > processWhere(const std::vector< WhereParameters > &params, const ObsFilterData &filterdata)
void processWhereIsClose(const std::vector< float > &data, const float tolerance, const bool relative, const std::vector< float > &whitelist, std::vector< bool > &mask)
void processWhereIsIn(const std::vector< T > &data, const std::set< T > &whitelist, std::vector< bool > &mask)
void processWhereMatchesRegex(const std::vector< std::string > &data, const std::string &pattern, std::vector< bool > &where)
Process a matches_regex keyword in a where clause.
void processWhereAnyBitUnsetOf(const std::vector< int > &data, const std::set< int > &bitIndices, std::vector< bool > &where)
Process an any_bit_unset_of keyword in a where clause.
void applyMinMax(std::vector< bool > &where, WhereParameters const &parameters, ObsFilterData const &filterdata, Variable const &varname)
void processWhereMatchesAnyWildcardPattern(const std::vector< std::string > &data, const std::vector< std::string > &patterns, std::vector< bool > &where)
Function used to process a matches_wildcard or matches_any_wildcard keyword in a where clause.
void isInInteger(std::vector< bool > &where, std::set< int > const &allowedValues, ObsFilterData const &filterdata, Variable const &varname)
void processWhereAnyBitSetOf(const std::vector< int > &data, const std::set< int > &bitIndices, std::vector< bool > &where)
Process an any_bit_set_of keyword in a where clause.
void isInString(std::vector< bool > &where, std::vector< std::string > const &allowedValues, ObsFilterData const &filterdata, Variable const &varname)
void processWhereMinMax(const std::vector< T > &data, const T &vmin, const T &vmax, std::vector< bool > &mask)
void processWhereIsNotClose(const std::vector< float > &data, const float tolerance, const bool relative, const std::vector< float > &blacklist, std::vector< bool > &mask)
void isNotInInteger(std::vector< bool > &where, std::set< int > const &forbiddenValues, ObsFilterData const &filterdata, Variable const &varname)