UFO
processWhere.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2018-2020 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <bitset>
11 #include <set>
12 #include <string>
13 #include <vector>
14 
15 #include "eckit/config/LocalConfiguration.h"
16 #include "oops/util/IntSetParser.h"
17 #include "oops/util/Logger.h"
18 #include "oops/util/missingValues.h"
19 #include "oops/util/PartialDateTime.h"
21 #include "ufo/filters/Variables.h"
22 
23 namespace ufo {
24 
25 
26 // -----------------------------------------------------------------------------
27 ufo::Variables getAllWhereVariables(const eckit::Configuration & config) {
28  std::vector<eckit::LocalConfiguration> masks;
29  config.get("where", masks);
30 
31  ufo::Variables vars;
32  for (size_t jm = 0; jm < masks.size(); ++jm) {
33  eckit::LocalConfiguration varconf(masks[jm], "variable");
34  vars += ufo::Variable(varconf);
35  }
36  return vars;
37 }
38 
39 // -----------------------------------------------------------------------------
40 void processWhereMinMax(const std::vector<float> & data,
41  const float & vmin, const float & vmax,
42  std::vector<bool> & mask) {
43  const float not_set_value = util::missingValue(not_set_value);
44  const size_t n = data.size();
45 
46  if (vmin != not_set_value || vmax != not_set_value) {
47  for (size_t jj = 0; jj < n; ++jj) {
48  if (vmin != not_set_value && data[jj] < vmin) mask[jj] = false;
49  if (vmax != not_set_value && data[jj] > vmax) mask[jj] = false;
50  }
51  }
52 }
53 
54 
55 // -----------------------------------------------------------------------------
56 void processWhereMinMax(const std::vector<util::DateTime> & data,
57  const std::string & vmin, const std::string & vmax,
58  std::vector<bool> & mask) {
59  const std::string not_set_value = "0000-00-00T00:00:00Z";
60 
61  if (vmin != not_set_value || vmax != not_set_value) {
62  util::PartialDateTime pdt_vmin(vmin), pdt_vmax(vmax);
63 
64  for (size_t jj = 0; jj < data.size(); ++jj) {
65  if (vmin != not_set_value && pdt_vmin > data[jj]) mask[jj] = false;
66  if (vmax != not_set_value && pdt_vmax < data[jj]) mask[jj] = false;
67  }
68  }
69 }
70 
71 
72 // -----------------------------------------------------------------------------
73 void processWhereIsDefined(const std::vector<float> & data,
74  std::vector<bool> & mask) {
75  const float missing = util::missingValue(missing);
76  const size_t n = data.size();
77  for (size_t jj = 0; jj < n; ++jj) {
78  if (data[jj] == missing) mask[jj] = false;
79  }
80 }
81 
82 // -----------------------------------------------------------------------------
83 
84 void processWhereIsNotDefined(const std::vector<float> & data,
85  std::vector<bool> & mask) {
86  const float missing = util::missingValue(missing);
87  const size_t n = data.size();
88  for (size_t jj = 0; jj < n; ++jj) {
89  if (data[jj] != missing) mask[jj] = false;
90  }
91 }
92 
93 // -----------------------------------------------------------------------------
94 template <class T>
95 void processWhereIsIn(const std::vector<T> & data,
96  const std::set<T> & whitelist,
97  std::vector<bool> & mask) {
98  for (size_t jj = 0; jj < data.size(); ++jj) {
99  if (!oops::contains(whitelist, data[jj])) mask[jj] = false;
100  }
101 }
102 
103 // -----------------------------------------------------------------------------
104 template <class T>
105 void processWhereIsNotIn(const std::vector<T> & data,
106  const std::set<T> & blacklist,
107  std::vector<bool> & mask) {
108  const T missing = util::missingValue(missing);
109  for (size_t jj = 0; jj < data.size(); ++jj) {
110  if (data[jj] == missing || oops::contains(blacklist, data[jj])) mask[jj] = false;
111  }
112 }
113 
114 // -----------------------------------------------------------------------------
115 void processWhereIsNotIn(const std::vector<std::string> & data,
116  const std::set<std::string> & blacklist,
117  std::vector<bool> & mask) {
118  for (size_t jj = 0; jj < data.size(); ++jj) {
119  if (oops::contains(blacklist, data[jj])) mask[jj] = false;
120  }
121 }
122 
123 // -----------------------------------------------------------------------------
124 void applyMinMaxFloat(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
125  ObsFilterData const & filterdata, Variable const & varname) {
126  const float not_set_value = util::missingValue(not_set_value);
127  const float vmin = mask.getFloat("minvalue", not_set_value);
128  const float vmax = mask.getFloat("maxvalue", not_set_value);
129  // Apply mask min/max
130  if (vmin != not_set_value || vmax != not_set_value) {
131  std::vector<float> data;
132  filterdata.get(varname, data);
133  processWhereMinMax(data, vmin, vmax, where);
134  }
135 }
136 
137 void applyMinMaxDatetime(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
138  ObsFilterData const & filterdata, Variable const & varname) {
139  const std::string not_set_value("0000-00-00T00:00:00Z");
140  const std::string vmin = mask.getString("minvalue", not_set_value);
141  const std::string vmax = mask.getString("maxvalue", not_set_value);
142 
143  // Apply mask min/max
144  if (vmin != not_set_value || vmax != not_set_value) {
145  std::vector<util::DateTime> data;
146  filterdata.get(varname, data);
147  processWhereMinMax(data, vmin, vmax, where);
148  }
149 }
150 
151 // -----------------------------------------------------------------------------
/// \brief Deselects locations where any of the requested bits is set in the data value.
///
/// The bit indices in \p flags are combined into a 32-bit mask; a location is
/// deselected when the bitwise AND of its value with that mask is non-zero.
/// Entries of \p mask are only ever switched to false.
///
/// \param data Integer values to test, one per location.
/// \param flags Indices (0..31) of the bits to look for.
/// \param mask Selection mask, indexed like \p data.
/// \throws std::out_of_range if \p flags contains an index outside 0..31
///         (including negative indices). \p mask is not modified in that case.
void processWhereBitSet(const std::vector<int> & data,
                        const std::set<int> & flags,
                        std::vector<bool> & mask) {
  std::bitset<32> flags_bs;
  for (const int bit : flags) {
    // bitset::set() is bounds-checked, whereas operator[] is undefined behaviour
    // for an out-of-range position. A negative index converts to a huge size_t
    // and is therefore rejected as well.
    flags_bs.set(static_cast<size_t>(bit));
  }

  // The combined mask does not depend on the location, so compute it once.
  const unsigned long flagMask = flags_bs.to_ulong();
  for (size_t jj = 0; jj < data.size(); ++jj) {
    if ((data[jj] & flagMask) != 0) mask[jj] = false;
  }
}
163 
164 // -----------------------------------------------------------------------------
165 void isInString(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
166  ObsFilterData const & filterdata, Variable const & varname) {
167  std::vector<std::string> data;
168  std::vector<std::string> whitelistvec = mask.getStringVector("is_in");
169  std::set<std::string> whitelist(whitelistvec.begin(), whitelistvec.end());
170  filterdata.get(varname, data);
171  processWhereIsIn(data, whitelist, where);
172 }
173 
174 // -----------------------------------------------------------------------------
175 void isInInteger(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
176  ObsFilterData const & filterdata, Variable const & varname) {
177  std::vector<int> data;
178  std::set<int> whitelist = oops::parseIntSet(mask.getString("is_in"));
179  filterdata.get(varname, data);
180  processWhereIsIn(data, whitelist, where);
181 }
182 
183 // -----------------------------------------------------------------------------
184 void isNotInString(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
185  ObsFilterData const & filterdata, Variable const & varname) {
186  std::vector<std::string> data;
187  std::vector<std::string> blacklistvec = mask.getStringVector("is_not_in");
188  std::set<std::string> blacklist(blacklistvec.begin(), blacklistvec.end());
189  filterdata.get(varname, data);
190  processWhereIsNotIn(data, blacklist, where);
191 }
192 
193 // -----------------------------------------------------------------------------
194 void isNotInInteger(std::vector<bool> & where, eckit::LocalConfiguration const & mask,
195  ObsFilterData const & filterdata, Variable const & varname) {
196  std::vector<int> data;
197  filterdata.get(varname, data);
198  std::set<int> blacklist = oops::parseIntSet(mask.getString("is_not_in"));
199  processWhereIsNotIn(data, blacklist, where);
200 }
201 
202 // -----------------------------------------------------------------------------
203 std::vector<bool> processWhere(const eckit::Configuration & config,
204  const ObsFilterData & filterdata) {
205  const size_t nlocs = filterdata.nlocs();
206 
207 // Everywhere by default if no mask
208  std::vector<bool> where(nlocs, true);
209 
210  std::vector<eckit::LocalConfiguration> masks;
211  config.get("where", masks);
212 
213  for (size_t jm = 0; jm < masks.size(); ++jm) {
214  eckit::LocalConfiguration varconf(masks[jm], "variable");
215  Variable var(varconf);
216  for (size_t jvar = 0; jvar < var.size(); ++jvar) {
217  if (var.group() != "VarMetaData") {
218  const Variable varname = var[jvar];
219  ioda::ObsDtype dtype = filterdata.dtype(varname);
220 
221  if (dtype == ioda::ObsDtype::DateTime) {
222  applyMinMaxDatetime(where, masks[jm], filterdata, varname);
223  } else {
224  applyMinMaxFloat(where, masks[jm], filterdata, varname);
225  }
226 
227 // Apply mask is_defined
228  if (masks[jm].has("is_defined")) {
229  if (filterdata.has(varname)) {
230  std::vector<float> data;
231  filterdata.get(varname, data);
232  processWhereIsDefined(data, where);
233  } else {
234  std::fill(where.begin(), where.end(), false);
235  }
236  }
237 
238 // Apply mask is_not_defined
239  if (masks[jm].has("is_not_defined")) {
240  std::vector<float> data;
241  filterdata.get(varname, data);
242  processWhereIsNotDefined(data, where);
243  }
244 
245 // Apply mask is_in
246  if (masks[jm].has("is_in")) {
247  if (dtype == ioda::ObsDtype::String) {
248  isInString(where, masks[jm], filterdata, varname);
249  } else if (dtype == ioda::ObsDtype::Integer) {
250  isInInteger(where, masks[jm], filterdata, varname);
251  } else {
252  throw eckit::UserError(
253  "Only integer and string variables may be used for processWhere 'is_in'",
254  Here());
255  }
256  }
257 
258 // Apply mask is_not_in
259  if (masks[jm].has("is_not_in")) {
260  if (dtype == ioda::ObsDtype::String) {
261  isNotInString(where, masks[jm], filterdata, varname);
262  } else if (dtype == ioda::ObsDtype::Integer) {
263  isNotInInteger(where, masks[jm], filterdata, varname);
264  } else {
265  throw eckit::UserError(
266  "Only integer and string variables may be used for processWhere 'is_not_in'",
267  Here());
268  }
269  }
270 
271 // Apply mask any_bit_set_of
272  if (masks[jm].has("any_bit_set_of")) {
273  if (dtype == ioda::ObsDtype::Integer) {
274  std::vector<int> data;
275  std::set<int> flags = oops::parseIntSet(masks[jm].getString("any_bit_set_of"));
276  filterdata.get(varname, data);
277  processWhereBitSet(data, flags, where);
278  } else {
279  throw eckit::UserError(
280  "Only integer variables may be used for processWhere 'any_bit_set_of'",
281  Here());
282  }
283  }
284  }
285  }
286  }
287 // Print diagnostics for debug
288  int ii = 0;
289  for (size_t jj = 0; jj < nlocs; ++jj) {
290  if (where[jj] == false) ++ii;
291  }
292 
293  oops::Log::debug() << "processWhere: selected " << ii << " obs." << std::endl;
294  return where;
295 }
296 
297 // -----------------------------------------------------------------------------
298 
299 } // namespace ufo
ufo::processWhereMinMax
void processWhereMinMax(const std::vector< float > &data, const float &vmin, const float &vmax, std::vector< bool > &mask)
Definition: processWhere.cc:40
ufo::isInInteger
void isInInteger(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:175
ufo::processWhereIsNotDefined
void processWhereIsNotDefined(const std::vector< float > &data, std::vector< bool > &mask)
Definition: processWhere.cc:84
ufo::ObsFilterData::nlocs
size_t nlocs() const
Returns number of locations.
Definition: ObsFilterData.cc:66
ufo::Variables
Definition: src/ufo/filters/Variables.h:24
ufo_radiancerttov_utils_mod::debug
logical, public debug
Definition: ufo_radiancerttov_utils_mod.F90:100
processWhere.h
ufo::Variable::size
size_t size() const
Definition: Variable.cc:79
ufo::processWhereIsDefined
void processWhereIsDefined(const std::vector< float > &data, std::vector< bool > &mask)
Definition: processWhere.cc:73
ufo::processWhere
std::vector< bool > processWhere(const eckit::Configuration &config, const ObsFilterData &filterdata)
Definition: processWhere.cc:203
ufo::processWhereIsNotIn
void processWhereIsNotIn(const std::vector< T > &data, const std::set< T > &blacklist, std::vector< bool > &mask)
Definition: processWhere.cc:105
ufo
Definition: RunCRTM.h:27
ufo::applyMinMaxFloat
void applyMinMaxFloat(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:124
ufo::ObsFilterData::has
bool has(const Variable &) const
Checks if requested data exists in ObsFilterData.
Definition: ObsFilterData.cc:76
ufo::applyMinMaxDatetime
void applyMinMaxDatetime(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:137
Variables.h
ufo::isInString
void isInString(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:165
ufo::QCflags::missing
constexpr int missing
Definition: QCflags.h:15
ufo::getAllWhereVariables
ufo::Variables getAllWhereVariables(const eckit::Configuration &config)
Definition: processWhere.cc:27
ufo::ObsFilterData::dtype
ioda::ObsDtype dtype(const Variable &) const
Determines dtype of the provided variable.
Definition: ObsFilterData.cc:369
ufo::isNotInString
void isNotInString(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:184
ufo::Variable::group
const std::string & group() const
Definition: Variable.cc:117
ufo::test::contains
bool contains(const std::set< T > &set, const T &element)
Definition: test/ufo/MetOfficeBuddyPairFinder.h:282
ufo::processWhereBitSet
void processWhereBitSet(const std::vector< int > &data, const std::set< int > &flags, std::vector< bool > &mask)
Definition: processWhere.cc:152
ufo::processWhereIsIn
void processWhereIsIn(const std::vector< T > &data, const std::set< T > &whitelist, std::vector< bool > &mask)
Definition: processWhere.cc:95
ObsFilterData.h
ufo::ObsFilterData::get
void get(const Variable &, std::vector< float > &) const
Gets requested data from ObsFilterData.
Definition: ObsFilterData.cc:130
ufo::isNotInInteger
void isNotInInteger(std::vector< bool > &where, eckit::LocalConfiguration const &mask, ObsFilterData const &filterdata, Variable const &varname)
Definition: processWhere.cc:194
ufo::Variable
Definition: Variable.h:23
ufo::ObsFilterData
ObsFilterData provides access to all data related to an ObsFilter.
Definition: src/ufo/filters/ObsFilterData.h:40