UFO
VariableAssignment.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2021 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <limits>
12 #include <set>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 
17 #include <boost/lexical_cast.hpp>
18 #include <boost/math/special_functions/round.hpp>
19 
20 #include "ioda/ObsDataVector.h"
21 #include "ioda/ObsSpace.h"
22 
23 #include "oops/util/IntSetParser.h"
24 #include "oops/util/Logger.h"
25 #include "oops/util/missingValues.h"
26 
29 #include "ufo/filters/QCflags.h"
30 #include "ufo/utils/StringUtils.h"
31 
32 namespace ufo {
33 
34 namespace {
35 
36 /// Convert a float \p x to an int by rounding. If \p is equal to \p missingIn, return \p
37 /// missingOut. If the value to be returned is too large to be represented by an int, throw an
38 /// exception.
39 int safeCast(float x, float missingIn, int missingOut) {
40  if (x == missingIn)
41  return missingOut;
42  // Throws boost::math::rounding_error if x is outside the range representable by ints
43  return boost::math::iround(x);
44 }
45 
46 /// Cast an int \p x to a float. If \p is equal to \p missingIn, return \p missingOut.
47 float safeCast(int x, int missingIn, float missingOut) {
48  if (x == missingIn)
49  return missingOut;
50  return x;
51 }
52 
53 /// Convert \p valueAsString to `VariableType` and for each vector in \p values assign that value
54 /// at locations selected by the `where` statement.
55 template <typename VariableType>
56 void assignValue(const std::string &valueAsString,
57  const std::vector<bool> &apply,
59  VariableType newValue;
60  if (!boost::conversion::try_lexical_convert(valueAsString, newValue))
61  throw eckit::BadCast("Value '" + valueAsString +
62  "' could not be converted to the required type", Here());
63 
64  for (size_t ival = 0; ival < values.nvars(); ++ival) {
65  std::vector<VariableType> &currentValues = values[ival];
66  for (size_t iloc = 0; iloc < apply.size(); ++iloc)
67  if (apply[iloc])
68  currentValues[iloc] = newValue;
69  }
70 }
71 
72 /// For each location selected by the `where` statement, copy the corresponding element of
73 /// \p source to \p destination.
74 ///
75 /// This two-parameter template is selected by the compiler when SourceVariableType is
76 /// different from DestinationVariableType. It takes care of converting missing value indicators
77 /// of type SourceVariableType to ones of type DestinationVariableType.
78 template <typename SourceVariableType, typename DestinationVariableType>
79 void assignObsDataVector(const std::vector<bool> &apply,
82  ASSERT(source.nvars() == destination.nvars());
83 
84  const SourceVariableType missingSource = util::missingValue(SourceVariableType());
85  const DestinationVariableType missingDestination = util::missingValue(DestinationVariableType());
86 
87  for (size_t ival = 0; ival < source.nvars(); ++ival) {
88  const ioda::ObsDataRow<SourceVariableType> &currentSource = source[ival];
89  ioda::ObsDataRow<DestinationVariableType> &currentDestination = destination[ival];
90  for (size_t iloc = 0; iloc < apply.size(); ++iloc) {
91  if (apply[iloc]) {
92  currentDestination[iloc] = safeCast(currentSource[iloc], missingSource, missingDestination);
93  }
94  }
95  }
96 }
97 
98 /// For each location selected by the `where` statement, copy the corresponding element of
99 /// \p source to \p destination.
100 ///
101 /// This single-parameter template is selected by the compiler when \p source and \p destination
102 /// are of the same type.
103 template <typename VariableType>
104 void assignObsDataVector(const std::vector<bool> &apply,
105  const ioda::ObsDataVector<VariableType> &source,
106  ioda::ObsDataVector<VariableType> &destination) {
107  ASSERT(source.nvars() == destination.nvars());
108 
109  for (size_t ival = 0; ival < source.nvars(); ++ival) {
110  const ioda::ObsDataRow<VariableType> &currentSource = source[ival];
111  ioda::ObsDataRow<VariableType> &currentDestination = destination[ival];
112  for (size_t iloc = 0; iloc < apply.size(); ++iloc) {
113  if (apply[iloc]) {
114  currentDestination[iloc] = currentSource[iloc];
115  }
116  }
117  }
118 }
119 
120 /// Retrieve the variable \p variable of type \c SourceVariableType and assign its components
121 /// (channels) to successive vectors in \p values (only at locations selected by the `where`
122 /// statement).
123 template <typename SourceVariableType, typename DestinationVariableType>
124 void assignVariable(const ufo::Variable &variable,
125  const std::vector<bool> &apply,
126  const ObsFilterData &data,
129  data.get(variable, newValues);
130  assignObsDataVector(apply, newValues, values);
131 }
132 
133 /// Evaluate the ObsFunction \p function and assign the vectors it produced to successive vectors
134 /// in \p values (only at locations selected by the `where` statement).
135 template <typename FunctionValueType, typename VariableType>
136 void assignFunction(const ufo::Variable &function,
137  const ufo::Variable &variable,
138  const std::vector<bool> &apply,
139  const ObsFilterData &data,
142  data.get(function, newValues);
143  assignObsDataVector(apply, newValues, values);
144 }
145 
146 /// Assign values to a numeric variable (of type float or int).
147 template <typename VariableType>
149  const ufo::Variable &variable,
150  const std::vector<bool> &apply,
151  const ObsFilterData &data,
153  if (params.value_.value() != boost::none) {
154  assignValue(*params.value_.value(), apply, values);
155  } else if (params.sourceVariable.value() != boost::none) {
156  switch (data.dtype(*params.sourceVariable.value())) {
157  case ioda::ObsDtype::Float:
158  assignVariable<float>(*params.sourceVariable.value(), apply, data, values);
159  break;
160  case ioda::ObsDtype::Integer:
161  assignVariable<int>(*params.sourceVariable.value(), apply, data, values);
162  break;
163  default:
164  throw eckit::BadParameter(params.sourceVariable.value()->fullName() +
165  " is not a numeric variable", Here());
166  }
167  } else {
168  ASSERT(params.function.value() != boost::none);
169  if (params.function.value()->group() == ObsFunctionTraits<float>::groupName)
170  assignFunction<float>(*params.function.value(), variable, apply, data, values);
171  else if (params.function.value()->group() == ObsFunctionTraits<int>::groupName)
172  assignFunction<int>(*params.function.value(), variable, apply, data, values);
173  else
174  throw eckit::BadParameter(params.function.value()->fullName() +
175  " is not a function producing numeric values", Here());
176  }
177 }
178 
179 /// Assign values to a non-numeric variable (of type string or DateTime).
180 template <typename VariableType>
182  const ufo::Variable &variable,
183  const std::vector<bool> &apply,
184  const ObsFilterData &data,
186  if (params.value_.value() != boost::none) {
187  assignValue(*params.value_.value(), apply, values);
188  } else if (params.sourceVariable.value() != boost::none) {
189  assignVariable<VariableType>(*params.sourceVariable.value(), apply, data, values);
190  } else {
191  ASSERT(params.function.value() != boost::none);
192  assignFunction<VariableType>(*params.function.value(), variable, apply, data, values);
193  }
194 }
195 
196 /// Retrieve and return the current values of the variable \p variable from \p obsdb (as
197 /// vectors). Variables that aren't currently stored in \p obsdb are treated as if they consisted
198 /// entirely of missing values.
199 template <typename VariableType>
201  ioda::ObsSpace &obsdb) {
202  ioda::ObsDataVector<VariableType> values(obsdb, variable.toOopsVariables());
203  for (size_t ich = 0; ich < variable.size(); ++ich) {
204  const std::string variableWithChannel = variable.variable(ich);
205  if (obsdb.has(variable.group(), variableWithChannel)) {
206  // Variable exists -- retrieve its values from the ObsSpace
207  obsdb.get_db(variable.group(), variableWithChannel, values[ich]);
208  } else {
209  // Variable doesn't exist yet -- fill the vector with missing values
210  values[ich].assign(obsdb.nlocs(), util::missingValue(VariableType()));
211  }
212  }
213  return values;
214 }
215 
216 /// Save the values \p values of variable \p variable to \p obsdb.
217 template <typename VariableType>
218 void saveValues(const ufo::Variable &variable,
219  const ioda::ObsDataVector<VariableType> &values,
220  ioda::ObsSpace &obsdb) {
221  for (size_t ich = 0; ich < variable.size(); ++ich)
222  obsdb.put_db(variable.group(), variable.variable(ich), values[ich]);
223 }
224 
225 /// Change the QC flag from `miss` to `pass` if the obs value is no longer missing or from `pass` to
226 /// `miss` if the obs value is now missing.
228  const float missing = util::missingValue(float());
229 
230  for (size_t ivar = 0; ivar < obsvalues.nvars(); ++ivar) {
231  if (qcflags.varnames().has(obsvalues.varnames()[ivar])) {
232  const ioda::ObsDataRow<float> &currentValues = obsvalues[ivar];
233  ioda::ObsDataRow<int> &currentFlags = qcflags[obsvalues.varnames()[ivar]];
234  for (size_t iloc = 0; iloc < obsvalues.nlocs(); ++iloc) {
235  if (currentFlags[iloc] == QCflags::missing && currentValues[iloc] != missing)
236  currentFlags[iloc] = QCflags::pass;
237  else if (currentFlags[iloc] == QCflags::pass && currentValues[iloc] == missing)
238  currentFlags[iloc] = QCflags::missing;
239  }
240  }
241  }
242 }
243 
244 /// Retrieve the current values of an int-valued variable \p variable from \p obsdb (or if it
245 /// doesn't already exist, fill it with missing values), assign new values to elements selected by
246 /// the `where` clause and save the results to \p obsdb.
247 void assignToIntVariable(const ufo::Variable &variable,
248  const AssignmentParameters &params,
249  const std::vector<bool> &apply,
250  const ObsFilterData &data,
251  ioda::ObsSpace &obsdb) {
252  ioda::ObsDataVector<int> values = getCurrentValues<int>(variable, obsdb);
253  assignNumericValues(params, variable, apply, data, values);
254  saveValues(variable, values, obsdb);
255 }
256 
257 /// Works like `assignToIntVariable()`, but in addition if \p variable belongs to the `ObsValue` or
258 /// `DerivedObsValue` group and is one of the simulated variables, it updates `qcflags` by (a)
259 /// changing `miss` to `pass` if the obs value is no longer missing and (b) changing `pass` to
260 /// `miss` if the obs value is now missing.
262  const AssignmentParameters &params,
263  const std::vector<bool> &apply,
264  const ObsFilterData &data,
265  ioda::ObsSpace &obsdb,
266  ioda::ObsDataVector<int> &qcflags) {
267  ioda::ObsDataVector<float> values = getCurrentValues<float>(variable, obsdb);
268  assignNumericValues(params, variable, apply, data, values);
269  saveValues(variable, values, obsdb);
270  if (variable.group() == "ObsValue" || variable.group() == "DerivedObsValue")
271  updateQCFlags(values, qcflags);
272 }
273 
274 /// Retrieve the current values of a non-numeric variable \p variable from \p obsdb (or if it
275 /// doesn't already exist, fill it with missing values), assign new values to elements selected by
276 /// the `where` clause and save the results to \p obsdb.
277 template <typename VariableType>
279  const AssignmentParameters &params,
280  const std::vector<bool> &apply,
281  const ObsFilterData &data,
282  ioda::ObsSpace &obsdb) {
283  ioda::ObsDataVector<VariableType> values = getCurrentValues<VariableType>(variable, obsdb);
284  assignNonnumericValues(params, variable, apply, data, values);
285  saveValues(variable, values, obsdb);
286 }
287 
288 /// Delegate work to an appropriate function depending on whether \p dtype is a numeric
289 /// or non-numeric type.
290 void assignToVariable(const ufo::Variable &variable,
291  ioda::ObsDtype dtype,
292  const AssignmentParameters &params,
293  const std::vector<bool> &apply,
294  const ObsFilterData &data,
295  ioda::ObsSpace &obsdb,
296  ioda::ObsDataVector<int> &qcflags) {
297  switch (dtype) {
298  case ioda::ObsDtype::Float:
299  assignToFloatVariable(variable, params, apply, data, obsdb, qcflags);
300  break;
301  case ioda::ObsDtype::Integer:
302  assignToIntVariable(variable, params, apply, data, obsdb);
303  break;
304  case ioda::ObsDtype::String:
305  assignToNonnumericVariable<std::string>(variable, params, apply, data, obsdb);
306  break;
307  case ioda::ObsDtype::DateTime:
308  assignToNonnumericVariable<util::DateTime>(variable, params, apply, data, obsdb);
309  break;
310  default:
311  ASSERT_MSG(false, "Unrecognized data type");
312  }
313 }
314 
315 /// Return the variable to which new values will be assigned.
317  const std::set<int> setChannels = oops::parseIntSet(params.channels);
318  std::vector<int> vecChannels(setChannels.begin(), setChannels.end());
319  const ufo::Variable variable(params.name, vecChannels);
320  if (variable.group() == "ObsValue")
321  throw eckit::BadValue("Assignment to variables from the ObsValue group is not allowed",
322  Here());
323  return variable;
324 }
325 
326 /// Return the data type of the variable to which new values will be assigned.
327 ioda::ObsDtype getDataType(boost::optional<ioda::ObsDtype> dtypeParam,
328  const ufo::Variable &variable,
329  const ioda::ObsSpace &obsdb) {
330  if (dtypeParam != boost::none) {
331  // If the dtype option has been set, return its value.
332  return *dtypeParam;
333  } else {
334  // Otherwise, check if the variable to which new values should be assigned already
335  // exists and if so, return its data type.
336  for (size_t ich = 0; ich < variable.size(); ++ich) {
337  const std::string variableWithChannel = variable.variable(ich);
338  if (obsdb.has(variable.group(), variableWithChannel))
339  return obsdb.dtype(variable.group(), variableWithChannel);
340  }
341  // The variable doesn't exist yet.
342  throw eckit::BadParameter("You need to specify the type of the variable to be created "
343  "by setting the 'type' option of the filter to 'float', 'int', "
344  "'string' or 'datetime'.");
345  }
346 }
347 
348 } // namespace
349 
350 
351 void AssignmentParameters::deserialize(util::CompositePath &path,
352  const eckit::Configuration &config) {
353  oops::Parameters::deserialize(path, config);
354 
355  // These checks should really be done at the validation stage (using JSON Schema),
356  // but this isn't supported yet, so this is better than nothing.
357  const int numOptionsSet = static_cast<int>(value_.value() != boost::none) +
358  static_cast<int>(sourceVariable.value() != boost::none) +
359  static_cast<int>(function.value() != boost::none);
360  if (numOptionsSet != 1)
361  throw eckit::UserError(path.path() +
362  ": Exactly one of the 'value', 'source variable' and 'function' options "
363  "must be present");
364 }
365 
366 
367 VariableAssignment::VariableAssignment(ioda::ObsSpace & obsdb, const Parameters_ & parameters,
368  std::shared_ptr<ioda::ObsDataVector<int> > flags,
369  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
370  : ObsProcessorBase(obsdb, parameters.deferToPost, std::move(flags), std::move(obserr)),
371  parameters_(parameters)
372 {
373  oops::Log::debug() << "VariableAssignment: config = " << parameters_ << std::endl;
374  allvars_ += getAllWhereVariables(parameters.where);
375 
376  for (const AssignmentParameters &assignment : parameters.assignments.value()) {
377  if (assignment.sourceVariable.value() != boost::none) {
378  allvars_ += *assignment.sourceVariable.value();
379  }
380  if (assignment.function.value() != boost::none) {
381  allvars_ += *assignment.function.value();
382  }
383  }
384 }
385 
387  oops::Log::trace() << "VariableAssignment doFilter begin" << std::endl;
388 
389  // Select locations at which the filter will be applied
390  const std::vector<bool> apply = processWhere(parameters_.where, data_);
391 
392  // Assign values to successive sets of variables
393  for (const AssignmentParameters &assignment : parameters_.assignments.value()) {
394  const ufo::Variable variable = getVariable(assignment);
395  const ioda::ObsDtype dtype = getDataType(assignment.type, variable, obsdb_);
396  assignToVariable(variable, dtype, assignment, apply, data_, obsdb_, *flags_);
397  }
398 
399  oops::Log::trace() << "VariableAssignment doFilter end" << std::endl;
400 }
401 
402 void VariableAssignment::print(std::ostream & os) const {
403  os << "VariableAssignment: config = " << parameters_ << std::endl;
404 }
405 
406 } // namespace ufo
Parameters controlling assignment of new values to a variable.
oops::OptionalParameter< std::string > value_
void deserialize(util::CompositePath &path, const eckit::Configuration &config) override
oops::OptionalParameter< ufo::Variable > sourceVariable
oops::OptionalParameter< ufo::Variable > function
oops::RequiredParameter< std::string > name
Name of the variable to which new values should be assigned.
oops::Parameter< std::string > channels
Set of channels to which new values should be assigned.
ObsFilterData provides access to all data related to an ObsFilter.
ioda::ObsSpace & obsspace() const
Returns reference to ObsSpace associated with ObsFilterData.
ioda::ObsDtype dtype(const Variable &) const
Determines dtype of the provided variable.
void get(const Variable &varname, std::vector< float > &values) const
Fills a std::vector with values of the specified variable.
Base class for UFO observation processors (including QC filters).
ufo::Variables allvars_
ioda::ObsSpace & obsdb_
std::shared_ptr< ioda::ObsDataVector< int > > flags_
void doFilter() const override
VariableAssignment(ioda::ObsSpace &obsdb, const Parameters_ &parameters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
void print(std::ostream &) const override
Parameters controlling the VariableAssignment filter.
oops::Parameter< std::vector< WhereParameters > > where
oops::Parameter< std::vector< AssignmentParameters > > assignments
One or more sets of options controlling the values assigned to a particular variable.
const std::string & variable() const
Definition: Variable.cc:99
const std::string & group() const
Definition: Variable.cc:116
oops::Variables toOopsVariables() const
Definition: Variable.cc:139
size_t size() const
Definition: Variable.cc:78
constexpr int pass
Definition: QCflags.h:14
constexpr int missing
Definition: QCflags.h:20
ufo::Variable getVariable(const AssignmentParameters &params)
Return the variable to which new values will be assigned.
float safeCast(int x, int missingIn, float missingOut)
Cast an int x to a float. If is equal to missingIn, return missingOut.
ioda::ObsDtype getDataType(boost::optional< ioda::ObsDtype > dtypeParam, const ufo::Variable &variable, const ioda::ObsSpace &obsdb)
Return the data type of the variable to which new values will be assigned.
void assignNumericValues(const AssignmentParameters &params, const ufo::Variable &variable, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsDataVector< VariableType > &values)
Assign values to a numeric variable (of type float or int).
void assignToIntVariable(const ufo::Variable &variable, const AssignmentParameters &params, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsSpace &obsdb)
void assignVariable(const ufo::Variable &variable, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsDataVector< DestinationVariableType > &values)
void assignToFloatVariable(const ufo::Variable &variable, const AssignmentParameters &params, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsSpace &obsdb, ioda::ObsDataVector< int > &qcflags)
void assignToVariable(const ufo::Variable &variable, ioda::ObsDtype dtype, const AssignmentParameters &params, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsSpace &obsdb, ioda::ObsDataVector< int > &qcflags)
void assignObsDataVector(const std::vector< bool > &apply, const ioda::ObsDataVector< VariableType > &source, ioda::ObsDataVector< VariableType > &destination)
void assignToNonnumericVariable(const ufo::Variable &variable, const AssignmentParameters &params, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsSpace &obsdb)
void updateQCFlags(const ioda::ObsDataVector< float > &obsvalues, ioda::ObsDataVector< int > &qcflags)
void assignNonnumericValues(const AssignmentParameters &params, const ufo::Variable &variable, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsDataVector< VariableType > &values)
Assign values to a non-numeric variable (of type string or DateTime).
void assignFunction(const ufo::Variable &function, const ufo::Variable &variable, const std::vector< bool > &apply, const ObsFilterData &data, ioda::ObsDataVector< VariableType > &values)
void assignValue(const std::string &valueAsString, const std::vector< bool > &apply, ioda::ObsDataVector< VariableType > &values)
void saveValues(const ufo::Variable &variable, const ioda::ObsDataVector< VariableType > &values, ioda::ObsSpace &obsdb)
Save the values values of variable variable to obsdb.
ioda::ObsDataVector< VariableType > getCurrentValues(const ufo::Variable &variable, ioda::ObsSpace &obsdb)
Definition: RunCRTM.h:27
ufo::Variables getAllWhereVariables(const std::vector< WhereParameters > &params)
Definition: processWhere.cc:29
std::vector< bool > processWhere(const std::vector< WhereParameters > &params, const ObsFilterData &filterdata)
Common properties of ObsFunctions producing values of type FunctionValue.