UFO
DataExtractor.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2021 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #include <algorithm> // sort
9 #include <functional> // greater
10 #include <limits> // std::numeric_limits
11 #include <list> // list
12 #include <sstream> // stringstream
13 #include <utility> // pair
14 
15 #include <boost/make_unique.hpp>
16 
17 #include "eckit/utils/StringTools.h"
18 
19 #include "ioda/Misc/StringFuncs.h"
20 
21 #include "oops/util/Logger.h"
22 
27 
28 namespace ufo {
29 
30 
31 namespace {
32 
33 
34 /// \brief Boost visitor which allows us to sort a vector.
35 class SortUpdateVisitor : public boost::static_visitor<void> {
36  public:
37  explicit SortUpdateVisitor(ufo::RecursiveSplitter &splitter) : splitter(splitter) {}
38 
39  template <typename T>
40  void operator()(const std::vector<T> &coord) {
41  splitter.groupBy(coord);
42  }
43 
44  void operator()(const std::vector<float> &coord) {
45  splitter.sortGroupsBy([&coord](int index) { return coord[static_cast<size_t>(index)]; });
46  }
47 
49 };
50 
51 
52 /// \brief Boost visitor which allows us to sort a vector.
53 class SortVisitor : public boost::static_visitor<void> {
54  public:
55  explicit SortVisitor(const ufo::RecursiveSplitter &splitter) : splitter(splitter) {}
56 
57  template <typename T>
58  void operator()(std::vector<T> &coord) {
59  std::vector<T> newCoord;
60  newCoord.reserve(coord.size());
61  for (const auto &group : splitter.groups()) {
62  for (const auto &index : group) {
63  newCoord.push_back(coord[index]);
64  }
65  }
66  // Replace the coordinate with the sorted one.
67  coord = std::move(newCoord);
68  }
69 
71 };
72 
73 
74 /// \brief Update our extract constraint based on an exact match against the specified coordinate
75 /// indexing a dimension of the payload array.
76 ///
77 /// \param[in] varName
78 /// Name of the coordinate to match against.
79 /// \param[in] varValues
80 /// Vector of values of that coordinate.
81 /// \param[in] obVal
82 /// Value to match.
83 /// \param[inout] range
84 /// On input, the range of slices of the payload array along the dimension indexed by
85 /// `varValues` that matches all constraints considered so far. On output, the subrange of
86 /// slices matching also the current constraint.
87 template<typename T>
88 void exactMatch(const std::string &varName,
89  const std::vector<T> &varValues,
90  const T &obVal,
91  ConstrainedRange &range) {
92  // Find the first and last matching index
93  auto bounds = std::equal_range(varValues.begin() + range.begin(),
94  varValues.begin() + range.end(),
95  obVal);
96  if (bounds.first == bounds.second) {
97  // No matching coordinate found. If the coordinate contains a 'missing value' entry,
98  // use it as a fallback. (If it doesn't, the 'bounds' range will stay empty, so an error will
99  // be reported).
100  bounds = std::equal_range(varValues.begin() + range.begin(),
101  varValues.begin() + range.end(),
102  util::missingValue(obVal));
103  }
104 
105  range.constrain(static_cast<int>(bounds.first - varValues.begin()),
106  static_cast<int>(bounds.second - varValues.begin()));
107 
108  if (range.begin() == range.end()) {
109  std::stringstream msg;
110  msg << "No match found for exact match extraction of value '" << obVal
111  << "' of the variable '" << varName << "'";
112  throw eckit::Exception(msg.str(), Here());
113  }
114  oops::Log::debug() << "Exact match; name: " << varName << " range: " <<
115  range.begin() << "," << range.end() << std::endl;
116 }
117 
118 
119 /// \brief Update our extract constraint based on a nearest match against the specified
120 /// coordinate indexing a dimension of the payload array.
121 /// \details
122 ///
123 /// Method:
124 /// - Find **first** discovered nearest value in our loop.
125 /// - Determine which indices match this nearest value.
126 /// (more than one index could have this one value).
127 ///
128 /// [1, 1, 2, 3, 4, 5]
129 ///
130 /// Nearest neighbour extraction of “1”, has more than one neighbour.
131 /// That is, more than one index with the same value have the same distance:
132 ///
133 /// [1, 1] i.e. range=(0, 2)
134 ///
135 /// - Note that an alternative implementation could consider equidistant
136 /// values, though it was decided this was not desirable behaviour:
137 ///
138 /// [1, 1, 2, 3, 4, 5]
139 ///
140 /// Nearest neighbour extraction of “1.5” could be then considered to have 3
141 /// equidistant neighbours (1, 1, 2). That is, two different values with the
142 /// same distance.
143 ///
144 /// [1, 1, 2] i.e. range=(0, 3)
145 ///
146 /// \param[in] varName
147 /// Name of the coordinate to match against.
148 /// \param[in] varValues
149 /// Vector of values of that coordinate.
150 /// \param[in] obVal
151 /// Value to match.
152 /// \param[inout] range
153 /// On input, the range of slices of the payload array along the dimension indexed by
154 /// `varValues` that matches all constraints considered so far. On output, the subrange of
155 /// slices matching also the current constraint.
156 template<typename T>
157 void nearestMatch(const std::string &varName,
158  const std::vector<T> &varValues,
159  const T &obVal,
160  ConstrainedRange &range) {
161  // Find first index of varValues >= obVal
162  int nnIndex = std::lower_bound(varValues.begin() + range.begin(),
163  varValues.begin() + range.end(),
164  obVal) - varValues.begin();
165  if (nnIndex >= range.end()) {
166  nnIndex = range.end() - 1;
167  }
168 
169  // Now fetch the nearest neighbour index (lower index prioritised for different values with
170  // same distance)
171  T dist = std::abs(varValues[nnIndex] - obVal);
172  if ((varValues[nnIndex] > obVal) && (nnIndex > range.begin()) &&
173  (std::abs(varValues[nnIndex - 1] - obVal) <= dist))
174  nnIndex--;
175 
176  // Now find **same value** equidistant neighbours
177  auto bounds = std::equal_range(varValues.begin() + range.begin(),
178  varValues.begin() + range.end(),
179  varValues[nnIndex]);
180  range.constrain(static_cast<int>(bounds.first - varValues.begin()),
181  static_cast<int>(bounds.second - varValues.begin()));
182  oops::Log::debug() << "Nearest match; name: " << varName << " range: " <<
183  range.begin() << "," << range.end() << std::endl;
184 }
185 
186 
187 void nearestMatch(const std::string &varName,
188  const std::vector<std::string> &varValues,
189  const std::string &obVal,
190  ConstrainedRange &range) {
191  throw eckit::UserError("The 'nearest' method cannot be used for string variables.", Here());
192 }
193 
194 
195 /// \brief Update our extract constraint based on a least-upper-bound match against the specified
196 /// coordinate indexing a dimension of the payload array.
197 ///
198 /// \param[in] varName
199 /// Name of the coordinate to match against.
200 /// \param[in] varValues
201 /// Vector of values of that coordinate.
202 /// \param[in] obVal
203 /// Value to match.
204 /// \param[inout] range
205 /// On input, the range of slices of the payload array along the dimension indexed by
206 /// `varValues` that matches all constraints considered so far. On output, the subrange of
207 /// slices matching also the current constraint.
208 template<typename T>
209 void leastUpperBoundMatch(const std::string &varName,
210  const std::vector<T> &varValues,
211  const T &obVal,
212  ConstrainedRange &range) {
213  // Find index of the first varValues >= obVal
214  typedef typename std::vector<T>::const_iterator It;
215  const It rangeBegin(varValues.begin() + range.begin());
216  const It rangeEnd(varValues.begin() + range.end());
217 
218  const It leastUpperBoundIt = std::lower_bound(rangeBegin, rangeEnd, obVal);
219  if (leastUpperBoundIt == rangeEnd) {
220  std::stringstream msg;
221  msg << "No match found for 'least upper bound' extraction of value '" << obVal
222  << "' of the variable '" << varName << "'";
223  throw eckit::Exception(msg.str(), Here());
224  }
225 
226  // Find the range of items with the same value of this coordinate
227  const auto bounds = std::equal_range(rangeBegin, rangeEnd, *leastUpperBoundIt);
228  range.constrain(static_cast<int>(bounds.first - varValues.begin()),
229  static_cast<int>(bounds.second - varValues.begin()));
230  oops::Log::debug() << "Least upper bound match; name: " << varName << " range: "
231  << range.begin() << "," << range.end() << std::endl;
232 }
233 
234 void leastUpperBoundMatch(const std::string &varName,
235  const std::vector<std::string> &varValues,
236  const std::string &obVal,
237  ConstrainedRange &range) {
238  throw eckit::UserError("The 'least upper bound' method cannot be used for string variables.",
239  Here());
240 }
241 
242 /// \brief Update our extract constraint based on a greatest-lower-bound match against the
243 /// specified coordinate indexing a dimension of the payload array.
244 ///
245 /// \param[in] varName
246 /// Name of the coordinate to match against.
247 /// \param[in] varValues
248 /// Vector of values of that coordinate.
249 /// \param[in] obVal
250 /// Value to match.
251 /// \param[inout] range
252 /// On input, the range of slices of the payload array along the dimension indexed by
253 /// `varValues` that matches all constraints considered so far. On output, the subrange of
254 /// slices matching also the current constraint.
255 template<typename T>
256 void greatestLowerBoundMatch(const std::string &varName,
257  const std::vector<T> &varValues,
258  const T &obVal,
259  ConstrainedRange &range) {
260  // Find index of the last varValues <= obVal
261  typedef typename std::vector<T>::const_reverse_iterator ReverseIt;
262  typedef std::greater<T> Compare;
263  const ReverseIt reverseRangeBegin(varValues.begin() + range.end());
264  const ReverseIt reverseRangeEnd(varValues.begin() + range.begin());
265 
266  const ReverseIt greatestLowerBoundIt =
267  std::lower_bound(reverseRangeBegin, reverseRangeEnd, obVal, Compare());
268  if (greatestLowerBoundIt == reverseRangeEnd) {
269  std::stringstream msg;
270  msg << "No match found for 'greatest lower bound' extraction of value '" << obVal
271  << "' of the variable '" << varName << "'";
272  throw eckit::Exception(msg.str(), Here());
273  }
274 
275  // Find the range of items with the same value of this coordinate
276  const auto bounds = std::equal_range(varValues.begin() + range.begin(),
277  varValues.begin() + range.end(),
278  *greatestLowerBoundIt);
279  range.constrain(static_cast<int>(bounds.first - varValues.begin()),
280  static_cast<int>(bounds.second - varValues.begin()));
281  oops::Log::debug() << "Greatest lower bound match; name: " << varName << " range: "
282  << range.begin() << "," << range.end() << std::endl;
283 }
284 
285 void greatestLowerBoundMatch(const std::string &varName,
286  const std::vector<std::string> &varValues,
287  const std::string &obVal,
288  ConstrainedRange &range) {
289  throw eckit::UserError("The 'greatest lower bound' method cannot be used for string variables.",
290  Here());
291 }
292 
293 
294 /// \brief Restrict `range` to the subrange of `varValues` matching `obVal` according to the
295 /// criteria of `method`.
296 ///
297 /// \param[in] method
298 /// Matching method.
299 /// \param[in] varName
300 /// Name of the coordinate to match against.
301 /// \param[in] varValues
302 /// Vector of values of that coordinate.
303 /// \param[in] obVal
304 /// Value to match.
305 /// \param[inout] range
306 /// On input, the range of slices of the payload array along the dimension indexed by
307 /// `varValues` that matches all constraints considered so far. On output, the subrange of
308 /// slices matching also the current constraint.
309 template <typename T>
310 void match(InterpMethod method,
311  const std::string &varName,
312  const std::vector<T> &varValues,
313  const T &obVal,
314  ConstrainedRange &range) {
315  switch (method) {
316  case InterpMethod::EXACT:
317  exactMatch(varName, varValues, obVal, range);
318  break;
320  nearestMatch(varName, varValues, obVal, range);
321  break;
323  leastUpperBoundMatch(varName, varValues, obVal, range);
324  break;
326  greatestLowerBoundMatch(varName, varValues, obVal, range);
327  break;
328  default:
329  throw eckit::BadParameter("Unrecognized interpolation method", Here());
330  }
331 }
332 
333 
334 /// \brief Perform piecewise linear interpolation of the provided array `varValues` at 'location'
335 /// `obVal`.
336 ///
337 /// \details It is assumed that the provided 1D array is described by coordinate `varName`.
338 /// This function returns the value produced by piecewise linear interpolation of this array at
339 /// the point `obVal`.
340 ///
341 /// \param[in] range
342 /// Defines how to constrain (slice) `varValues` along with `interpolatedArray`.
343 /// \param[in] varName
344 /// Name of the coordinate along which to interpolate.
345 /// \param[in] varValues
346 /// Vector of values of that coordinate.
347 /// \param[in] obVal
348 /// Interpolation location.
349 /// \param[in] interpolatedArray
350 /// Interpolated array.
351 template <typename CoordinateValue>
353  const std::string &varName,
354  const std::vector<CoordinateValue> &varValues,
355  const CoordinateValue &obVal,
356  const ConstrainedRange &range,
358  if ((obVal > varValues[range.end() - 1]) || (obVal < varValues[range.begin()])) {
359  throw eckit::Exception("Linear interpolation failed, value is beyond grid extent."
360  "No extrapolation supported.",
361  Here());
362  }
363  // Find first index of varValues >= obVal
364  int nnIndex = std::lower_bound(varValues.begin() + range.begin(),
365  varValues.begin() + range.end(),
366  obVal) - varValues.begin();
367 
368  // No interpolation required (is equal)
369  if (varValues[nnIndex] == obVal)
370  return interpolatedArray[nnIndex];
371 
372  // Linearly interpolate between these two indices.
373  const float zUpper = interpolatedArray[nnIndex];
374  const float zLower = interpolatedArray[nnIndex-1];
375  float res = ((static_cast<float>(obVal - varValues[nnIndex-1]) /
376  static_cast<float>(varValues[nnIndex] - varValues[nnIndex-1])) *
377  (zUpper - zLower)) + zLower;
378  return res;
379 }
380 
381 
383  const std::string &varName,
384  const std::vector<std::string> &varValues,
385  const std::string &obVal,
386  const ConstrainedRange &range,
388  throw eckit::UserError("Linear interpolation cannot be performed along coordinate axes indexed "
389  "by string variables such as " + varName + ".", Here());
390 }
391 
392 } // namespace
393 
394 
395 template <typename ExtractedValue>
397  const std::string &group) {
398  // Read the data from the file
399  load(filepath, group);
400  // Start by constraining to the full range of our data
401  resetExtract();
402  // Initialise splitter for each dimension
403  splitter_.emplace_back(ufo::RecursiveSplitter(interpolatedArray_.shape()[0]));
404  splitter_.emplace_back(ufo::RecursiveSplitter(interpolatedArray_.shape()[1]));
405  splitter_.emplace_back(ufo::RecursiveSplitter(interpolatedArray_.shape()[2]));
406 }
407 
408 
409 template <typename ExtractedValue>
410 void DataExtractor<ExtractedValue>::load(const std::string &filepath,
411  const std::string &interpolatedArrayGroup) {
412  std::unique_ptr<DataExtractorBackend<ExtractedValue>> backend = createBackendFor(filepath);
413  DataExtractorInput<ExtractedValue> input = backend->loadData(interpolatedArrayGroup);
414  coord2DimMapping_ = std::move(input.coord2DimMapping);
415  dim2CoordMapping_ = std::move(input.dim2CoordMapping);
416  coordsVals_ = std::move(input.coordsVals);
417  interpolatedArray_.resize(boost::extents[input.payloadArray.shape()[0]]
418  [input.payloadArray.shape()[1]]
419  [input.payloadArray.shape()[2]]);
420  interpolatedArray_ = std::move(input.payloadArray);
421  // Set the unconstrained size of matching ranges along both axes of the payload array.
422  for (size_t i = 0; i < constrainedRanges_.size(); ++i)
423  constrainedRanges_[i] = ConstrainedRange(input.payloadArray.shape()[i]);
424 }
425 
426 
427 template <typename ExtractedValue>
428 std::unique_ptr<DataExtractorBackend<ExtractedValue>>
430  const std::string lowercasePath = eckit::StringTools::lower(filepath);
431  if (eckit::StringTools::endsWith(lowercasePath, ".nc") ||
432  eckit::StringTools::endsWith(lowercasePath, ".nc4"))
433  return boost::make_unique<DataExtractorNetCDFBackend<ExtractedValue>>(filepath);
434  else if (eckit::StringTools::endsWith(lowercasePath, ".csv"))
435  return boost::make_unique<DataExtractorCSVBackend<ExtractedValue>>(filepath);
436  else
437  throw eckit::BadValue("File '" + filepath + "' has an unrecognized extension. "
438  "The supported extensions are .nc, .nc4 and .csv", Here());
439 }
440 
441 
442 template <typename ExtractedValue>
444  DataExtractorPayload<ExtractedValue> sortedArray = interpolatedArray_;
445  nextCoordToExtractBy_ = coordsToExtractBy_.begin();
446 
447  for (size_t dim = 0; dim < dim2CoordMapping_.size(); ++dim) {
448  if (interpolatedArray_.shape()[dim] == 1) // Avoid sorting scalar coordinates
449  continue;
450 
451  // Reorder coordinates
452  for (auto &coord : dim2CoordMapping_[dim]) {
453  auto &coordVal = coordsVals_[coord];
454  SortVisitor visitor(splitter_[dim]);
455  boost::apply_visitor(visitor, coordVal);
456  }
457 
458  // Reorder the array to be interpolated
459  int ind = 0;
460  std::array<size_t, 2> otherDims;
461  for (size_t odim = 0; odim < interpolatedArray_.dimensionality; ++odim) {
462  if (odim != dim) {
463  otherDims[ind] = odim;
464  ind++;
465  }
466  }
467 
468  ind = 0;
469  for (const auto &group : splitter_[dim].groups()) {
470  for (const auto &index : group) {
471  oops::Log::debug() << "Sort index dim" << dim << "; index-from: " << ind <<
472  " index-to: " << index << std::endl;
473  for (size_t j = 0; j < interpolatedArray_.shape()[otherDims[0]]; j++) {
474  for (size_t k = 0; k < interpolatedArray_.shape()[otherDims[1]]; k++) {
475  if (dim == 0) {
476  sortedArray[ind][j][k] = interpolatedArray_[index][j][k];
477  } else if (dim == 1) {
478  sortedArray[j][ind][k] = interpolatedArray_[j][index][k];
479  } else if (dim == 2) {
480  sortedArray[j][k][ind] = interpolatedArray_[j][k][index];
481  } else {
482  // We shouldn't ever end up here (exception should be thrown eariler).
483  throw eckit::Exception("Unable to reorder the array to be interpolated: "
484  "it has more than 3 dimensions.", Here());
485  }
486  }
487  }
488  ind++;
489  }
490  }
491  // Replace the unsorted array with the sorted one.
492  interpolatedArray_ = sortedArray;
493  }
494 }
495 
496 
497 template <typename ExtractedValue>
498 void DataExtractor<ExtractedValue>::scheduleSort(const std::string &varName,
499  const InterpMethod &method) {
500  if (!std::is_floating_point<ExtractedValue>::value) {
501  std::string msg = "interpolation can be used when extracting floating-point values, but not "
502  "integers or strings.";
503  if (method == InterpMethod::LINEAR) {
504  throw eckit::BadParameter("Linear " + msg, Here());
505  } else if (method == InterpMethod::BILINEAR) {
506  throw eckit::BadParameter("Bilinear " + msg, Here());
507  }
508  }
509 
510  // Map any names of the form var@Group to Group/var
511  const std::string canonicalVarName = ioda::convertV1PathToV2Path(varName);
512 
513  const CoordinateValues &coordVal = coordsVals_.at(canonicalVarName);
514  const int dimIndex = coord2DimMapping_.at(canonicalVarName);
515 
516  SortUpdateVisitor visitor(splitter_[static_cast<size_t>(dimIndex)]);
517  boost::apply_visitor(visitor, coordVal);
518 
519  // Update our map between coordinate (variable) and interpolation/extract method
520  coordsToExtractBy_.emplace_back(Coordinate{varName, coordVal, method, dimIndex});
521 }
522 
523 
524 template <typename ExtractedValue>
526  extractImpl(obVal);
527 }
528 
529 
530 template <typename ExtractedValue>
532  extractImpl(obVal);
533 }
534 
535 
536 template <typename ExtractedValue>
537 void DataExtractor<ExtractedValue>::extract(const std::string &obVal) {
538  extractImpl(obVal);
539 }
540 
541 
542 template <typename ExtractedValue>
543 template <typename T>
545  if (nextCoordToExtractBy_ == coordsToExtractBy_.cend())
546  throw eckit::UserError("Too many extract() calls made for the expected number of variables.",
547  Here());
548 
549  // Perform the extraction using the selected method
550  if (nextCoordToExtractBy_->method == InterpMethod::LINEAR)
551  maybeExtractByLinearInterpolation(obVal);
552  else
553  match(nextCoordToExtractBy_->method,
554  nextCoordToExtractBy_->name,
555  boost::get<std::vector<T>>(nextCoordToExtractBy_->values),
556  obVal,
557  constrainedRanges_[nextCoordToExtractBy_->payloadDim]);
558 
559  ++nextCoordToExtractBy_;
560 }
561 
562 
563 // Primary template, used for all ExtractedValue types except float.
564 template <typename ExtractedValue>
565 template <typename T>
567  // Should never be called -- this error should be detected earlier.
568  throw eckit::BadParameter("Linear interpolation can be used when extracting floating-point "
569  "values, but not integers or strings.", Here());
570 }
571 
572 
573 // Specialization for ExtractedValue = float.
574 template <>
575 template <typename T>
577  int dimIndex = nextCoordToExtractBy_->payloadDim;
578  const auto &interpolatedArray = get1DSlice(interpolatedArray_,
579  dimIndex,
580  constrainedRanges_);
581  result_ = linearInterpolation(nextCoordToExtractBy_->name,
582  boost::get<std::vector<T>>(nextCoordToExtractBy_->values),
583  obVal, constrainedRanges_[dimIndex], interpolatedArray);
584  resultSet_ = true;
585 }
586 
587 
588 // Primary template, used for all ExtractedValue types except float.
589 template <typename ExtractedValue>
591  // Fetch the result
592  ExtractedValue res = getUniqueMatch();
593  resetExtract();
594  return res;
595 }
596 
597 
598 // Specialization adding support for linear interpolation.
599 template <>
601  // Fetch the result
602  if (resultSet_) {
603  // This was derived from linear/bilinear interpolation so return it.
604  resetExtract();
605  return result_;
606  }
607 
608  float res = getUniqueMatch();
609  resetExtract();
610  return res;
611 }
612 
613 
614 template <typename ExtractedValue>
616  // This function should be called only if linear interpolation is not used within the
617  // extraction process.
618  ASSERT(!resultSet_);
619 
620  for (size_t dim=0; dim < constrainedRanges_.size(); dim++)
621  if (constrainedRanges_[dim].size() != 1)
622  throw eckit::Exception("Previous calls to extract() have failed to identify "
623  "a single value to return.", Here());
624  return interpolatedArray_[constrainedRanges_[0].begin()]
625  [constrainedRanges_[1].begin()]
626  [constrainedRanges_[2].begin()];
627 }
628 
629 
630 template <typename ExtractedValue>
632  for (ConstrainedRange &range : constrainedRanges_)
633  range.reset();
634  resultSet_ = false;
635  nextCoordToExtractBy_ = coordsToExtractBy_.begin();
636 }
637 
638 
639 // Explicit instantiations
640 template class DataExtractor<float>;
641 template class DataExtractor<int>;
642 template class DataExtractor<std::string>;
643 
644 } // namespace ufo
A range of indices.
int end() const
Return the index of the element past the end of the range.
void constrain(int newBegin, int newEnd)
Constrain the range.
int begin() const
Return the index of the first element in the range.
Produces input for a DataExtractor by loading data from a CSV file.
This class makes it possible to extract and interpolate data loaded from a file.
void extract(float obVal)
Perform extract, given an observation value for the coordinate associated with this extract iteration...
void scheduleSort(const std::string &varName, const InterpMethod &method)
Update the instruction on how to sort the data for the provided variable name.
void sort()
Finalise the sort, sorting each of the coordinates indexing the axes of the array to be interpolated,...
void maybeExtractByLinearInterpolation(const T &obVal)
Perform extraction using piecewise linear interpolation, if it's compatible with the ExtractedValue t...
void extractImpl(const T &obVal)
Common implementation of the overloaded public function extract().
void load(const std::string &filepath, const std::string &interpolatedArrayGroup)
Load all data from the input file.
static std::unique_ptr< DataExtractorBackend< ExtractedValue > > createBackendFor(const std::string &filepath)
Create a backend able to read file filepath.
ExtractedValue getResult()
Fetch the final interpolated value.
ExtractedValue getUniqueMatch() const
Fetch the result produced by previous calls to extract(), none of which may have used linear interpol...
DataExtractor(const std::string &filepath, const std::string &group)
Create an object that can be used to extract data loaded from a file.
void resetExtract()
Reset the extraction range for this object.
boost::variant< std::vector< int >, std::vector< float >, std::vector< std::string > > CoordinateValues
Partitions an array into groups of elements equivalent according to certain criteria.
Boost visitor which allows us to sort a vector.
void operator()(const std::vector< float > &coord)
Boost visitor which allows us to sort a vector.
SortVisitor(const ufo::RecursiveSplitter &splitter)
constexpr int bounds
Definition: QCflags.h:22
float linearInterpolation(const std::string &varName, const std::vector< std::string > &varValues, const std::string &obVal, const ConstrainedRange &range, const DataExtractorPayload< float >::const_array_view< 1 >::type &interpolatedArray)
void match(InterpMethod method, const std::string &varName, const std::vector< T > &varValues, const T &obVal, ConstrainedRange &range)
Restrict range to the subrange of varValues matching obVal according to the criteria of method.
void greatestLowerBoundMatch(const std::string &varName, const std::vector< std::string > &varValues, const std::string &obVal, ConstrainedRange &range)
void exactMatch(const std::string &varName, const std::vector< T > &varValues, const T &obVal, ConstrainedRange &range)
Update our extract constraint based on an exact match against the specified coordinate indexing a dim...
void leastUpperBoundMatch(const std::string &varName, const std::vector< std::string > &varValues, const std::string &obVal, ConstrainedRange &range)
void nearestMatch(const std::string &varName, const std::vector< std::string > &varValues, const std::string &obVal, ConstrainedRange &range)
Definition: RunCRTM.h:27
InterpMethod
Method used by the DataExtractor to map the value of an ObsSpace variable to a range of slices of the...
@ LEAST_UPPER_BOUND
Select slices corresponding to the least value of the indexing coordinate greater than or equal to th...
@ GREATEST_LOWER_BOUND
Select slices corresponding to the greatest value of the indexing coordinate less than or equal to th...
@ LINEAR
Perform a piecewise linear interpolation along the dimension indexed by the ObsSpace variable.
@ NEAREST
Select slices where the indexing coordinate is closest to the value of the corresponding ObsSpace var...
@ BILINEAR
Perform a bilinear interpolation along two dimensions indexed by the ObsSpace variables.
@ EXACT
Select slices where the indexing coordinate matches exactly the value of the corresponding ObsSpace v...
util::Duration abs(const util::Duration &duration)
boost::multi_array< T, 3 > DataExtractorPayload
const DataExtractorPayload< T >::template const_array_view< 1 >::type get1DSlice(const DataExtractorPayload< T > &array, const size_t dimIndex, const std::array< ConstrainedRange, 3 > &ranges)
Fetch a 1D sliced view of a boost multi_array object.
Coordinate used for data extraction from the payload array.
Coordinates coordsVals
Coordinates indexing the payload array.
std::unordered_map< std::string, int > coord2DimMapping
Maps coordinate names to dimensions (0, 1 or 2) of the payload array.
std::vector< std::vector< std::string > > dim2CoordMapping
Maps dimensions of the payload array (0, 1 or 2) to coordinate names.
Input data for the DataExtractor.
boost::multi_array< ExtractedValue, 3 > payloadArray
Array from which values will be extracted.