15 #include <boost/make_unique.hpp>
17 #include "eckit/utils/StringTools.h"
19 #include "ioda/Misc/StringFuncs.h"
21 #include "oops/util/Logger.h"
41 splitter.groupBy(coord);
45 splitter.sortGroupsBy([&coord](
int index) {
return coord[
static_cast<size_t>(index)]; });
59 std::vector<T> newCoord;
60 newCoord.reserve(coord.size());
61 for (
const auto &group : splitter.groups()) {
62 for (
const auto &index : group) {
63 newCoord.push_back(coord[index]);
67 coord = std::move(newCoord);
89 const std::vector<T> &varValues,
93 auto bounds = std::equal_range(varValues.begin() + range.
begin(),
94 varValues.begin() + range.
end(),
100 bounds = std::equal_range(varValues.begin() + range.
begin(),
101 varValues.begin() + range.
end(),
102 util::missingValue(obVal));
106 static_cast<int>(
bounds.second - varValues.begin()));
109 std::stringstream msg;
110 msg <<
"No match found for exact match extraction of value '" << obVal
111 <<
"' of the variable '" << varName <<
"'";
112 throw eckit::Exception(msg.str(), Here());
115 range.
begin() <<
"," << range.
end() << std::endl;
158 const std::vector<T> &varValues,
162 int nnIndex = std::lower_bound(varValues.begin() + range.
begin(),
163 varValues.begin() + range.
end(),
164 obVal) - varValues.begin();
165 if (nnIndex >= range.
end()) {
166 nnIndex = range.
end() - 1;
171 T dist =
std::abs(varValues[nnIndex] - obVal);
172 if ((varValues[nnIndex] > obVal) && (nnIndex > range.
begin()) &&
173 (
std::abs(varValues[nnIndex - 1] - obVal) <= dist))
177 auto bounds = std::equal_range(varValues.begin() + range.
begin(),
178 varValues.begin() + range.
end(),
181 static_cast<int>(
bounds.second - varValues.begin()));
183 range.
begin() <<
"," << range.
end() << std::endl;
188 const std::vector<std::string> &varValues,
189 const std::string &obVal,
191 throw eckit::UserError(
"The 'nearest' method cannot be used for string variables.", Here());
210 const std::vector<T> &varValues,
214 typedef typename std::vector<T>::const_iterator It;
215 const It rangeBegin(varValues.begin() + range.
begin());
216 const It rangeEnd(varValues.begin() + range.
end());
218 const It leastUpperBoundIt = std::lower_bound(rangeBegin, rangeEnd, obVal);
219 if (leastUpperBoundIt == rangeEnd) {
220 std::stringstream msg;
221 msg <<
"No match found for 'least upper bound' extraction of value '" << obVal
222 <<
"' of the variable '" << varName <<
"'";
223 throw eckit::Exception(msg.str(), Here());
227 const auto bounds = std::equal_range(rangeBegin, rangeEnd, *leastUpperBoundIt);
229 static_cast<int>(
bounds.second - varValues.begin()));
230 oops::Log::debug() <<
"Least upper bound match; name: " << varName <<
" range: "
231 << range.
begin() <<
"," << range.
end() << std::endl;
235 const std::vector<std::string> &varValues,
236 const std::string &obVal,
238 throw eckit::UserError(
"The 'least upper bound' method cannot be used for string variables.",
257 const std::vector<T> &varValues,
261 typedef typename std::vector<T>::const_reverse_iterator ReverseIt;
262 typedef std::greater<T> Compare;
263 const ReverseIt reverseRangeBegin(varValues.begin() + range.
end());
264 const ReverseIt reverseRangeEnd(varValues.begin() + range.
begin());
266 const ReverseIt greatestLowerBoundIt =
267 std::lower_bound(reverseRangeBegin, reverseRangeEnd, obVal, Compare());
268 if (greatestLowerBoundIt == reverseRangeEnd) {
269 std::stringstream msg;
270 msg <<
"No match found for 'greatest lower bound' extraction of value '" << obVal
271 <<
"' of the variable '" << varName <<
"'";
272 throw eckit::Exception(msg.str(), Here());
276 const auto bounds = std::equal_range(varValues.begin() + range.
begin(),
277 varValues.begin() + range.
end(),
278 *greatestLowerBoundIt);
280 static_cast<int>(
bounds.second - varValues.begin()));
281 oops::Log::debug() <<
"Greatest lower bound match; name: " << varName <<
" range: "
282 << range.
begin() <<
"," << range.
end() << std::endl;
286 const std::vector<std::string> &varValues,
287 const std::string &obVal,
289 throw eckit::UserError(
"The 'greatest lower bound' method cannot be used for string variables.",
309 template <
typename T>
311 const std::string &varName,
312 const std::vector<T> &varValues,
329 throw eckit::BadParameter(
"Unrecognized interpolation method", Here());
351 template <
typename CoordinateValue>
353 const std::string &varName,
354 const std::vector<CoordinateValue> &varValues,
355 const CoordinateValue &obVal,
358 if ((obVal > varValues[range.
end() - 1]) || (obVal < varValues[range.
begin()])) {
359 throw eckit::Exception(
"Linear interpolation failed, value is beyond grid extent."
360 "No extrapolation supported.",
364 int nnIndex = std::lower_bound(varValues.begin() + range.
begin(),
365 varValues.begin() + range.
end(),
366 obVal) - varValues.begin();
369 if (varValues[nnIndex] == obVal)
370 return interpolatedArray[nnIndex];
373 const float zUpper = interpolatedArray[nnIndex];
374 const float zLower = interpolatedArray[nnIndex-1];
375 float res = ((
static_cast<float>(obVal - varValues[nnIndex-1]) /
376 static_cast<float>(varValues[nnIndex] - varValues[nnIndex-1])) *
377 (zUpper - zLower)) + zLower;
383 const std::string &varName,
384 const std::vector<std::string> &varValues,
385 const std::string &obVal,
388 throw eckit::UserError(
"Linear interpolation cannot be performed along coordinate axes indexed "
389 "by string variables such as " + varName +
".", Here());
395 template <
typename ExtractedValue>
397 const std::string &group) {
399 load(filepath, group);
409 template <
typename ExtractedValue>
411 const std::string &interpolatedArrayGroup) {
412 std::unique_ptr<DataExtractorBackend<ExtractedValue>> backend = createBackendFor(filepath);
417 interpolatedArray_.resize(boost::extents[input.
payloadArray.shape()[0]]
422 for (
size_t i = 0; i < constrainedRanges_.size(); ++i)
427 template <
typename ExtractedValue>
428 std::unique_ptr<DataExtractorBackend<ExtractedValue>>
430 const std::string lowercasePath = eckit::StringTools::lower(filepath);
431 if (eckit::StringTools::endsWith(lowercasePath,
".nc") ||
432 eckit::StringTools::endsWith(lowercasePath,
".nc4"))
433 return boost::make_unique<DataExtractorNetCDFBackend<ExtractedValue>>(filepath);
434 else if (eckit::StringTools::endsWith(lowercasePath,
".csv"))
437 throw eckit::BadValue(
"File '" + filepath +
"' has an unrecognized extension. "
438 "The supported extensions are .nc, .nc4 and .csv", Here());
442 template <
typename ExtractedValue>
445 nextCoordToExtractBy_ = coordsToExtractBy_.begin();
447 for (
size_t dim = 0; dim < dim2CoordMapping_.size(); ++dim) {
448 if (interpolatedArray_.shape()[dim] == 1)
452 for (
auto &coord : dim2CoordMapping_[dim]) {
453 auto &coordVal = coordsVals_[coord];
454 SortVisitor visitor(splitter_[dim]);
455 boost::apply_visitor(visitor, coordVal);
460 std::array<size_t, 2> otherDims;
461 for (
size_t odim = 0; odim < interpolatedArray_.dimensionality; ++odim) {
463 otherDims[ind] = odim;
469 for (
const auto &group : splitter_[dim].groups()) {
470 for (
const auto &index : group) {
472 " index-to: " << index << std::endl;
473 for (
size_t j = 0; j < interpolatedArray_.shape()[otherDims[0]]; j++) {
474 for (
size_t k = 0; k < interpolatedArray_.shape()[otherDims[1]]; k++) {
476 sortedArray[ind][j][k] = interpolatedArray_[index][j][k];
477 }
else if (dim == 1) {
478 sortedArray[j][ind][k] = interpolatedArray_[j][index][k];
479 }
else if (dim == 2) {
480 sortedArray[j][k][ind] = interpolatedArray_[j][k][index];
483 throw eckit::Exception(
"Unable to reorder the array to be interpolated: "
484 "it has more than 3 dimensions.", Here());
492 interpolatedArray_ = sortedArray;
497 template <
typename ExtractedValue>
500 if (!std::is_floating_point<ExtractedValue>::value) {
501 std::string msg =
"interpolation can be used when extracting floating-point values, but not "
502 "integers or strings.";
504 throw eckit::BadParameter(
"Linear " + msg, Here());
506 throw eckit::BadParameter(
"Bilinear " + msg, Here());
511 const std::string canonicalVarName = ioda::convertV1PathToV2Path(varName);
514 const int dimIndex = coord2DimMapping_.at(canonicalVarName);
516 SortUpdateVisitor visitor(splitter_[
static_cast<size_t>(dimIndex)]);
517 boost::apply_visitor(visitor, coordVal);
520 coordsToExtractBy_.emplace_back(
Coordinate{varName, coordVal, method, dimIndex});
524 template <
typename ExtractedValue>
530 template <
typename ExtractedValue>
536 template <
typename ExtractedValue>
542 template <
typename ExtractedValue>
543 template <
typename T>
545 if (nextCoordToExtractBy_ == coordsToExtractBy_.cend())
546 throw eckit::UserError(
"Too many extract() calls made for the expected number of variables.",
551 maybeExtractByLinearInterpolation(obVal);
553 match(nextCoordToExtractBy_->method,
554 nextCoordToExtractBy_->name,
555 boost::get<std::vector<T>>(nextCoordToExtractBy_->values),
557 constrainedRanges_[nextCoordToExtractBy_->payloadDim]);
559 ++nextCoordToExtractBy_;
564 template <
typename ExtractedValue>
565 template <
typename T>
568 throw eckit::BadParameter(
"Linear interpolation can be used when extracting floating-point "
569 "values, but not integers or strings.", Here());
575 template <
typename T>
577 int dimIndex = nextCoordToExtractBy_->payloadDim;
578 const auto &interpolatedArray =
get1DSlice(interpolatedArray_,
582 boost::get<std::vector<T>>(nextCoordToExtractBy_->values),
583 obVal, constrainedRanges_[dimIndex], interpolatedArray);
589 template <
typename ExtractedValue>
592 ExtractedValue res = getUniqueMatch();
608 float res = getUniqueMatch();
614 template <
typename ExtractedValue>
620 for (
size_t dim=0; dim < constrainedRanges_.size(); dim++)
621 if (constrainedRanges_[dim].size() != 1)
622 throw eckit::Exception(
"Previous calls to extract() have failed to identify "
623 "a single value to return.", Here());
624 return interpolatedArray_[constrainedRanges_[0].begin()]
625 [constrainedRanges_[1].begin()]
626 [constrainedRanges_[2].begin()];
630 template <
typename ExtractedValue>
635 nextCoordToExtractBy_ = coordsToExtractBy_.begin();
int end() const
Return the index of the element past the end of the range.
void constrain(int newBegin, int newEnd)
Constrain the range.
int begin() const
Return the index of the first element in the range.
Partitions an array into groups of elements equivalent according to certain criteria.
InterpMethod
Method used by the DataExtractor to map the value of an ObsSpace variable to a range of slices of the...
@ LEAST_UPPER_BOUND
Select slices corresponding to the least value of the indexing coordinate greater than or equal to th...
@ GREATEST_LOWER_BOUND
Select slices corresponding to the greatest value of the indexing coordinate less than or equal to th...
@ LINEAR
Perform a piecewise linear interpolation along the dimension indexed by the ObsSpace variable.
@ NEAREST
Select slices where the indexing coordinate is closest to the value of the corresponding ObsSpace var...
@ BILINEAR
Perform a bilinear interpolation along two dimensions indexed by the ObsSpace variables.
@ EXACT
Select slices where the indexing coordinate matches exactly the value of the corresponding ObsSpace v...
util::Duration abs(const util::Duration &duration)
boost::multi_array< T, 3 > DataExtractorPayload
const DataExtractorPayload< T >::template const_array_view< 1 >::type get1DSlice(const DataExtractorPayload< T > &array, const size_t dimIndex, const std::array< ConstrainedRange, 3 > &ranges)
Fetch a 1D sliced view of a boost multi_array object.