UFO
Gaussian_Thinning.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <cmath>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 #include "eckit/config/Configuration.h"
17 #include "ioda/ObsDataVector.h"
18 #include "ioda/ObsSpace.h"
19 #include "oops/base/Variables.h"
20 #include "oops/util/DateTime.h"
21 #include "oops/util/Duration.h"
22 #include "oops/util/Logger.h"
23 #include "oops/util/missingValues.h"
26 #include "ufo/utils/Constants.h"
35 
36 namespace ufo {
37 
38 // -----------------------------------------------------------------------------
39 
40 Gaussian_Thinning::Gaussian_Thinning(ioda::ObsSpace & obsdb,
41  const GaussianThinningParameters & params,
42  std::shared_ptr<ioda::ObsDataVector<int> > flags,
43  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
44  : FilterBase(obsdb, params, flags, obserr), options_(params)
45 {
46  oops::Log::debug() << "Gaussian_Thinning: config = " << options_ << std::endl;
47 }
48 
49 // -----------------------------------------------------------------------------
50 
51 void Gaussian_Thinning::applyFilter(const std::vector<bool> & apply,
52  const Variables & filtervars,
53  std::vector<std::vector<bool>> & flagged) const {
54  ObsAccessor obsAccessor = createObsAccessor();
55 
56  std::vector<size_t> validObsIds = obsAccessor.getValidObservationIds(apply, *flags_,
57  filtervars, options_.thinIfAnyFilterVariablesAreValid.value());
58 
60  // Sort observations by latitude
61  const std::vector<float> lat = obsAccessor.getFloatVariableFromObsSpace("MetaData", "latitude");
62  metOfficeSort(validObsIds.begin(), validObsIds.end(), [&lat] (size_t id) { return lat[id]; });
63  }
64 
65  std::vector<float> distancesToBinCenter(validObsIds.size(), 0.f);
66  std::unique_ptr<DistanceCalculator> distanceCalculator = makeDistanceCalculator(options_);
67 
69  validObsIds, options_.opsCompatibilityMode);
70  groupObservationsByVerticalCoordinate(validObsIds, *distanceCalculator, obsAccessor,
71  splitter, distancesToBinCenter);
72  groupObservationsByTime(validObsIds, *distanceCalculator, obsAccessor,
73  splitter, distancesToBinCenter);
74  groupObservationsBySpatialLocation(validObsIds, *distanceCalculator, obsAccessor,
75  splitter, distancesToBinCenter);
76 
77  const std::vector<bool> isThinned = identifyThinnedObservations(
78  validObsIds, obsAccessor, splitter, distancesToBinCenter);
79  obsAccessor.flagRejectedObservations(isThinned, flagged);
80 }
81 
82 // -----------------------------------------------------------------------------
83 
85  if (options_.categoryVariable.value() != boost::none) {
87  obsdb_, *options_.categoryVariable.value());
88  } else {
90  }
91 }
92 
93 // -----------------------------------------------------------------------------
94 
95 std::unique_ptr<DistanceCalculator> Gaussian_Thinning::makeDistanceCalculator(
96  const GaussianThinningParameters &options) {
97  DistanceNorm distanceNorm = options.distanceNorm.value().value_or(DistanceNorm::GEODESIC);
98  if (options.opsCompatibilityMode)
99  distanceNorm = DistanceNorm::MAXIMUM;
100  switch (distanceNorm) {
102  return std::unique_ptr<DistanceCalculator>(new GeodesicDistanceCalculator());
104  return std::unique_ptr<DistanceCalculator>(new MaxNormDistanceCalculator());
105  }
106  throw eckit::BadParameter("Unrecognized distance norm", Here());
107 }
108 
109 // -----------------------------------------------------------------------------
110 
112  const std::vector<size_t> &validObsIds,
113  const DistanceCalculator &distanceCalculator,
114  const ObsAccessor &obsAccessor,
115  RecursiveSplitter &splitter,
116  std::vector<float> &distancesToBinCenter) const {
117  boost::optional<SpatialBinSelector> binSelector = makeSpatialBinSelector(options_);
118  if (binSelector == boost::none)
119  return;
120 
121  oops::Log::debug() << "Gaussian_Thinning: zonal band width (degrees) = "
122  << binSelector->latitudeBinWidth() << std::endl;
123  oops::Log::debug() << "Gaussian_Thinning: number of horizontal bins = "
124  << binSelector->totalNumBins() << std::endl;
125 
126  std::vector<float> lat = obsAccessor.getFloatVariableFromObsSpace("MetaData", "latitude");
127  std::vector<float> lon = obsAccessor.getFloatVariableFromObsSpace("MetaData", "longitude");
129  // Longitudes will typically be either in the [-180, 180] degree range or in the [0, 360]
130  // degree range. When the OPS compatibility mode is off, the spatial bin selector is constructed
131  // with the latter convention in mind, so we need to shift any negative longitudes up by 360
132  // degrees.
133  for (float &longitude : lon)
134  if (longitude < 0)
135  longitude += 360;
136  }
137 
138  std::vector<int> latBins;
139  std::vector<int> lonBins;
140  latBins.reserve(validObsIds.size());
141  lonBins.reserve(validObsIds.size());
142  for (size_t obsId : validObsIds) {
143  const size_t latBin = binSelector->latitudeBin(lat[obsId]);
144  latBins.push_back(latBin);
145  lonBins.push_back(binSelector->longitudeBin(latBin, lon[obsId]));
146  }
147  splitter.groupBy(latBins);
148  splitter.groupBy(lonBins);
149 
150  for (size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
151  const size_t obsId = validObsIds[validObsIndex];
152  float component = distanceCalculator.spatialDistanceComponent(
153  lat[obsId], lon[obsId],
154  binSelector->latitudeBinCenter(latBins[validObsIndex]),
155  binSelector->longitudeBinCenter(latBins[validObsIndex], lonBins[validObsIndex]),
156  binSelector->inverseLatitudeBinWidth(),
157  binSelector->inverseLongitudeBinWidth(latBins[validObsIndex]));
158  distancesToBinCenter[validObsIndex] = distanceCalculator.combineDistanceComponents(
159  distancesToBinCenter[validObsIndex], component);
160  }
161 }
162 
163 // -----------------------------------------------------------------------------
164 
165 boost::optional<SpatialBinSelector> Gaussian_Thinning::makeSpatialBinSelector(
166  const GaussianThinningParameters &options) {
167  if (options.horizontalMesh <= 0)
168  return boost::none;
169 
170  oops::Log::debug() << "Gaussian_Thinning: requested horizontal bin size (km) = "
171  << options.horizontalMesh << std::endl;
172 
173  bool roundHorizontalBinCountToNearest =
174  options.roundHorizontalBinCountToNearest.value().value_or(false);
175  if (options.opsCompatibilityMode)
176  roundHorizontalBinCountToNearest = true;
177  SpatialBinCountRoundingMode roundingMode = roundHorizontalBinCountToNearest ?
179 
180  const float earthRadius = Constants::mean_earth_rad; // km
181  const float meridianLength = M_PI * earthRadius;
182  const float tentativeNumLatBins = meridianLength / options.horizontalMesh;
183  const int numLatBins = SpatialBinSelector::roundNumBins(tentativeNumLatBins, roundingMode);
184 
185  if (options.useReducedHorizontalGrid) {
186  // Use fewer bins at high latitudes
187  return SpatialBinSelector(numLatBins, roundingMode, options.opsCompatibilityMode);
188  } else {
189  // Use the same number of bins at all latitudes
190  const int equatorToMeridianLengthRatio = 2;
191  return SpatialBinSelector(numLatBins, equatorToMeridianLengthRatio * numLatBins,
192  options.opsCompatibilityMode);
193  }
194 }
195 
196 // -----------------------------------------------------------------------------
197 
199  const std::vector<size_t> &validObsIds,
200  const DistanceCalculator &distanceCalculator,
201  const ObsAccessor &obsAccessor,
202  RecursiveSplitter &splitter,
203  std::vector<float> &distancesToBinCenter) const {
204  std::unique_ptr<EquispacedBinSelectorBase> binSelector = makeVerticalBinSelector(options_);
205  if (!binSelector)
206  return;
207 
208  if (binSelector->numBins() != boost::none)
209  oops::Log::debug() << "Gaussian_Thinning: number of vertical bins = "
210  << *binSelector->numBins() << std::endl;
211 
212  std::vector<float> vcoord = obsAccessor.getFloatVariableFromObsSpace(
213  "MetaData", options_.verticalCoord);
214 
215  std::vector<int> bins;
216  bins.reserve(validObsIds.size());
217  for (size_t obsId : validObsIds)
218  {
219  bins.push_back(binSelector->bin(vcoord[obsId]));
220  }
221  splitter.groupBy(bins);
222 
223  for (size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
224  const size_t obsId = validObsIds[validObsIndex];
225  const float component = distanceCalculator.nonspatialDistanceComponent(
226  vcoord[obsId], binSelector->binCenter(bins[validObsIndex]),
227  binSelector->inverseBinWidth());
228  distancesToBinCenter[validObsIndex] = distanceCalculator.combineDistanceComponents(
229  distancesToBinCenter[validObsIndex], component);
230  }
231 }
232 
233 // -----------------------------------------------------------------------------
234 
235 std::unique_ptr<EquispacedBinSelectorBase> Gaussian_Thinning::makeVerticalBinSelector(
236  const GaussianThinningParameters &options) {
237  if (options.verticalMesh <= 0)
238  return nullptr;
239 
240  if (options.opsCompatibilityMode) {
241  return std::make_unique<RoundingEquispacedBinSelector>(
242  options.verticalMesh, options.verticalMin + options.verticalMesh / 2);
243  } else {
244  const int numVerticalBins = std::max(
245  1,
246  static_cast<int>(std::ceil((options.verticalMax - options.verticalMin) /
247  options.verticalMesh)));
248  // Adjust verticalMax upwards to make the range of vertical coordinates
249  // evenly divisible into bins of width verticalMesh.
250  const float adjustedVerticalMax = options.verticalMin + numVerticalBins * options.verticalMesh;
251 
252  oops::Log::debug() << "Gaussian_Thinning: number of vertical bins = "
253  << numVerticalBins << std::endl;
254  return std::make_unique<TruncatingEquispacedBinSelector>(
255  options.verticalMin, adjustedVerticalMax, numVerticalBins);
256  }
257 }
258 
259 // -----------------------------------------------------------------------------
260 
262  const std::vector<size_t> &validObsIds,
263  const DistanceCalculator &distanceCalculator,
264  const ObsAccessor &obsAccessor,
265  RecursiveSplitter &splitter,
266  std::vector<float> &distancesToBinCenter) const {
267  util::DateTime timeOffset;
268  std::unique_ptr<EquispacedBinSelectorBase> binSelector =
269  makeTimeBinSelector(options_, obsdb_.windowStart(), obsdb_.windowEnd(), timeOffset);
270  if (!binSelector)
271  return;
272 
273  if (binSelector->numBins() != boost::none)
274  oops::Log::debug() << "Gaussian_Thinning: number of time bins = "
275  << *binSelector->numBins() << std::endl;
276 
277  std::vector<util::DateTime> times = obsAccessor.getDateTimeVariableFromObsSpace(
278  "MetaData", "datetime");
279 
280  std::vector<int> bins;
281  bins.reserve(validObsIds.size());
282  for (size_t obsId : validObsIds)
283  {
284  bins.push_back(binSelector->bin((times[obsId] - timeOffset).toSeconds()));
285  }
286  splitter.groupBy(bins);
287 
288  for (size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
289  const size_t obsId = validObsIds[validObsIndex];
290  const float component = distanceCalculator.nonspatialDistanceComponent(
291  (times[obsId] - timeOffset).toSeconds(),
292  binSelector->binCenter(bins[validObsIndex]),
293  binSelector->inverseBinWidth());
294  distancesToBinCenter[validObsIndex] = distanceCalculator.combineDistanceComponents(
295  distancesToBinCenter[validObsIndex], component);
296  }
297 }
298 
299 // -----------------------------------------------------------------------------
300 
301 std::unique_ptr<EquispacedBinSelectorBase> Gaussian_Thinning::makeTimeBinSelector(
302  const GaussianThinningParameters &options,
303  const util::DateTime &windowStart,
304  const util::DateTime &windowEnd,
305  util::DateTime &timeOffset) {
306  if (options.timeMesh.value() == boost::none ||
307  options.timeMin.value() == boost::none ||
308  options.timeMax.value() == boost::none)
309  return nullptr;
310 
311  const util::Duration timeMesh = options.timeMesh.value().get();
312  if (timeMesh.toSeconds() == 0)
313  return nullptr;
314 
315  const util::DateTime timeMin = options.timeMin.value().get();
316  const util::DateTime timeMax = options.timeMax.value().get();
317 
318  oops::Log::debug() << "(timeMax - timeMin).toSeconds() = "
319  << ((timeMax - timeMin).toSeconds()) << std::endl;
320  oops::Log::debug() << "timeMesh.toSeconds() = " << timeMesh.toSeconds() << std::endl;
321 
322  timeOffset = timeMin;
323 
324  if (options.opsCompatibilityMode) {
325  // Put bin 0 at the center of the assimilation window.
326  const util::Duration windowLength = windowEnd - windowStart;
327  const int numFullBinsLeftOfWindowCenter = (windowLength / 2).toSeconds() / timeMesh.toSeconds();
328  const util::DateTime bin0Center = timeMin + numFullBinsLeftOfWindowCenter * timeMesh +
329  timeMesh / 2;
330  return std::make_unique<RoundingEquispacedBinSelector>(
331  timeMesh.toSeconds(), (bin0Center - timeOffset).toSeconds());
332  } else {
333  // The upper bound of the time interval is effectively adjusted upwards
334  // to make space for an integral number of bins of specified width.
335  const int numTimeBins = std::max(
336  1,
337  static_cast<int>(std::ceil((timeMax - timeMin).toSeconds() /
338  static_cast<float>(timeMesh.toSeconds()))));
339 
340  return std::make_unique<TruncatingEquispacedBinSelector>(
341  0.0f, numTimeBins * timeMesh.toSeconds(), numTimeBins);
342  }
343 }
344 
345 // -----------------------------------------------------------------------------
346 
348  const std::vector<size_t> &validObsIds,
349  const ObsAccessor &obsAccessor,
350  const RecursiveSplitter &splitter,
351  const std::vector<float> &distancesToBinCenter) const {
352 
353  std::function<bool(size_t, size_t)> comparator = makeObservationComparator(
354  validObsIds, distancesToBinCenter, obsAccessor);
355 
356  size_t totalNumObs = obsAccessor.totalNumObservations();
357 
358  std::vector<bool> isThinned(totalNumObs, false);
359  for (auto group : splitter.multiElementGroups()) {
360  const size_t bestValidObsIndex = *std::min_element(
361  std::begin(group), std::end(group), comparator);
362 
363  for (size_t validObsIndex : group)
364  if (validObsIndex != bestValidObsIndex)
365  isThinned[validObsIds[validObsIndex]] = true;
366  }
367 
368  return isThinned;
369 }
370 
371 // Should return true if the first observation is "better" than the second one.
372 std::function<bool(size_t, size_t)> Gaussian_Thinning::makeObservationComparator(
373  const std::vector<size_t> &validObsIds,
374  const std::vector<float> &distancesToBinCenter,
375  const ObsAccessor &obsAccessor) const
376 {
377  if (options_.priorityVariable.value() == boost::none) {
378  return [&distancesToBinCenter](size_t validObsIndexA, size_t validObsIndexB) {
379  return distancesToBinCenter[validObsIndexA] < distancesToBinCenter[validObsIndexB];
380  };
381  }
382 
383  const ufo::Variable priorityVariable = options_.priorityVariable.value().get();
384 
385  std::vector<int> priorities = obsAccessor.getIntVariableFromObsSpace(
386  priorityVariable.group(), priorityVariable.variable());
387 
388  // TODO(wsmigaj): In C++14, use move capture for 'priorities'.
389  return [priorities, &validObsIds, &distancesToBinCenter]
390  (size_t validObsIndexA, size_t validObsIndexB) {
391  // Prefer observations with large priorities and small distances
392  return std::make_pair(-priorities[validObsIds[validObsIndexA]],
393  distancesToBinCenter[validObsIndexA]) <
394  std::make_pair(-priorities[validObsIds[validObsIndexB]],
395  distancesToBinCenter[validObsIndexB]);
396  };
397 }
398 
399 // -----------------------------------------------------------------------------
400 
401 void Gaussian_Thinning::print(std::ostream & os) const {
402  os << "Gaussian_Thinning: config = " << options_ << std::endl;
403 }
404 
405 // -----------------------------------------------------------------------------
406 
407 } // namespace ufo
Calculates distances between observations and centres of bins used during thinning.
virtual float nonspatialDistanceComponent(float obs, float binCenter, float inverseBinWidth) const =0
virtual float spatialDistanceComponent(float obsLatitude, float obsLongitude, float latitudeBinCenter, float longitudeBinCenter, float inverseLatitudeBinWidth, float inverseLongitudeBinWidth) const =0
virtual float combineDistanceComponents(float componentA, float componentB) const =0
Base class for UFO QC filters.
Definition: FilterBase.h:45
void applyFilter(const std::vector< bool > &, const Variables &, std::vector< std::vector< bool >> &) const override
GaussianThinningParameters options_
static boost::optional< SpatialBinSelector > makeSpatialBinSelector(const GaussianThinningParameters &options)
void print(std::ostream &) const override
ObsAccessor createObsAccessor() const
void groupObservationsByTime(const std::vector< size_t > &validObsIds, const DistanceCalculator &distanceCalculator, const ObsAccessor &obsAccessor, RecursiveSplitter &splitter, std::vector< float > &distancesToBinCenter) const
void groupObservationsBySpatialLocation(const std::vector< size_t > &validObsIds, const DistanceCalculator &distanceCalculator, const ObsAccessor &obsAccessor, RecursiveSplitter &splitter, std::vector< float > &distancesToBinCenter) const
std::vector< bool > identifyThinnedObservations(const std::vector< size_t > &validObsIds, const ObsAccessor &obsAccessor, const RecursiveSplitter &splitter, const std::vector< float > &distancesToBinCenter) const
Gaussian_Thinning(ioda::ObsSpace &obsdb, const GaussianThinningParameters &params, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
static std::unique_ptr< EquispacedBinSelectorBase > makeTimeBinSelector(const GaussianThinningParameters &options, const util::DateTime &windowStart, const util::DateTime &windowEnd, util::DateTime &timeOffset)
void groupObservationsByVerticalCoordinate(const std::vector< size_t > &validObsIds, const DistanceCalculator &distanceCalculator, const ObsAccessor &obsAccessor, RecursiveSplitter &splitter, std::vector< float > &distancesToBinCenter) const
std::function< bool(size_t, size_t)> makeObservationComparator(const std::vector< size_t > &validObsIds, const std::vector< float > &distancesToBinCenter, const ObsAccessor &obsAccessor) const
static std::unique_ptr< EquispacedBinSelectorBase > makeVerticalBinSelector(const GaussianThinningParameters &options)
static std::unique_ptr< DistanceCalculator > makeDistanceCalculator(const GaussianThinningParameters &options)
Options controlling the operation of the Gaussian_Thinning filter.
oops::OptionalParameter< DistanceNorm > distanceNorm
oops::Parameter< std::string > verticalCoord
Observation vertical coordinate.
oops::OptionalParameter< Variable > priorityVariable
oops::OptionalParameter< util::DateTime > timeMin
oops::Parameter< bool > useReducedHorizontalGrid
oops::Parameter< bool > thinIfAnyFilterVariablesAreValid
oops::Parameter< float > verticalMin
Lower bound of the vertical coordinate interval split into cells of size vertical_mesh.
oops::OptionalParameter< Variable > categoryVariable
oops::OptionalParameter< util::Duration > timeMesh
oops::OptionalParameter< bool > roundHorizontalBinCountToNearest
oops::OptionalParameter< util::DateTime > timeMax
This class provides access to observations that may be held on multiple MPI ranks.
Definition: ObsAccessor.h:56
RecursiveSplitter splitObservationsIntoIndependentGroups(const std::vector< size_t > &validObsIds, bool opsCompatibilityMode=false) const
Definition: ObsAccessor.cc:197
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const ioda::ObsDataVector< int > &flags, const Variables &filtervars, bool validIfAnyFilterVariablePassedQC=true) const
Return the IDs of observation locations that should be treated as valid by a filter.
Definition: ObsAccessor.cc:99
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
Definition: ObsAccessor.cc:139
static ObsAccessor toAllObservations(const ioda::ObsSpace &obsdb)
Create an accessor to observations from the observation space obsdb, assuming that the whole set of o...
Definition: ObsAccessor.cc:84
static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(const ioda::ObsSpace &obsdb, const Variable &variable)
Create an accessor to the collection of observations held in obsdb, assuming that observations with d...
Definition: ObsAccessor.cc:94
std::vector< float > getFloatVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:144
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:159
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
Definition: ObsAccessor.cc:243
size_t totalNumObservations() const
Definition: ObsAccessor.cc:170
ioda::ObsSpace & obsdb_
std::shared_ptr< ioda::ObsDataVector< int > > flags_
Partitions an array into groups of elements equivalent according to certain criteria.
void groupBy(const std::vector< size_t > &categories)
Split existing equivalence classes according to a new criterion.
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
Represents a partition of a sphere into a number of subsets (bins).
static IndexType roundNumBins(float idealNumBins, SpatialBinCountRoundingMode roundingMode)
Return idealNumBins rounded to a positive integer according to the rounding strategy roundingMode.
const std::string & variable() const
Definition: Variable.cc:99
const std::string & group() const
Definition: Variable.cc:116
Definition: RunCRTM.h:27
void metOfficeSort(RandomIt first, RandomIt last, const UnaryOperation &key)
Sort the range [first, last) in the order of ascending keys using the same algorithm as the Ops_Integ...
SpatialBinCountRoundingMode
static constexpr double mean_earth_rad
Definition: Constants.h:40