17 #include "eckit/config/Configuration.h"
18 #include "eckit/container/KDTree.h"
19 #include "ioda/ObsDataVector.h"
20 #include "ioda/ObsSpace.h"
21 #include "oops/base/Variables.h"
22 #include "oops/util/DateTime.h"
23 #include "oops/util/Duration.h"
24 #include "oops/util/IsAnyPointInVolumeInterior.h"
25 #include "oops/util/Logger.h"
39 template <
int numDims_>
43 static const int numDims = numDims_;
45 typedef std::array<CoordType, numDims>
Point;
46 typedef std::array<CoordType, numDims>
Extent;
54 int numSpatialDims)
const = 0;
57 const Extent &semiAxes)
const = 0;
61 template <
int numDims_>
70 static const int numDims = Base::numDims;
72 void insert(
const Point &point)
override;
74 bool isAnyPointInCylinderInterior(
const Point ¢er,
76 int numSpatialDims)
const override;
78 bool isAnyPointInEllipsoidInterior(
const Point ¢er,
79 const Extent &semiAxes)
const override;
85 typedef eckit::geometry::KPoint<numDims>
Point;
90 typedef typename KDTreeImpl::Alloc
Alloc;
91 typedef typename KDTreeImpl::Node
Node;
93 typedef typename KDTreeImpl::Value
Value;
98 template <
int numDims_>
100 const Point &point) {
104 template <
int numDims_>
106 const Point ¢er,
const Extent &semiAxes)
const {
108 for (
int d = 0; d < numDims; ++d) {
109 lbound.data()[d] = center[d] - semiAxes[d];
110 ubound.data()[d] = center[d] + semiAxes[d];
112 return util::isAnyPointInEllipsoidInterior(tree_, lbound, ubound);
115 template <
int numDims_>
117 const Point ¢er,
const Extent &semiAxes,
int numSpatialDims)
const {
119 for (
int d = 0; d < numDims; ++d) {
120 lbound.data()[d] = center[d] - semiAxes[d];
121 ubound.data()[d] = center[d] + semiAxes[d];
123 return util::isAnyPointInCylinderInterior(tree_, lbound, ubound, numSpatialDims);
138 boost::optional<std::vector<util::DateTime>>
times;
150 :
FilterBase(obsdb, parameters, flags, obserr), options_(parameters)
161 std::vector<std::vector<bool>> & flagged)
const {
166 int numSpatialDims, numNonspatialDims;
169 std::vector<bool> isThinned(obsData.
totalNumObs,
false);
193 std::vector<size_t> obsIdsInCategory;
194 for (
size_t validObsIndex : categoryGroup) {
195 obsIdsInCategory.push_back(validObsIds[validObsIndex]);
206 thinCategory(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, numNonspatialDims,
223 const ObsAccessor &obsAccessor,
int &numSpatialDims,
int &numNonspatialDims)
const
228 numNonspatialDims = 0;
253 if (priorityVariable != boost::none) {
255 priorityVariable.get().group(), priorityVariable.get().variable());
263 template <
typename ValueType>
266 const std::string ¶meterName)
const {
267 if (spacingsByPriority.isScalar())
270 if (spacingsByPriority.begin() == spacingsByPriority.end())
271 throw eckit::BadParameter(parameterName +
" must be a scalar or a non-empty map");
275 ValueType prevSpacing = spacingsByPriority.begin()->second;
276 for (
const auto &priorityAndSpacing : spacingsByPriority) {
277 const ValueType &spacing = priorityAndSpacing.second;
278 if (spacing > prevSpacing)
279 throw eckit::BadParameter(parameterName +
280 ": exclusion volumes of lower-priority observations must be "
281 "at least as large as those of higher-priority ones.");
286 const std::vector<bool> & apply,
295 const std::vector<size_t> recordIds = obsAccessor.
getRecordIds();
296 std::stable_sort(validObsIds.begin(), validObsIds.end(),
297 [&recordIds](
size_t obsIdA,
size_t obsIdB)
298 { return recordIds[obsIdA] < recordIds[obsIdB]; });
306 const size_t rootRank = 0;
312 if (comm.rank() == rootRank)
314 seed =
static_cast<std::uint32_t
>(std::time(
nullptr));
315 comm.broadcast(seed, rootRank);
323 const std::vector<size_t> &validObsIds,
327 if (priorityVariable == boost::none)
332 priorityVariable.get().group(), priorityVariable.get().variable());
335 return -i - std::numeric_limits<int>::lowest() + std::numeric_limits<int>::max();
338 std::vector<int> validObsPriorities(validObsIds.size());
339 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex)
341 validObsPriorities[validObsIndex] =
reverse(priority[validObsIds[validObsIndex]]);
342 splitter.
groupBy(validObsPriorities);
346 const std::vector<size_t> &obsIdsInCategory,
349 int numNonspatialDims,
350 std::vector<bool> &isThinned)
const {
351 switch (numSpatialDims + numNonspatialDims) {
355 return thinCategory<1>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
357 return thinCategory<2>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
359 return thinCategory<3>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
361 return thinCategory<4>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
363 return thinCategory<5>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
366 ABORT(
"Unexpected number of thinning dimensions");
369 template <
int numDims>
371 const std::vector<size_t> &obsIdsInCategory,
374 std::vector<bool> &isThinned)
const {
375 KDTree<numDims> pointIndex;
377 for (
auto priorityGroup : prioritySplitter.
groups()) {
378 for (
size_t obsIndex : priorityGroup) {
379 const size_t obsId = obsIdsInCategory[obsIndex];
380 std::array<float, numDims> point = getObservationPosition<numDims>(obsId, obsData);
381 std::array<float, numDims> semiAxes = getExclusionVolumeSemiAxes<numDims>(obsId, obsData);
383 pointIndex.isAnyPointInCylinderInterior(point, semiAxes, numSpatialDims)) ||
385 pointIndex.isAnyPointInEllipsoidInterior(point, semiAxes))) {
386 isThinned[obsId] =
true;
388 pointIndex.insert(point);
394 template <
int numDims>
396 size_t obsId,
const ObsData &obsData)
const {
397 std::array<float, numDims> position;
399 unsigned int dim = 0;
402 const float deg2rad =
static_cast<float>(M_PI / 180.0);
407 const float sinLat = std::sin(lat);
408 const float cosLat = std::cos(lat);
409 const float sinLon = std::sin(lon);
410 const float cosLon = std::cos(lon);
412 position[dim++] = earthRadius * cosLat * cosLon;
413 position[dim++] = earthRadius * cosLat * sinLon;
414 position[dim++] = earthRadius * sinLat;
418 position[dim++] = (*obsData.
pressures)[obsId];
424 position[dim++] = ((*obsData.
times)[obsId] - (*obsData.
times)[0]).toSeconds();
430 template <
int numDims>
432 size_t obsId,
const ObsData &obsData)
const {
434 std::array<float, numDims> semiAxes;
438 unsigned int dim = 0;
442 const float invEarthDiameter = 1 / earthDiameter;
445 const float minEuclideanDistance =
446 earthDiameter * std::sin(minGeodesicDistance * invEarthDiameter);
448 semiAxes[dim++] = minEuclideanDistance;
449 semiAxes[dim++] = minEuclideanDistance;
450 semiAxes[dim++] = minEuclideanDistance;
465 os <<
"PoissonDiskThinning: config = " <<
options_ << std::endl;
Base class for UFO QC filters.
This class provides access to observations that may be held on multiple MPI ranks.
RecursiveSplitter splitObservationsIntoIndependentGroups(const std::vector< size_t > &validObsIds, bool opsCompatibilityMode=false) const
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const ioda::ObsDataVector< int > &flags, const Variables &filtervars, bool validIfAnyFilterVariablePassedQC=true) const
Return the IDs of observation locations that should be treated as valid by a filter.
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
static ObsAccessor toAllObservations(const ioda::ObsSpace &obsdb)
Create an accessor to observations from the observation space obsdb, assuming that the whole set of o...
std::vector< size_t > getRecordIds() const
Return the vector of IDs of records successive observation locations belong to.
static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(const ioda::ObsSpace &obsdb, const Variable &variable)
Create an accessor to the collection of observations held in obsdb, assuming that observations with d...
std::vector< float > getFloatVariableFromObsSpace(const std::string &group, const std::string &variable) const
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
size_t totalNumObservations() const
std::shared_ptr< ioda::ObsDataVector< int > > flags_
~PoissonDiskThinning() override
void thinCategory(const ObsData &obsData, const std::vector< size_t > &obsIdsInCategory, const RecursiveSplitter &prioritySplitter, int numSpatialDims, int numNonspatialDims, std::vector< bool > &isThinned) const
std::array< float, numDims > getObservationPosition(size_t obsId, const ObsData &obsData) const
void print(std::ostream &) const override
void applyFilter(const std::vector< bool > &, const Variables &, std::vector< std::vector< bool >> &) const override
void synchroniseRandomNumberGenerators(const eckit::mpi::Comm &comm) const
ObsAccessor createObsAccessor() const
PoissonDiskThinning(ioda::ObsSpace &obsdb, const Parameters_ ¶meters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const Variables &filtervars, const ObsAccessor &obsAccessor) const
void groupObservationsByPriority(const std::vector< size_t > &validObsIds, const ObsAccessor &obsAccessor, RecursiveSplitter &splitter) const
std::array< float, numDims > getExclusionVolumeSemiAxes(size_t obsId, const ObsData &obsData) const
ObsData getObsData(const ObsAccessor &obsAccessor, int &numSpatialDims, int &numNonspatialDims) const
Collect all observation data components used for thinning.
void validateSpacings(const util::ScalarOrMap< int, ValueType > &spacingsByPriority, const std::string ¶meterName) const
Options controlling the operation of the PoissonDiskThinning filter.
oops::OptionalParameter< util::ScalarOrMap< Priority, util::Duration > > minTimeSpacing
oops::Parameter< bool > shuffle
oops::OptionalParameter< Variable > categoryVariable
oops::OptionalParameter< util::ScalarOrMap< Priority, float > > minVerticalSpacing
oops::OptionalParameter< Variable > priorityVariable
oops::OptionalParameter< util::ScalarOrMap< Priority, float > > minHorizontalSpacing
oops::Parameter< ExclusionVolumeShape > exclusionVolumeShape
oops::OptionalParameter< int > randomSeed
Partitions an array into groups of elements equivalent according to certain criteria.
GroupRange groups() const
Return the range of equivalence classes.
void groupBy(const std::vector< size_t > &categories)
Split existing equivalence classes according to a new criterion.
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
void shuffleGroups()
Randomly shuffle the elements of each equivalence class.
void setSeed(unsigned int seed, bool force)
Initialise the random number generator used by shuffleGroups() with a seed.
An implementation of PointIndex storing the point set in a kd-tree.
PointIndex< numDims_ > Base
Base::CoordType CoordType
void insert(const Point &point) override
eckit::KDTreeMemory< TreeTraits > KDTreeImpl
Abstract interface of a container storing point sets and able to answer spatial queries needed by Poi...
std::array< CoordType, numDims > Extent
virtual void insert(const Point &point)=0
virtual bool isAnyPointInEllipsoidInterior(const Point ¢er, const Extent &semiAxes) const =0
std::array< CoordType, numDims > Point
virtual bool isAnyPointInCylinderInterior(const Point ¢er, const Extent &semiAxes, int numSpatialDims) const =0
std::array< float, 3 > Point
ObsPair reverse(const ObsPair &pair)
real(kind_real), parameter, public deg2rad
static constexpr double mean_earth_rad
boost::optional< std::vector< float > > longitudes
boost::optional< util::ScalarOrMap< int, float > > minVerticalSpacings
boost::optional< std::vector< int > > priorities
boost::optional< util::ScalarOrMap< int, float > > minHorizontalSpacings
boost::optional< std::vector< float > > pressures
boost::optional< util::ScalarOrMap< int, util::Duration > > minTimeSpacings
boost::optional< std::vector< float > > latitudes
boost::optional< std::vector< util::DateTime > > times
eckit::geometry::KPoint< numDims > Point