17 #include "eckit/config/Configuration.h"
18 #include "eckit/container/KDTree.h"
19 #include "ioda/ObsDataVector.h"
20 #include "ioda/ObsSpace.h"
21 #include "oops/base/Variables.h"
22 #include "oops/util/DateTime.h"
23 #include "oops/util/Duration.h"
24 #include "oops/util/IsAnyPointInVolumeInterior.h"
25 #include "oops/util/Logger.h"
38 template <
int numDims_>
42 static const int numDims = numDims_;
44 typedef std::array<CoordType, numDims>
Point;
45 typedef std::array<CoordType, numDims>
Extent;
53 int numSpatialDims)
const = 0;
56 const Extent &semiAxes)
const = 0;
60 template <
int numDims_>
69 static const int numDims = Base::numDims;
71 void insert(
const Point &point)
override;
73 bool isAnyPointInCylinderInterior(
const Point ¢er,
75 int numSpatialDims)
const override;
77 bool isAnyPointInEllipsoidInterior(
const Point ¢er,
78 const Extent &semiAxes)
const override;
84 typedef eckit::geometry::KPoint<numDims>
Point;
89 typedef typename KDTreeImpl::Alloc
Alloc;
90 typedef typename KDTreeImpl::Node
Node;
92 typedef typename KDTreeImpl::Value
Value;
97 template <
int numDims_>
103 template <
int numDims_>
105 const Point ¢er,
const Extent &semiAxes)
const {
107 for (
int d = 0; d < numDims; ++d) {
108 lbound.data()[d] = center[d] - semiAxes[d];
109 ubound.data()[d] = center[d] + semiAxes[d];
111 return util::isAnyPointInEllipsoidInterior(tree_, lbound, ubound);
114 template <
int numDims_>
116 const Point ¢er,
const Extent &semiAxes,
int numSpatialDims)
const {
118 for (
int d = 0; d < numDims; ++d) {
119 lbound.data()[d] = center[d] - semiAxes[d];
120 ubound.data()[d] = center[d] + semiAxes[d];
122 return util::isAnyPointInCylinderInterior(tree_, lbound, ubound, numSpatialDims);
137 boost::optional<std::vector<util::DateTime>>
times;
143 const eckit::Configuration & config,
160 std::vector<std::vector<bool>> & flagged)
const {
163 if (validObsIds.empty()) {
167 int numSpatialDims, numNonspatialDims;
170 std::vector<bool> isThinned(apply.size(),
false);
176 std::vector<size_t> obsIdsInCategory;
177 for (
size_t validObsIndex : categoryGroup) {
178 obsIdsInCategory.push_back(validObsIds[validObsIndex]);
186 if (
options_->randomSeed.value() != boost::none)
193 thinCategory(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, numNonspatialDims,
199 if (filtervars.
size() != 0) {
200 oops::Log::trace() <<
"PoissonDiskThinning: flagged? = " << flagged[0] << std::endl;
205 int &numNonspatialDims)
const
210 numNonspatialDims = 0;
239 obsdb_.get_db(
"MetaData",
"datetime", *obsData.
times);
245 const boost::optional<Variable> priorityVariable =
options_->priorityVariable;
246 if (priorityVariable != boost::none) {
248 obsdb_.get_db(priorityVariable.get().group(), priorityVariable.get().variable(),
256 template <
typename ValueType>
259 const std::string ¶meterName)
const {
260 if (spacingsByPriority.isScalar())
263 if (spacingsByPriority.begin() == spacingsByPriority.end())
264 throw eckit::BadParameter(parameterName +
" must be a scalar or a non-empty map");
268 ValueType prevSpacing = spacingsByPriority.begin()->second;
269 for (
const auto &priorityAndSpacing : spacingsByPriority) {
270 const ValueType &spacing = priorityAndSpacing.second;
271 if (spacing > prevSpacing)
272 throw eckit::BadParameter(parameterName +
273 ": exclusion volumes of lower-priority observations must be "
274 "at least as large as those of higher-priority ones.");
279 const std::vector<bool> & apply)
const {
280 std::vector<size_t> validObsIds;
281 for (
size_t obsId = 0; obsId < apply.size(); ++obsId)
283 validObsIds.push_back(obsId);
288 const std::vector<size_t> &validObsIds,
290 boost::optional<Variable> categoryVariable =
options_->categoryVariable;
291 if (categoryVariable == boost::none)
295 categoryVariable.get().group());
296 const ioda::ObsDataRow<int> &category = obsDataVector[0];
298 std::vector<int> validObsCategories(validObsIds.size());
299 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex)
300 validObsCategories[validObsIndex] = category[validObsIds[validObsIndex]];
301 splitter.
groupBy(validObsCategories);
305 const std::vector<size_t> &validObsIds,
307 boost::optional<Variable> priorityVariable =
options_->priorityVariable;
308 if (priorityVariable == boost::none)
312 priorityVariable.get().group());
313 const ioda::ObsDataRow<int> &priority = obsDataVector[0];
316 return -i - std::numeric_limits<int>::lowest() + std::numeric_limits<int>::max();
319 std::vector<int> validObsPriorities(validObsIds.size());
320 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex)
322 validObsPriorities[validObsIndex] =
reverse(priority[validObsIds[validObsIndex]]);
323 splitter.
groupBy(validObsPriorities);
327 const std::vector<size_t> &obsIdsInCategory,
330 int numNonspatialDims,
331 std::vector<bool> &isThinned)
const {
332 switch (numSpatialDims + numNonspatialDims) {
336 return thinCategory<1>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
338 return thinCategory<2>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
340 return thinCategory<3>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
342 return thinCategory<4>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
344 return thinCategory<5>(obsData, obsIdsInCategory, prioritySplitter, numSpatialDims, isThinned);
347 ABORT(
"Unexpected number of thinning dimensions");
350 template <
int numDims>
352 const std::vector<size_t> &obsIdsInCategory,
355 std::vector<bool> &isThinned)
const {
356 KDTree<numDims> pointIndex;
358 for (
auto priorityGroup : prioritySplitter.
groups()) {
359 for (
size_t obsIndex : priorityGroup) {
360 const size_t obsId = obsIdsInCategory[obsIndex];
361 std::array<float, numDims> point = getObservationPosition<numDims>(obsId, obsData);
362 std::array<float, numDims> semiAxes = getExclusionVolumeSemiAxes<numDims>(obsId, obsData);
364 pointIndex.isAnyPointInCylinderInterior(point, semiAxes, numSpatialDims)) ||
366 pointIndex.isAnyPointInEllipsoidInterior(point, semiAxes))) {
367 isThinned[obsId] =
true;
369 pointIndex.insert(point);
375 template <
int numDims>
377 size_t obsId,
const ObsData &obsData)
const {
378 std::array<float, numDims> position;
380 unsigned int dim = 0;
383 const float deg2rad =
static_cast<float>(M_PI / 180.0);
388 const float sinLat = std::sin(lat);
389 const float cosLat = std::cos(lat);
390 const float sinLon = std::sin(lon);
391 const float cosLon = std::cos(lon);
393 position[dim++] = earthRadius * cosLat * cosLon;
394 position[dim++] = earthRadius * cosLat * sinLon;
395 position[dim++] = earthRadius * sinLat;
399 position[dim++] = (*obsData.
pressures)[obsId];
405 position[dim++] = ((*obsData.
times)[obsId] - (*obsData.
times)[0]).toSeconds();
411 template <
int numDims>
413 size_t obsId,
const ObsData &obsData)
const {
415 std::array<float, numDims> semiAxes;
419 unsigned int dim = 0;
423 const float invEarthDiameter = 1 / earthDiameter;
426 const float minEuclideanDistance =
427 earthDiameter * std::sin(minGeodesicDistance * invEarthDiameter);
429 semiAxes[dim++] = minEuclideanDistance;
430 semiAxes[dim++] = minEuclideanDistance;
431 semiAxes[dim++] = minEuclideanDistance;
446 const std::vector<bool> & isThinned,
447 std::vector<std::vector<bool>> & flagged)
const {
448 for (std::vector<bool> & variableFlagged : flagged)
449 for (
size_t obsId = 0; obsId < isThinned.size(); ++obsId)
450 if (isThinned[obsId])
451 variableFlagged[obsId] =
true;
455 os <<
"PoissonDiskThinning: config = " <<
config_ << std::endl;