16 #include "eckit/config/Configuration.h"
17 #include "ioda/ObsDataVector.h"
18 #include "ioda/ObsSpace.h"
19 #include "oops/base/Variables.h"
20 #include "oops/util/DateTime.h"
21 #include "oops/util/Duration.h"
22 #include "oops/util/Logger.h"
23 #include "oops/util/missingValues.h"
44 std::vector<T>
allGatherv(
const eckit::mpi::Comm &comm,
const std::vector<T> &v) {
45 eckit::mpi::Buffer<T> buffer(comm.size());
46 comm.allGatherv(v.begin(), v.end(), buffer);
75 std::vector<std::vector<bool>> & flagged)
const {
81 std::vector<float> distancesToBinCenter(validObsIds.size(), 0.f);
86 splitter, distancesToBinCenter);
88 splitter, distancesToBinCenter);
90 splitter, distancesToBinCenter);
93 validObsIds, obsDistribution, splitter, distancesToBinCenter);
96 if (filtervars.
size() != 0) {
97 oops::Log::trace() <<
"Gaussian_Thinning: flagged? = " << flagged[0] << std::endl;
105 const size_t rank =
obsdb_.comm().rank();
107 std::vector<size_t> validObsIds;
108 for (
size_t obsId = 0; obsId < apply.size(); ++obsId)
110 validObsIds.push_back(obsIdDisplacement + obsId);
125 throw eckit::BadParameter(
"Unrecognized distance norm", Here());
131 const std::vector<size_t> &validObsIds,
135 std::vector<float> &distancesToBinCenter)
const {
137 if (binSelector == boost::none)
141 << binSelector->latitudeBinWidth() << std::endl;
143 << binSelector->totalNumBins() << std::endl;
145 const std::vector<float> lat = getGlobalVariableValues<float>(
146 obsdb_, obsDistribution,
"latitude",
"MetaData");
147 std::vector<float> lon = getGlobalVariableValues<float>(
148 obsdb_, obsDistribution,
"longitude",
"MetaData");
152 for (
float &longitude : lon)
156 std::vector<size_t> latBins;
157 std::vector<size_t> lonBins;
158 latBins.reserve(validObsIds.size());
159 lonBins.reserve(validObsIds.size());
160 for (
size_t obsId : validObsIds) {
161 const size_t latBin = binSelector->latitudeBin(lat[obsId]);
162 latBins.push_back(latBin);
163 lonBins.push_back(binSelector->longitudeBin(latBin, lon[obsId]));
168 oops::Log::debug() <<
"Gaussian_Thinning: latitudes = " << lat << std::endl;
169 oops::Log::debug() <<
"Gaussian_Thinning: longitudes = " << lon << std::endl;
170 oops::Log::debug() <<
"Gaussian_Thinning: lat bins = " << latBins << std::endl;
171 oops::Log::debug() <<
"Gaussian_Thinning: lon bins = " << lonBins << std::endl;
173 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
174 const size_t obsId = validObsIds[validObsIndex];
176 lat[obsId], lon[obsId],
177 binSelector->latitudeBinCenter(latBins[validObsIndex]),
178 binSelector->longitudeBinCenter(latBins[validObsIndex], lonBins[validObsIndex]),
179 binSelector->inverseLatitudeBinWidth(),
180 binSelector->inverseLongitudeBinWidth(latBins[validObsIndex]));
182 distancesToBinCenter[validObsIndex], component);
193 oops::Log::debug() <<
"Gaussian_Thinning: requested horizontal bin size (km) = "
200 const float meridianLength = M_PI * earthRadius;
201 const float tentativeNumLatBins = meridianLength / options.
horizontalMesh;
209 const int equatorToMeridianLengthRatio = 2;
217 const std::vector<size_t> &validObsIds,
220 boost::optional<Variable> categoryVariable =
options_->categoryVariable;
221 if (categoryVariable == boost::none)
224 const std::vector<int> category = getGlobalVariableValues<int>(
225 obsdb_, obsDistribution, categoryVariable.get().variable(), categoryVariable.get().group());
227 std::vector<int> validObsCategories(validObsIds.size());
228 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex)
229 validObsCategories[validObsIndex] = category[validObsIds[validObsIndex]];
230 splitter.
groupBy(validObsCategories);
236 const std::vector<size_t> &validObsIds,
240 std::vector<float> &distancesToBinCenter)
const {
242 if (binSelector == boost::none)
246 << binSelector->numBins() << std::endl;
248 const std::vector<float> pres = getGlobalVariableValues<float>(
249 obsdb_, obsDistribution,
"air_pressure",
"MetaData");
251 std::vector<size_t> bins;
252 bins.reserve(validObsIds.size());
253 for (
size_t obsId : validObsIds)
255 bins.push_back(binSelector->bin(pres[obsId]));
259 oops::Log::debug() <<
"Gaussian_Thinning: pressures = " << pres << std::endl;
260 oops::Log::debug() <<
"Gaussian_Thinning: pressure bins = " << bins << std::endl;
262 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
263 const size_t obsId = validObsIds[validObsIndex];
265 pres[obsId], binSelector->binCenter(bins[validObsIndex]),
266 binSelector->inverseBinWidth());
268 distancesToBinCenter[validObsIndex], component);
279 const int numVerticalBins = std::max(
288 << numVerticalBins << std::endl;
295 const std::vector<size_t> &validObsIds,
299 std::vector<float> &distancesToBinCenter)
const {
300 util::DateTime timeOffset;
302 if (binSelector == boost::none)
306 << binSelector->numBins() << std::endl;
308 const std::vector<util::DateTime> times = getGlobalVariableValues<util::DateTime>(
309 obsdb_, obsDistribution,
"datetime",
"MetaData");
311 std::vector<size_t> bins;
312 bins.reserve(validObsIds.size());
313 for (
size_t obsId : validObsIds)
315 bins.push_back(binSelector->bin((times[obsId] - timeOffset).toSeconds()));
322 oops::Log::debug() <<
"Gaussian_Thinning: time bins = " << bins << std::endl;
324 for (
size_t validObsIndex = 0; validObsIndex < validObsIds.size(); ++validObsIndex) {
325 const size_t obsId = validObsIds[validObsIndex];
327 (times[obsId] - timeOffset).toSeconds(),
328 binSelector->binCenter(bins[validObsIndex]),
329 binSelector->inverseBinWidth());
331 distancesToBinCenter[validObsIndex], component);
339 if (options.
timeMesh.value() == boost::none ||
340 options.
timeMin.value() == boost::none ||
341 options.
timeMax.value() == boost::none)
344 const util::Duration timeMesh = options.
timeMesh.value().get();
345 if (timeMesh.toSeconds() == 0)
348 const util::DateTime timeMin = options.
timeMin.value().get();
349 const util::DateTime timeMax = options.
timeMax.value().get();
352 << ((timeMax - timeMin).toSeconds()) << std::endl;
353 oops::Log::debug() <<
"timeMesh.toSeconds() = " << timeMesh.toSeconds() << std::endl;
355 const int numTimeBins = std::max(
357 static_cast<int>(std::ceil((timeMax - timeMin).toSeconds() /
358 static_cast<float>(timeMesh.toSeconds()))));
363 timeOffset = timeMin;
370 const std::vector<size_t> &validObsIds,
373 const std::vector<float> &distancesToBinCenter)
const {
376 validObsIds, distancesToBinCenter, obsDistribution);
378 std::vector<bool> isThinned(obsDistribution.
globalObsCount(),
false);
380 const size_t bestValidObsIndex = *std::min_element(
381 std::begin(group), std::end(group), comparator);
383 for (
size_t validObsIndex : group)
384 if (validObsIndex != bestValidObsIndex)
385 isThinned[validObsIds[validObsIndex]] =
true;
393 const std::vector<size_t> &validObsIds,
394 const std::vector<float> &distancesToBinCenter,
397 if (
options_->priorityVariable.value() == boost::none) {
399 return [&distancesToBinCenter](
size_t validObsIndexA,
size_t validObsIndexB) {
400 return distancesToBinCenter[validObsIndexA] < distancesToBinCenter[validObsIndexB];
406 const std::vector<int> priorities = getGlobalVariableValues<int>(
412 return [priorities, &validObsIds, &distancesToBinCenter]
413 (
size_t validObsIndexA,
size_t validObsIndexB) {
415 return std::make_pair(-priorities[validObsIds[validObsIndexA]],
416 distancesToBinCenter[validObsIndexA]) <
417 std::make_pair(-priorities[validObsIds[validObsIndexB]],
418 distancesToBinCenter[validObsIndexB]);
425 const std::vector<bool> & isThinned,
427 std::vector<std::vector<bool>> & flagged)
const {
428 const size_t rank =
obsdb_.comm().rank();
430 for (std::vector<bool> & variableFlagged : flagged) {
431 ASSERT(obsDistribution.
localObsCounts()[rank] == variableFlagged.size());
432 for (
size_t localObsId = 0; localObsId < variableFlagged.size(); ++localObsId) {
433 const size_t globalObsId = displacement + localObsId;
434 if (isThinned[globalObsId])
435 variableFlagged[localObsId] =
true;
443 os <<
"Gaussian_Thinning: config = " <<
config_ << std::endl;