19 #include <boost/format.hpp>
21 #include "eckit/config/Configuration.h"
22 #include "ioda/ObsDataVector.h"
23 #include "ioda/ObsSpace.h"
24 #include "oops/base/Variables.h"
25 #include "oops/util/DateTime.h"
26 #include "oops/util/Duration.h"
27 #include "oops/util/Logger.h"
28 #include "oops/util/sqr.h"
43 if (var.
group().empty())
52 std::vector<T> result(v);
53 std::sort(result.begin(), result.end());
54 const auto newEnd = std::unique(result.begin(), result.end());
55 result.erase(newEnd, result.end());
63 std::map<T, int> intByValue;
66 for (
const T& value : uniqueValues)
67 intByValue[value] = index++;
70 std::vector<int> ints;
71 ints.reserve(values.size());
72 std::transform(values.begin(), values.end(), std::back_inserter(ints),
73 [&](
const T& value) { return intByValue.at(value); });
78 const std::vector<int> *varFlags =
nullptr;
79 const std::vector<float> *obsValues =
nullptr;
80 const std::vector<float> *obsBiases =
nullptr;
81 const std::vector<float> *obsErrors =
nullptr;
101 :
FilterBase(obsdb, config, std::move(flags), std::move(obserr))
117 std::vector<std::vector<bool>> & flagged)
const {
128 const oops::Variables &observedVars =
obsdb_.obsvariables();
133 auto getFilterVariableName = [&] (
size_t filterVarIndex) {
137 auto getScalarSingleLevelVariableData = [&] (
size_t filterVarIndex) {
138 const size_t observedVarIndex = observedVars.find(getFilterVariableName(filterVarIndex));
140 ScalarSingleLevelVariableData data;
141 data.varFlags = &(*flags_)[observedVarIndex];
142 data.obsValues = &obsValues[filterVarIndex];
143 data.obsBiases = &obsBiases[filterVarIndex];
144 data.obsErrors = &(*obserr_)[observedVarIndex];
150 data.grossErrorProbabilities);
156 const std::vector<float> *pressures =
161 const std::vector<MetOfficeBuddyPair> buddyPairs = buddyPairFinder.
findBuddyPairs(validObsIds);
167 std::map<std::string, std::vector<float>> calculatedGrossErrProbsByVarName;
169 bool previousVariableWasFirstComponentOfTwo =
false;
170 for (
size_t filterVarIndex = 0; filterVarIndex < filtervars.
size(); ++filterVarIndex) {
171 if (previousVariableWasFirstComponentOfTwo) {
173 ScalarSingleLevelVariableData firstComponentData =
174 getScalarSingleLevelVariableData(filterVarIndex - 1);
175 ScalarSingleLevelVariableData secondComponentData =
176 getScalarSingleLevelVariableData(filterVarIndex);
178 for (
size_t i = 0; i < firstComponentData.grossErrorProbabilities.size(); ++i)
179 firstComponentData.grossErrorProbabilities[i] =
180 std::max(firstComponentData.grossErrorProbabilities[i],
181 secondComponentData.grossErrorProbabilities[i]);
184 *firstComponentData.varFlags,
185 verbose, bgErrorHorizCorrScales,
187 *firstComponentData.obsValues, *firstComponentData.obsBiases,
188 *secondComponentData.obsValues, *secondComponentData.obsBiases,
189 *firstComponentData.obsErrors,
190 firstComponentData.bgValues, secondComponentData.bgValues,
191 firstComponentData.bgErrors,
192 firstComponentData.grossErrorProbabilities);
198 calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex - 1)] =
199 firstComponentData.grossErrorProbabilities;
200 calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex)] =
201 std::move(firstComponentData.grossErrorProbabilities);
203 previousVariableWasFirstComponentOfTwo =
false;
205 if (filtervars[filterVarIndex].options().getBool(
"first_component_of_two",
false)) {
206 previousVariableWasFirstComponentOfTwo =
true;
209 ScalarSingleLevelVariableData data =
210 getScalarSingleLevelVariableData(filterVarIndex);
212 *data.varFlags, verbose, bgErrorHorizCorrScales,
214 *data.obsValues, *data.obsBiases, *data.obsErrors,
215 data.bgValues, data.bgErrors,
216 data.grossErrorProbabilities);
217 calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex)] =
218 std::move(data.grossErrorProbabilities);
225 for (
const auto &varNameAndGrossErrProbs : calculatedGrossErrProbsByVarName) {
226 obsdb_.put_db(
"GrossErrorProbability", varNameAndGrossErrProbs.first,
227 varNameAndGrossErrProbs.second);
232 if (filtervars.
size() != 0) {
233 oops::Log::trace() <<
"MetOfficeBuddyCheck: flagged? = " << flagged[0] << std::endl;
249 if (
obsdb_.has(
"MetaData",
"air_pressure")) {
260 const boost::optional<Variable> &stationIdVariable =
options_->stationIdVariable.value();
261 if (stationIdVariable == boost::none) {
262 if (
obsdb_.obs_group_var().empty()) {
265 return std::vector<int>(
obsdb_.nlocs(), 0);
267 const std::vector<size_t> &recordNumbers =
obsdb_.recnum();
268 return std::vector<int>(recordNumbers.begin(), recordNumbers.end());
271 switch (
obsdb_.dtype(stationIdVariable->group(), stationIdVariable->variable())) {
272 case ioda::ObsDtype::Integer:
274 std::vector<int> stationIds(
obsdb_.nlocs());
275 obsdb_.get_db(stationIdVariable->group(), stationIdVariable->variable(), stationIds);
279 case ioda::ObsDtype::String:
281 std::vector<std::string> stringIds(
obsdb_.nlocs());
282 obsdb_.get_db(stationIdVariable->group(), stationIdVariable->variable(), stringIds);
287 throw eckit::UserError(
"Only integer and string variables may be used as station IDs",
294 const std::vector<size_t> &validObsIds,
const std::vector<float> &latitudes)
const {
296 std::vector<double> abscissas, ordinates;
297 for (
const std::pair<const float, float> &xy :
298 options_->horizontalCorrelationScaleInterpolationPoints.value()) {
299 abscissas.push_back(xy.first);
300 ordinates.push_back(xy.second);
303 std::move(ordinates));
305 std::vector<float> scales(latitudes.size());
306 for (
size_t obsId : validObsIds) {
307 scales[obsId] = horizontalCorrelationScaleInterp(latitudes[obsId]);
314 const std::vector<size_t> &validObsIds,
315 const std::vector<float> &latitudes,
316 const std::vector<float> &longitudes,
317 const std::vector<util::DateTime> &datetimes,
318 const std::vector<float> *pressures,
319 const std::vector<int> &stationIds,
320 const std::vector<float> &bgErrorHorizCorrScales)
const {
322 size_t numVerboseObs = 0;
323 std::vector<bool> verbose(latitudes.size(),
false);
325 for (
size_t obsId : validObsIds) {
326 verbose[obsId] = std::any_of(
327 options_->tracedBoxes.value().begin(),
328 options_->tracedBoxes.value().end(),
330 { return box.contains(latitudes[obsId], longitudes[obsId]); });
332 if (verbose[obsId]) {
333 if (numVerboseObs == 0) {
334 oops::Log::trace() <<
"Obs StationId Lat Lon Pressure "
335 "Datetime CorrScale\n";
339 const float pressure = pressures !=
nullptr ? (*pressures)[obsId] : 0;
340 oops::Log::trace() << boost::format(
"%5d %9d %7.2f %7.2f %8.0f %s %9.1f\n") %
341 obsId % stationIds[obsId] % latitudes[obsId] % longitudes[obsId] %
342 pressure % datetimes[obsId] % bgErrorHorizCorrScales[obsId];
346 oops::Log::trace() <<
"Buddy check: " << numVerboseObs <<
" verbose observations" << std::endl;
352 const std::vector<int> &flags,
353 const std::vector<bool> &verbose,
354 const std::vector<float> &bgErrorHorizCorrScales,
355 const std::vector<int> &stationIds,
356 const std::vector<util::DateTime> &datetimes,
357 const std::vector<float> &obsValues,
358 const std::vector<float> &obsBiases,
359 const std::vector<float> &obsErrors,
360 const std::vector<float> &bgValues,
361 const std::vector<float> &bgErrors,
362 std::vector<float> &pges)
const {
365 const bool isMaster =
obsdb_.comm().rank() == 0;
367 oops::Log::trace() << __func__ <<
" "
368 <<
" dampingFactor1 = " <<
options_->dampingFactor1
369 <<
", dampingFactor2 = " <<
options_->dampingFactor2 <<
'\n';
370 oops::Log::trace() <<
"ObsA ObsB StatIdA StatIdB DiffA DiffB "
371 "Dist Corr Agree PgeA PgeB Mult\n";
374 const double invTemporalCorrScale = 1.0 /
options_->temporalCorrelationScale.value().toSeconds();
377 const size_t jA = pair.obsIdA;
378 const size_t jB = pair.obsIdB;
386 const double hcScale = 0.5 * (bgErrorHorizCorrScales[jA] + bgErrorHorizCorrScales[jB]);
387 const double scaledDist = pair.distanceInKm / hcScale;
392 const double corr = (1.0 + scaledDist) *
393 std::exp(-scaledDist - sqr((datetimes[jA] - datetimes[jB]).toSeconds() *
394 invTemporalCorrScale));
400 double diffA = obsValues[jA] + obsBiases[jA] - bgValues[jA];
401 double diffB = obsValues[jB] + obsBiases[jB] - bgValues[jB];
403 double errVarA = sqr(obsErrors[jA]) + sqr(bgErrors[jA]);
404 double errVarB = sqr(obsErrors[jB]) + sqr(bgErrors[jB]);
406 double covar = corr * bgErrors[jA] * bgErrors[jB];
408 double rho2 = sqr(covar) / (errVarA * errVarB);
410 double expArg = -(0.5 * rho2 / (1.0 - rho2)) *
411 (sqr(diffA) / errVarA + sqr(diffB) / errVarB - 2.0 * diffA * diffB / covar);
412 expArg =
options_->dampingFactor1 * (-0.5 * std::log(1.0 - rho2) + expArg);
415 double z = 1.0 / (1.0 - (1.0 - pges[jA]) * (1.0 - pges[jB]) * (1.0 - std::exp(expArg)));
418 z = std::pow(z,
options_->dampingFactor2);
421 if (isMaster && (verbose[jA] || verbose[jB])) {
422 oops::Log::trace() << boost::format(
"%5d %5d %8d %8d "
424 "%5.3f %6.3f %6.3f %6.3f %6.3f\n") %
425 jA % jB % stationIds[jA] % stationIds[jB] %
426 diffA % diffB % pair.distanceInKm %
427 corr % std::exp(expArg) % pges[jA] % pges[jB] % z;
433 const std::vector<int> &flags,
434 const std::vector<bool> &verbose,
435 const std::vector<float> &bgErrorHorizCorrScales,
436 const std::vector<int> &stationIds,
437 const std::vector<util::DateTime> &datetimes,
438 const std::vector<float> &uObsValues,
439 const std::vector<float> &uObsBiases,
440 const std::vector<float> &vObsValues,
441 const std::vector<float> &vObsBiases,
442 const std::vector<float> &obsErrors,
443 const std::vector<float> &uBgValues,
444 const std::vector<float> &vBgValues,
445 const std::vector<float> &bgErrors,
446 std::vector<float> &pges)
const {
449 const bool isMaster =
obsdb_.comm().rank() == 0;
451 oops::Log::trace() << __func__ <<
" "
452 <<
" dampingFactor1 = " <<
options_->dampingFactor1
453 <<
", dampingFactor2 = " <<
options_->dampingFactor2 <<
'\n';
454 oops::Log::trace() <<
"ObsA ObsB StatIdA StatIdB LDiffA LDiffB TDiffA TDiffB "
455 "Dist Corr Agree PgeA PgeB Mult\n";
458 const double invTemporalCorrScale = 1.0 /
options_->temporalCorrelationScale.value().toSeconds();
461 const size_t jA = pair.obsIdA;
462 const size_t jB = pair.obsIdB;
470 double horizCorrScale = 0.5 * (bgErrorHorizCorrScales[jA] + bgErrorHorizCorrScales[jB]);
471 double scaleDist = pair.distanceInKm / horizCorrScale;
475 const double lCorr = std::exp(-scaleDist - sqr((datetimes[jA] - datetimes[jB]).toSeconds() *
476 invTemporalCorrScale));
478 if ((1.0 + scaleDist) * lCorr < 0.1)
482 double sinRot = std::sin(pair.rotationAInRad);
483 double cosRot = std::cos(pair.rotationAInRad);
485 double lDiffA = cosRot * (uObsValues[jA] + uObsBiases[jA] - uBgValues[jA])
486 + sinRot * (vObsValues[jA] + vObsBiases[jA] - vBgValues[jA]);
488 double tDiffA = - sinRot * (uObsValues[jA] + uObsBiases[jA] - uBgValues[jA])
489 + cosRot * (vObsValues[jA] + vObsBiases[jA] - vBgValues[jA]);
490 sinRot = std::sin(pair.rotationBInRad);
491 cosRot = std::cos(pair.rotationBInRad);
493 double lDiffB = cosRot * (uObsValues[jB] + uObsBiases[jB] - uBgValues[jB])
494 + sinRot * (vObsValues[jB] + vObsBiases[jB] - vBgValues[jB]);
496 double tDiffB = - sinRot * (uObsValues[jB] + uObsBiases[jB] - uBgValues[jB])
497 + cosRot * (vObsValues[jB] + vObsBiases[jB] - vBgValues[jB]);
500 double errVarA = sqr(obsErrors[jA]) + sqr(bgErrors[jA]);
501 double errVarB = sqr(obsErrors[jB]) + sqr(bgErrors[jB]);
504 double lCovar = lCorr * bgErrors[jA] * bgErrors[jB];
505 double tCovar = (1.0 -
options_->nonDivergenceConstraint * scaleDist) * lCovar;
507 double lRho2 = sqr(lCovar) / (errVarA * errVarB);
508 double tRho2 = sqr(tCovar) / (errVarA * errVarB);
514 expArg = -(0.5 * tRho2 / (1.0 - tRho2)) *
515 (sqr(tDiffA) / errVarA + sqr(tDiffB) / errVarB - 2.0 * tDiffA * tDiffB / tCovar);
516 expArg = expArg - (0.5 * lRho2 / (1.0 - lRho2)) *
517 (sqr(lDiffA) / errVarA + sqr(lDiffB) / errVarB - 2.0 * lDiffA * lDiffB / lCovar);
518 expArg =
options_->dampingFactor1 * (-0.5 * std::log((1.0 - lRho2) * (1.0 - lRho2)) + expArg);
521 double z = 1.0 / (1.0 - (1.0 - pges[jA]) * (1.0 - pges[jB]) * (1.0 - std::exp(expArg)));
524 z = std::pow(z,
options_->dampingFactor2);
528 if (isMaster && (verbose[jA] || verbose[jB])) {
529 oops::Log::trace() << boost::format(
"%5d %5d %8d %8d "
530 "%6.1f %6.1f %6.1f %6.1f %6.1f "
531 "%5.3f %6.3f %6.3f %6.3f %6.3f\n") %
532 jA % jB % stationIds[jA] % stationIds[jB] %
533 lDiffA % lDiffB % tDiffA % tDiffB % pair.distanceInKm %
534 lCorr % std::exp(expArg) % pges[jA] % pges[jB] % z;
540 const std::vector<bool> & apply)
const {
541 std::vector<size_t> validObsIds;
542 for (
size_t obsId = 0; obsId < apply.size(); ++obsId)
547 validObsIds.push_back(obsId);
553 const std::map<std::string, std::vector<float>> &grossErrProbsByVarName,
554 std::vector<std::vector<bool>> &flagged)
const {
555 ASSERT(filtervars.
size() == flagged.size());
557 for (
size_t varIndex = 0; varIndex < filtervars.
size(); ++varIndex) {
559 const std::vector<float> &grossErrProbs = grossErrProbsByVarName.at(varName);
560 std::vector<bool> &variableFlagged = flagged[varIndex];
561 ASSERT(grossErrProbs.size() == variableFlagged.size());
563 for (
size_t obsId = 0; obsId < grossErrProbs.size(); ++obsId)
564 if (grossErrProbs[obsId] >=
options_->rejectionThreshold)
565 variableFlagged[obsId] =
true;
570 os <<
"MetOfficeBuddyCheck: config = " <<
config_ << std::endl;