UFO
MetOfficeBuddyCheck.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2020 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <cmath>
12 #include <functional>
13 #include <iomanip>
14 #include <map>
15 #include <sstream>
16 #include <string>
17 #include <utility>
18 #include <vector>
19 
20 #include <boost/format.hpp>
21 
22 #include "eckit/config/Configuration.h"
23 #include "ioda/ObsDataVector.h"
24 #include "ioda/ObsSpace.h"
25 #include "oops/base/Variables.h"
26 #include "oops/util/DateTime.h"
27 #include "oops/util/Duration.h"
28 #include "oops/util/Logger.h"
29 #include "oops/util/sqr.h"
34 
35 
36 namespace ufo {
37 
38 namespace {
39 
40 const double expArgMax = 80.0; // maximum argument of the exponential function
41 const float maxGrossErrorProbability = 1.0; // maximum allowed value for PGE
42 
43 /// brief Extract 1st level data from the provided flattened vector.
44 template<typename T>
45 std::vector<T> extract1stLev(const std::vector<T> &flattenedArray,
46  const boost::optional<Eigen::ArrayXXi> &profileIndex) {
47  if ( !profileIndex )
48  return flattenedArray;
49 
50  std::vector<T> res;
51  res.resize((*profileIndex).rows());
52  for (size_t row=0; row < (*profileIndex).rows(); row++) {
53  res[row] = flattenedArray[(*profileIndex)(row, 0)];
54  }
55  return res;
56 }
57 
58 /// Return a 2D eigen array from the provided flattened vector.
59 Eigen::ArrayXXf unravel(const std::vector<float> &flattenedArray,
60  const boost::optional<Eigen::ArrayXXi> &profileIndex) {
61  if (profileIndex) {
62  Eigen::ArrayXXf res((*profileIndex).rows(), (*profileIndex).cols());
63  for (size_t row=0; row < (*profileIndex).rows(); row++)
64  for (size_t col=0; col < (*profileIndex).cols(); col++)
65  res(row, col) = flattenedArray[(*profileIndex)(row, col)];
66  return res;
67  } else {
68  Eigen::ArrayXXf res(flattenedArray.size(), 1);
69  for (size_t row=0; row < flattenedArray.size(); row++)
70  res(row, 0) = flattenedArray[row];
71  return res;
72  }
73 }
74 
75 /// Update the provided flat array with values from the provided eigen array
76 void updateFlatData(std::vector<float> & flatArray, const Eigen::ArrayXXf &array,
77  const boost::optional<Eigen::ArrayXXi> &profileIndex) {
78  if (profileIndex) {
79  for (size_t row=0; row < (*profileIndex).rows(); row++)
80  for (size_t col=0; col < (*profileIndex).cols(); col++)
81  flatArray[(*profileIndex)(row, col)] = array(row, col);
82  } else {
83  ASSERT(array.cols() == 1);
84  for (size_t row=0; row < array.rows(); row++)
85  flatArray[row] = array(row, 0);
86  }
87 }
88 
89 /// Derive a mapping between the ObsSpace flat data and an unraveled 2D eigen array representation.
90 Eigen::ArrayXXi deriveIndices(const ioda::ObsSpace & obsdb,
91  const int numLevels) {
92  // Assume ObsSpace contains only the averaged profiles if this variable isn't present.
93  boost::optional<std::vector<int>> extended_obs_space;
94  if (obsdb.has("MetaData", "extended_obs_space")) {
95  extended_obs_space = std::vector<int>(obsdb.nlocs());
96  obsdb.get_db("MetaData", "extended_obs_space", *extended_obs_space);
97  }
98  Eigen::ArrayXXi profileIndex {obsdb.nrecs(), numLevels};
99 
100  int recnum = 0;
101  for (ioda::ObsSpace::RecIdxIter irec = obsdb.recidx_begin(); irec != obsdb.recidx_end(); ++irec) {
102  int levnum = 0;
103  const std::vector<std::size_t> &rSort = obsdb.recidx_vector(irec);
104  for (size_t ilocs = 0; ilocs < rSort.size(); ++ilocs) {
105  if (extended_obs_space && ((*extended_obs_space)[rSort[ilocs]] != 1))
106  continue;
107  profileIndex(recnum, levnum) = rSort[ilocs];
108  levnum++;
109  }
110  if (levnum != numLevels) {
111  std::stringstream msg;
112  msg << "Record (profile): " << recnum << " length: " << levnum+1 << " does not match the "
113  << "number of levels expected: " << numLevels;
114  throw eckit::UserError(msg.str(), Here());
115  }
116  recnum++;
117  }
118  return profileIndex;
119 }
120 
121 std::string fullVariableName(const Variable &var)
122 {
123  if (var.group().empty())
124  return var.variable();
125  else
126  return var.variable() + "@" + var.group();
127 }
128 
129 template <typename T>
130 std::vector<T> uniqueElements(const std::vector<T> &v)
131 {
132  std::vector<T> result(v);
133  std::sort(result.begin(), result.end());
134  const auto newEnd = std::unique(result.begin(), result.end());
135  result.erase(newEnd, result.end());
136  return result;
137 }
138 
139 template <typename T>
140 std::vector<int> mapDistinctValuesToDistinctInts(const std::vector<T> &values)
141 {
142  const std::vector<T> uniqueValues = uniqueElements(values);
143  std::map<T, int> intByValue;
144  {
145  int index = 0;
146  for (const T& value : uniqueValues)
147  intByValue[value] = index++;
148  }
149 
150  std::vector<int> ints;
151  ints.reserve(values.size());
152  std::transform(values.begin(), values.end(), std::back_inserter(ints),
153  [&](const T& value) { return intByValue.at(value); });
154  return ints;
155 }
156 
157 
159  std::vector<int> varFlags;
160  Eigen::ArrayXXf obsValues;
161  Eigen::ArrayXXf obsBiases;
162  Eigen::ArrayXXf obsErrors;
163  Eigen::ArrayXXf bgValues;
164  Eigen::ArrayXXf bgErrors;
165  std::vector<float> flatGrossErrorProbabilities;
166  Eigen::ArrayXXf grossErrorProbabilities;
167 };
168 
169 
170 } // namespace
171 
172 /// \brief Metadata of all observations processed by the filter.
174  std::vector<float> latitudes;
175  std::vector<float> longitudes;
176  std::vector<util::DateTime> datetimes;
177  boost::optional<std::vector<float>> pressures;
178  boost::optional<Eigen::ArrayXXf> pressuresML;
179  std::vector<int> stationIds;
180 };
181 
182 MetOfficeBuddyCheck::MetOfficeBuddyCheck(ioda::ObsSpace& obsdb, const Parameters_& parameters,
183  std::shared_ptr<ioda::ObsDataVector<int> > flags,
184  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
185  : FilterBase(obsdb, parameters, std::move(flags), std::move(obserr)), options_(parameters)
186 {
187  oops::Log::debug() << "MetOfficeBuddyCheck: config = " << options_ << std::endl;
188  allvars_ += Variables(filtervars_, "HofX");
189  for (size_t i = 0; i < filtervars_.size(); ++i)
191 }
192 
193 // Required for the correct destruction of options_.
195 {}
196 
197 
198 
199 void MetOfficeBuddyCheck::applyFilter(const std::vector<bool> & apply,
200  const Variables & filtervars,
201  std::vector<std::vector<bool>> & flagged) const {
202  // Fetch metadata required for identifying buddy pairs.
203 
204  const boost::optional<int> &numLevels = options_.numLevels.value();
205 
206  boost::optional<Eigen::ArrayXXi> profileIndex;
207  if (numLevels)
208  profileIndex = deriveIndices(obsdb_, *numLevels);
209 
210  const std::vector<size_t> validObsIds = getValidObservationIds(apply, profileIndex);
211  MetaData obsData = collectMetaData(profileIndex);
212 
213  const std::vector<float> bgErrorHorizCorrScales = calcBackgroundErrorHorizontalCorrelationScales(
214  validObsIds, obsData.latitudes);
215  const std::vector<bool> verbose = flagAndPrintVerboseObservations(
216  validObsIds, obsData.latitudes, obsData.longitudes, obsData.datetimes,
217  obsData.pressures.get_ptr(), obsData.stationIds, bgErrorHorizCorrScales);
218 
219  // Identify buddy pairs
220 
221  const std::vector<float> *pressures =
222  options_.sortByPressure ? obsData.pressures.get_ptr() : nullptr;
223  MetOfficeBuddyPairFinder buddyPairFinder(options_, obsData.latitudes, obsData.longitudes,
224  obsData.datetimes, pressures,
225  obsData.stationIds);
226  const std::vector<MetOfficeBuddyPair> buddyPairs = buddyPairFinder.findBuddyPairs(validObsIds);
227 
228  // Fetch data/metadata required for buddy-check calculation.
229  const oops::Variables &observedVars = obsdb_.obsvariables();
230  ioda::ObsDataVector<float> obsValues(obsdb_, filtervars.toOopsVariables(), "ObsValue");
231 
232  auto getFilterVariableName = [&] (size_t filterVarIndex) {
233  return filtervars.variable(filterVarIndex).variable();
234  };
235 
236  auto getScalarVariableData = [&] (size_t filterVarIndex) {
237  // Fetch raw data corresponding this the specific variable of interest.
238  const size_t observedVarIndex = observedVars.find(getFilterVariableName(filterVarIndex));
239 
240  std::vector<float> bgValues;
241  data_.get(ufo::Variable(filtervars[filterVarIndex], "HofX"), bgValues);
242  std::vector<float> bgErrors;
243  data_.get(backgroundErrorVariable(filtervars[filterVarIndex]), bgErrors);
244 
245  // Store data for usage.
246  ScalarVariableData data;
247  data_.get(ufo::Variable(filtervars[filterVarIndex], "GrossErrorProbability"),
248  data.flatGrossErrorProbabilities);
249 
250  // Unravel these flattened vectors.
251  data.varFlags = extract1stLev((*flags_)[observedVarIndex], profileIndex);
252  data.obsValues = unravel(obsValues[filterVarIndex], profileIndex);
253  data.obsErrors = unravel((*obserr_)[observedVarIndex], profileIndex);
254  data.bgValues = unravel(bgValues, profileIndex);
255  data.bgErrors = unravel(bgErrors, profileIndex);
256  data.grossErrorProbabilities = unravel(data.flatGrossErrorProbabilities, profileIndex);
257  return data;
258  };
259 
260  // Buddy-check all filter variables
261 
262  // Gross error probabilities updated by buddy check, indexed by variable name.
263  // (The updated values are copied to the ObsSpace only after all variables have been processed).
264  std::map<std::string, std::vector<float>> calculatedGrossErrProbsByVarName;
265 
266  bool previousVariableWasFirstComponentOfTwo = false;
267  for (size_t filterVarIndex = 0; filterVarIndex < filtervars.size(); ++filterVarIndex) {
268  if (previousVariableWasFirstComponentOfTwo) {
269  // Vector (two-component) variable
270  ScalarVariableData firstComponentData =
271  getScalarVariableData(filterVarIndex - 1);
272  ScalarVariableData secondComponentData =
273  getScalarVariableData(filterVarIndex);
274 
275  for (Eigen::Index i = 0; i < firstComponentData.grossErrorProbabilities.rows(); ++i) {
276  for (Eigen::Index j = 0; j < firstComponentData.grossErrorProbabilities.cols(); ++j) {
277  firstComponentData.grossErrorProbabilities(i, j) =
278  std::max(firstComponentData.grossErrorProbabilities(i, j),
279  secondComponentData.grossErrorProbabilities(i, j));
280  }
281  }
282 
283  checkVectorData(buddyPairs, firstComponentData.varFlags, verbose,
284  bgErrorHorizCorrScales, obsData.stationIds, obsData.datetimes,
285  obsData.pressuresML.get_ptr(), firstComponentData.obsValues,
286  secondComponentData.obsValues, firstComponentData.obsErrors,
287  firstComponentData.bgValues, secondComponentData.bgValues,
288  firstComponentData.bgErrors, firstComponentData.grossErrorProbabilities);
289  // OPS doesn't update the gross error probabilities of the second component variable,
290  // but it seems more consistent to do so (and it facilitates the implementation of
291  // flagRejectedObservations().
292  // The implementation of checkVectorSurfaceData() still assumes that the *input* gross error
293  // probabilities, flags and background error estimates are the same for both components.
294 
295  // Update the flat GPE values.
296  updateFlatData(firstComponentData.flatGrossErrorProbabilities,
297  firstComponentData.grossErrorProbabilities, profileIndex);
298  calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex - 1)] =
299  firstComponentData.flatGrossErrorProbabilities;
300  calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex)] =
301  std::move(firstComponentData.flatGrossErrorProbabilities);
302  previousVariableWasFirstComponentOfTwo = false;
303  } else {
304  if (filtervars[filterVarIndex].options().getBool("first_component_of_two", false)) {
305  previousVariableWasFirstComponentOfTwo = true;
306  } else {
307  // Scalar variable
308  ScalarVariableData data =
309  getScalarVariableData(filterVarIndex);
310  checkScalarData(buddyPairs, data.varFlags, verbose, bgErrorHorizCorrScales,
311  obsData.stationIds, obsData.datetimes, obsData.pressuresML.get_ptr(),
312  data.obsValues, data.obsErrors,
313  data.bgValues, data.bgErrors,
314  data.grossErrorProbabilities);
315 
316  // Update the flat GPE values.
317  updateFlatData(data.flatGrossErrorProbabilities, data.grossErrorProbabilities,
318  profileIndex);
319  calculatedGrossErrProbsByVarName[getFilterVariableName(filterVarIndex)] =
320  std::move(data.flatGrossErrorProbabilities);
321  }
322  }
323  }
324 
325  // Update observations flags and gross error probabilities
326 
327  for (const auto &varNameAndGrossErrProbs : calculatedGrossErrProbsByVarName)
328  obsdb_.put_db("GrossErrorProbability", varNameAndGrossErrProbs.first,
329  varNameAndGrossErrProbs.second);
330 
331  flagRejectedObservations(filtervars, calculatedGrossErrProbsByVarName, flagged);
332 }
333 
335  return Variable(filterVariable.variable() + "_background_error@ObsDiag");
336 }
337 
339  const boost::optional<Eigen::ArrayXXi> & profileIndex) const {
340  MetaData obsData;
341 
342  obsData.latitudes.resize(obsdb_.nlocs());
343  obsdb_.get_db("MetaData", "latitude", obsData.latitudes);
344 
345  obsData.longitudes.resize(obsdb_.nlocs());
346  obsdb_.get_db("MetaData", "longitude", obsData.longitudes);
347 
348  obsData.datetimes.resize(obsdb_.nlocs());
349  obsdb_.get_db("MetaData", "datetime", obsData.datetimes);
350 
351  if (obsdb_.has("MetaData", "air_pressure")) {
352  obsData.pressures = std::vector<float>(obsdb_.nlocs());
353  obsdb_.get_db("MetaData", "air_pressure", *obsData.pressures);
354  obsData.pressuresML = unravel(*obsData.pressures, profileIndex);
355  }
356  obsData.stationIds = getStationIds();
357 
358  if (profileIndex) {
359  obsData.latitudes = extract1stLev(obsData.latitudes, profileIndex);
360  obsData.longitudes = extract1stLev(obsData.longitudes, profileIndex);
361  obsData.datetimes = extract1stLev(obsData.datetimes, profileIndex);
362  obsData.stationIds = extract1stLev(obsData.stationIds, profileIndex);
363  if (obsdb_.has("MetaData", "air_pressure")) {
364  obsData.pressures = extract1stLev(*obsData.pressures, profileIndex);
365  }
366  }
367  return obsData;
368 }
369 
370 std::vector<int> MetOfficeBuddyCheck::getStationIds() const {
371  const boost::optional<Variable> &stationIdVariable = options_.stationIdVariable.value();
372  if (stationIdVariable == boost::none) {
373  if (obsdb_.obs_group_vars().empty()) {
374  // Observations were not grouped into records.
375  // Assume all observations were taken by the same station.
376  return std::vector<int>(obsdb_.nlocs(), 0);
377  } else {
378  const std::vector<size_t> &recordNumbers = obsdb_.recnum();
379  return std::vector<int>(recordNumbers.begin(), recordNumbers.end());
380  }
381  } else {
382  switch (obsdb_.dtype(stationIdVariable->group(), stationIdVariable->variable())) {
383  case ioda::ObsDtype::Integer:
384  {
385  std::vector<int> stationIds(obsdb_.nlocs());
386  obsdb_.get_db(stationIdVariable->group(), stationIdVariable->variable(), stationIds);
387  return stationIds;
388  }
389 
390  case ioda::ObsDtype::String:
391  {
392  std::vector<std::string> stringIds(obsdb_.nlocs());
393  obsdb_.get_db(stationIdVariable->group(), stationIdVariable->variable(), stringIds);
394  return mapDistinctValuesToDistinctInts(stringIds);
395  }
396 
397  default:
398  throw eckit::UserError("Only integer and string variables may be used as station IDs",
399  Here());
400  }
401  }
402 }
403 
405  const std::vector<size_t> &validObsIds, const std::vector<float> &latitudes) const {
406 
407  std::vector<double> abscissas, ordinates;
408  for (const std::pair<const float, float> &xy :
410  abscissas.push_back(xy.first);
411  ordinates.push_back(xy.second);
412  }
413  PiecewiseLinearInterpolation horizontalCorrelationScaleInterp(std::move(abscissas),
414  std::move(ordinates));
415 
416  std::vector<float> scales(latitudes.size());
417  for (size_t obsId : validObsIds) {
418  scales[obsId] = horizontalCorrelationScaleInterp(latitudes[obsId]);
419  }
420 
421  return scales;
422 }
423 
425  const std::vector<size_t> &validObsIds,
426  const std::vector<float> &latitudes,
427  const std::vector<float> &longitudes,
428  const std::vector<util::DateTime> &datetimes,
429  const std::vector<float> *pressures,
430  const std::vector<int> &stationIds,
431  const std::vector<float> &bgErrorHorizCorrScales) const {
432 
433  size_t numVerboseObs = 0;
434  std::vector<bool> verbose(latitudes.size(), false);
435 
436  for (size_t obsId : validObsIds) {
437  verbose[obsId] = std::any_of(
438  options_.tracedBoxes.value().begin(),
439  options_.tracedBoxes.value().end(),
440  [&](const LatLonBoxParameters &box)
441  { return box.contains(latitudes[obsId], longitudes[obsId]); });
442 
443  if (verbose[obsId]) {
444  if (numVerboseObs == 0) {
445  oops::Log::trace() << "Obs StationId Lat Lon Pressure "
446  "Datetime CorrScale\n";
447  }
448 
449  ++numVerboseObs;
450  const float pressure = pressures != nullptr ? (*pressures)[obsId] : 0;
451  oops::Log::trace() << boost::format("%5d %9d %7.2f %7.2f %8.0f %s %9.1f\n") %
452  obsId % stationIds[obsId] % latitudes[obsId] % longitudes[obsId] %
453  pressure % datetimes[obsId] % bgErrorHorizCorrScales[obsId];
454  }
455  }
456 
457  oops::Log::trace() << "Buddy check: " << numVerboseObs << " verbose observations" << std::endl;
458 
459  return verbose;
460 }
461 
462 void MetOfficeBuddyCheck::checkScalarData(const std::vector<MetOfficeBuddyPair> &pairs,
463  const std::vector<int> &flags,
464  const std::vector<bool> &verbose,
465  const std::vector<float> &bgErrorHorizCorrScales,
466  const std::vector<int> &stationIds,
467  const std::vector<util::DateTime> &datetimes,
468  const Eigen::ArrayXXf *pressures,
469  const Eigen::ArrayXXf &obsValues,
470  const Eigen::ArrayXXf &obsErrors,
471  const Eigen::ArrayXXf &bgValues,
472  const Eigen::ArrayXXf &bgErrors,
473  Eigen::ArrayXXf &pges) const {
474  using util::sqr;
475  const boost::optional<int> &nolevs = options_.numLevels.value();
476  const int numLevels = nolevs ? *nolevs : 0; // number of actual levels
477  const Eigen::Index cols = obsValues.cols(); // number of data columns (levels) to loop over
478 
479  const bool isMaster = obsdb_.comm().rank() == 0;
480  if (isMaster) {
481  oops::Log::trace() << "ObsA ObsB StatIdA StatIdB lev DiffA DiffB "
482  "Dist Corr Agree PgeA PgeB Mult\n";
483  }
484 
485  const double invTemporalCorrScale = 1.0 / options_.temporalCorrelationScale.value().toSeconds();
486  const float missing = util::missingValue(1.0f);
487 
488  // Loop over buddy pairs
489  for (const MetOfficeBuddyPair &pair : pairs) {
490  const size_t jA = pair.obsIdA;
491  const size_t jB = pair.obsIdB;
492 
493  // Check that observations are valid and buddy check is required
494  if (flags[jA] != QCflags::pass || flags[jB] != QCflags::pass)
495  continue; // skip to next pair
496 
497  // eqn 3.9
498  // - hcScale: horizontal error scale for the pair of obs
499  // - scaledDist: scaled Distance between observations
500  const double hcScale = 0.5 * (bgErrorHorizCorrScales[jA] + bgErrorHorizCorrScales[jB]);
501  const double scaledDist = pair.distanceInKm / hcScale;
502 
503  // Background error correlation between observation positions.
504  // eqns 3.10, 3.11
505  double corr;
506  if (numLevels == 1) {
507  // Single level data.
508  corr = (1.0 + scaledDist) *
509  std::exp(-scaledDist -
511  sqr(std::log((*pressures)(jA, 0)/(*pressures)(jB, 0))) -
512  sqr((datetimes[jA] - datetimes[jB]).toSeconds() * invTemporalCorrScale));
513  } else {
514  // Multi-level/surface data; treat vertical correlation as 1.0
515  corr = (1.0 + scaledDist) *
516  std::exp(-scaledDist -
517  sqr((datetimes[jA] - datetimes[jB]).toSeconds() * invTemporalCorrScale));
518  }
519  if (corr < 0.1) // Check against minimum background error correlation.
520  continue; // skip to next pair
521 
522  // Loop over each level
523  for (Eigen::Index jlev=0; jlev < cols; jlev++) {
524  if (pges(jA, jlev) >= maxGrossErrorProbability ||
525  pges(jB, jlev) >= maxGrossErrorProbability ||
526  pges(jA, jlev) == missing || pges(jB, jlev) == missing)
527  continue; // skip to next level
528 
529  // Differences from background
530  const double diffA = obsValues(jA, jlev) - bgValues(jA, jlev);
531  const double diffB = obsValues(jB, jlev) - bgValues(jB, jlev);
532  // Estimated error variances (ob+bk) (eqn 2.5)
533  const double errVarA = sqr(obsErrors(jA, jlev)) + sqr(bgErrors(jA, jlev));
534  const double errVarB = sqr(obsErrors(jB, jlev)) + sqr(bgErrors(jB, jlev));
535  // Background error covariance between ob positions (eqn 3.13)
536  const double covar = corr * bgErrors(jA, jlev) * bgErrors(jB, jlev);
537  // (Total error correlation between ob positions)**2 (eqn 3.14)
538  const double rho2 = sqr(covar) / (errVarA * errVarB);
539  // Argument for exponents
540  double expArg = -(0.5 * rho2 / (1.0 - rho2)) *
541  (sqr(diffA) / errVarA + sqr(diffB) / errVarB - 2.0 * diffA * diffB / covar);
542  expArg = options_.dampingFactor1 * (-0.5 * std::log(1.0 - rho2) + expArg); // exponent of
543  expArg = std::min(expArgMax, std::max(-expArgMax, expArg)); // eqn 3.18
544  // Z = P(OA)*P(OB)/P(OA and OB)
545  double z = 1.0 / (1.0 - (1.0 - pges(jA, jlev)) * (1.0 - pges(jB, jlev)) *
546  (1.0 - std::exp(expArg)));
547  if (z <= 0.0)
548  z = 1.0; // rounding error control
549  z = std::pow(z, options_.dampingFactor2); // eqn 3.16
550  pges(jA, jlev) *= z; // eqn 3.17
551  pges(jB, jlev) *= z; // eqn 3.17
552  if (isMaster && (verbose[jA] || verbose[jB])) {
553  oops::Log::trace() << boost::format("%5d %5d %8d %8d %5d "
554  "%5.1f %5.1f %6.1f "
555  "%5.3f %6.3f %6.3f %6.3f %6.3f\n") %
556  jA % jB % stationIds[jA] % stationIds[jB] % jlev %
557  diffA % diffB % pair.distanceInKm %
558  corr % std::exp(expArg) % pges(jA, jlev) % pges(jB, jlev) % z;
559  }
560  }
561  }
562 }
563 
564 
565 void MetOfficeBuddyCheck::checkVectorData(const std::vector<MetOfficeBuddyPair> &pairs,
566  const std::vector<int> &flags,
567  const std::vector<bool> &verbose,
568  const std::vector<float> &bgErrorHorizCorrScales,
569  const std::vector<int> &stationIds,
570  const std::vector<util::DateTime> &datetimes,
571  const Eigen::ArrayXXf *pressures,
572  const Eigen::ArrayXXf &uObsValues,
573  const Eigen::ArrayXXf &vObsValues,
574  const Eigen::ArrayXXf &obsErrors,
575  const Eigen::ArrayXXf &uBgValues,
576  const Eigen::ArrayXXf &vBgValues,
577  const Eigen::ArrayXXf &bgErrors,
578  Eigen::ArrayXXf &pges) const {
579  using util::sqr;
580  const boost::optional<int> &nolevs = options_.numLevels.value();
581  const int numLevels = nolevs ? *nolevs : 0; // number of actual levels
582  const Eigen::Index cols = uObsValues.cols(); // number of data columns (levels) to loop over
583 
584  const bool isMaster = obsdb_.comm().rank() == 0;
585  if (isMaster) {
586  oops::Log::trace() << "ObsA ObsB StatIdA StatIdB lev LDiffA LDiffB TDiffA TDiffB "
587  "Dist Corr Agree PgeA PgeB Mult\n";
588  }
589  const double invTemporalCorrScale = 1.0 / options_.temporalCorrelationScale.value().toSeconds();
590  const float missing = util::missingValue(1.0f);
591 
592  // Loop over buddy pairs
593  for (const MetOfficeBuddyPair &pair : pairs) {
594  const size_t jA = pair.obsIdA;
595  const size_t jB = pair.obsIdB;
596 
597  // Check that observations are valid and buddy check is required
598  if (flags[jA] != QCflags::pass || flags[jB] != QCflags::pass)
599  continue; // skip to next pair
600 
601  // eqn 3.9
602  // - hcScale: horizontal error scale for the pair of obs
603  // - scaledDist: scaled Distance between observations
604  const double hcScale = 0.5 * (bgErrorHorizCorrScales[jA] + bgErrorHorizCorrScales[jB]);
605  const double scaledDist = pair.distanceInKm / hcScale;
606 
607  // Background error correlation between observation positions.
608  // eqns 3.10, 3.11
609  double lCorr;
610  if (numLevels == 1) {
611  lCorr = std::exp(-scaledDist -
613  sqr(std::log((*pressures)(jA, 0)/(*pressures)(jB, 0))) -
614  sqr((datetimes[jA] - datetimes[jB]).toSeconds() * invTemporalCorrScale));
615  } else {
616  // Multi-level/surface data; treat vertical correlation as 1.0
617  lCorr = std::exp(-scaledDist -
618  sqr((datetimes[jA] - datetimes[jB]).toSeconds() * invTemporalCorrScale));
619  }
620  if ((1.0 + scaledDist) * lCorr < 0.1) // Check against minimum background error correlation.
621  continue; // skip to next pair
622 
623  // Calculate longitudinal and transverse wind components
624  const double sinRotA = std::sin(pair.rotationAInRad);
625  const double cosRotA = std::cos(pair.rotationAInRad);
626  const double sinRotB = std::sin(pair.rotationBInRad);
627  const double cosRotB = std::cos(pair.rotationBInRad);
628 
629  // Loop over each level
630  for (Eigen::Index jlev=0; jlev < cols; jlev++) {
631  if (pges(jA, jlev) >= maxGrossErrorProbability ||
632  pges(jB, jlev) >= maxGrossErrorProbability ||
633  pges(jA, jlev) == missing || pges(jB, jlev) == missing)
634  continue; // skip to next level
635 
636  // Difference from background - longitudinal wind (eqn 3.19)
637  const double lDiffA = cosRotA * (uObsValues(jA, jlev) - uBgValues(jA, jlev))
638  + sinRotA * (vObsValues(jA, jlev) - vBgValues(jA, jlev));
639  // Difference from background - transverse wind (eqn 3.20)
640  const double tDiffA = - sinRotA * (uObsValues(jA, jlev) - uBgValues(jA, jlev))
641  + cosRotA * (vObsValues(jA, jlev) - vBgValues(jA, jlev));
642  // Difference from background - longitudinal wind (eqn 3.19)
643  const double lDiffB = cosRotB * (uObsValues(jB, jlev) - uBgValues(jB, jlev))
644  + sinRotB * (vObsValues(jB, jlev) - vBgValues(jB, jlev));
645  // Difference from background - transverse wind (eqn 3.20)
646  const double tDiffB = - sinRotB * (uObsValues(jB, jlev) - uBgValues(jB, jlev))
647  + cosRotB * (vObsValues(jB, jlev) - vBgValues(jB, jlev));
648 
649  // Estimated error variances (ob + bk; component wind variance; eqn 2.5)
650  const double errVarA = sqr(obsErrors(jA, jlev)) + sqr(bgErrors(jA, jlev));
651  const double errVarB = sqr(obsErrors(jB, jlev)) + sqr(bgErrors(jB, jlev));
652 
653  // Calculate covariances and probabilities eqn 3.12, 3.13
654  const double lCovar = lCorr * bgErrors(jA, jlev) * bgErrors(jB, jlev);
655  const double tCovar = (1.0 - options_.nonDivergenceConstraint * scaledDist) * lCovar;
656  // rho2 = (total error correlation between ob positions)**2
657  const double lRho2 = sqr(lCovar) / (errVarA * errVarB); // eqn 3.14
658  const double tRho2 = sqr(tCovar) / (errVarA * errVarB); // eqn 3.14
659  // Argument for exponents
660  double expArg;
661  if (std::abs (tRho2) <= 0.00001)
662  expArg = 0.0; // prevent division by tCovar=0.0
663  else
664  expArg = -(0.5 * tRho2 / (1.0 - tRho2)) *
665  (sqr(tDiffA) / errVarA + sqr(tDiffB) / errVarB - 2.0 * tDiffA * tDiffB / tCovar);
666  expArg = expArg - (0.5 * lRho2 / (1.0 - lRho2)) *
667  (sqr(lDiffA) / errVarA + sqr(lDiffB) / errVarB - 2.0 * lDiffA * lDiffB / lCovar);
668  expArg = options_.dampingFactor1 * (-0.5 * std::log((1.0 - lRho2) * (1.0 - lRho2)) + expArg);
669  expArg = std::min(expArgMax, std::max(-expArgMax, expArg)); // eqn 3.22
670  // Z = P(OA)*P(OB)/P(OA and OB)
671  double z = 1.0 / (1.0 - (1.0 - pges(jA, jlev)) * (1.0 - pges(jB, jlev)) *
672  (1.0 - std::exp(expArg)));
673  if (z <= 0.0)
674  z = 1.0; // rounding error control
675  z = std::pow(z, options_.dampingFactor2); // eqn 3.16
676  pges(jA, jlev) *= z; // eqn 3.17
677  pges(jB, jlev) *= z; // eqn 3.17
678 
679  if (isMaster && (verbose[jA] || verbose[jB])) {
680  oops::Log::trace() << boost::format("%5d %5d %8d %8d %5d "
681  "%6.1f %6.1f %6.1f %6.1f %6.1f "
682  "%5.3f %6.3f %6.3f %6.3f %6.3f\n") %
683  jA % jB % stationIds[jA] % stationIds[jB] % jlev %
684  lDiffA % lDiffB % tDiffA % tDiffB % pair.distanceInKm %
685  lCorr % std::exp(expArg) % pges(jA, jlev) % pges(jB, jlev) % z;
686  }
687  }
688  }
689 }
690 
691 
693  const std::vector<bool> & apply, const boost::optional<Eigen::ArrayXXi> & profileIndex) const {
694  std::vector<size_t> validObsIds;
695 
696  const auto lev1flags = extract1stLev((*flags_)[0], profileIndex);
697  const std::vector<bool> lev1apply = extract1stLev(apply, profileIndex);
698  for (size_t obsId = 0; obsId < lev1flags.size(); ++obsId)
699  // TODO(wsmigaj): The condition below may need reviewing.
700  // Perhaps we should process only observations marked as passed in all filter variables?
701  // Or those marked as passed in at least one filter variable?
702  if (lev1apply[obsId] && lev1flags[obsId] == QCflags::pass)
703  validObsIds.push_back(obsId);
704  return validObsIds;
705 }
706 
708  const Variables &filtervars,
709  const std::map<std::string, std::vector<float>> &grossErrProbsByVarName,
710  std::vector<std::vector<bool>> &flagged) const {
711  ASSERT(filtervars.size() == flagged.size());
712 
713  for (size_t varIndex = 0; varIndex < filtervars.size(); ++varIndex) {
714  const std::string varName = fullVariableName(filtervars[varIndex]);
715  const std::vector<float> &grossErrProbs = grossErrProbsByVarName.at(varName);
716  std::vector<bool> &variableFlagged = flagged[varIndex];
717  ASSERT(grossErrProbs.size() == variableFlagged.size());
718 
719  for (size_t obsId = 0; obsId < grossErrProbs.size(); ++obsId)
720  if (grossErrProbs[obsId] >= options_.rejectionThreshold)
721  variableFlagged[obsId] = true;
722  }
723 }
724 
725 void MetOfficeBuddyCheck::print(std::ostream & os) const {
726  os << "MetOfficeBuddyCheck: config = " << options_ << std::endl;
727 }
728 
729 } // namespace ufo
Base class for UFO QC filters.
Definition: FilterBase.h:45
ufo::Variables filtervars_
Definition: FilterBase.h:60
A box covering a specified (closed) interval of latitudes and longitudes.
std::vector< bool > flagAndPrintVerboseObservations(const std::vector< size_t > &validObsIds, const std::vector< float > &latitudes, const std::vector< float > &longitudes, const std::vector< util::DateTime > &times, const std::vector< float > *pressures, const std::vector< int > &stationIds, const std::vector< float > &bgErrorHorizCorrScales) const
Identifies observations whose buddy checks should be logged.
void print(std::ostream &) const override
MetOfficeBuddyCheck(ioda::ObsSpace &obsdb, const Parameters_ &parameters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
std::vector< float > calcBackgroundErrorHorizontalCorrelationScales(const std::vector< size_t > &validObsIds, const std::vector< float > &latitudes) const
Calculates and returns background error correlation scales at observation locations.
void checkVectorData(const std::vector< MetOfficeBuddyPair > &pairs, const std::vector< int > &flags, const std::vector< bool > &verbose, const std::vector< float > &bgErrorHorizCorrScales, const std::vector< int > &stationIds, const std::vector< util::DateTime > &datetimes, const Eigen::ArrayXXf *pressures, const Eigen::ArrayXXf &uObsValues, const Eigen::ArrayXXf &vObsValues, const Eigen::ArrayXXf &obsErrors, const Eigen::ArrayXXf &uBgValues, const Eigen::ArrayXXf &vBgValues, const Eigen::ArrayXXf &bgErrors, Eigen::ArrayXXf &pges) const
Buddy check for vector (two-dimensional) quantities.
Variable backgroundErrorVariable(const Variable &filterVariable) const
Return the name of the variable containing the background error estimate of the specified filter vari...
MetaData collectMetaData(const boost::optional< Eigen::ArrayXXi > &profileIndex) const
Collects and returns metadata of all observations.
void checkScalarData(const std::vector< MetOfficeBuddyPair > &pairs, const std::vector< int > &flags, const std::vector< bool > &verbose, const std::vector< float > &bgErrorHorizCorrScales, const std::vector< int > &stationIds, const std::vector< util::DateTime > &datetimes, const Eigen::ArrayXXf *pressures, const Eigen::ArrayXXf &obsValues, const Eigen::ArrayXXf &obsErrors, const Eigen::ArrayXXf &bgValues, const Eigen::ArrayXXf &bgErrors, Eigen::ArrayXXf &pges) const
Buddy check for scalar quantities.
std::vector< int > getStationIds() const
Returns a vector of integer-valued station IDs, obtained from the source indicated by the filter para...
void applyFilter(const std::vector< bool > &, const Variables &, std::vector< std::vector< bool >> &) const override
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const boost::optional< Eigen::ArrayXXi > &profileIndex) const
Returns a vector of IDs of all observations that should be buddy-checked.
void flagRejectedObservations(const Variables &filtervars, const std::map< std::string, std::vector< float >> &grossErrProbsByVarName, std::vector< std::vector< bool >> &flagged) const
Options controlling the operation of the MetOfficeBuddyCheck filter.
oops::Parameter< double > verticalCorrelationScale
Vertical correlation scale (relates to the ratio of pressures).
oops::Parameter< util::Duration > temporalCorrelationScale
Temporal correlation scale.
oops::Parameter< std::map< float, float > > horizontalCorrelationScaleInterpolationPoints
oops::Parameter< double > nonDivergenceConstraint
Non-divergence constraint. Used only for vector variables.
oops::OptionalParameter< Variable > stationIdVariable
oops::Parameter< float > rejectionThreshold
Observations will be rejected if the gross error probability lies at or above this threshold.
oops::Parameter< std::vector< LatLonBoxParameters > > tracedBoxes
Tracing information will be output for observations lying within any of the specified boxes.
Finds pairs of close observations ("buddies") to check against each other.
std::vector< MetOfficeBuddyPair > findBuddyPairs(const std::vector< size_t > &validObsIds)
Returns a list of MetOfficeBuddyPair objects representing pairs of "buddy" observations that should b...
void get(const Variable &varname, std::vector< float > &values) const
Fills a std::vector with values of the specified variable.
ufo::Variables allvars_
std::shared_ptr< ioda::ObsDataVector< float > > obserr_
ioda::ObsSpace & obsdb_
std::shared_ptr< ioda::ObsDataVector< int > > flags_
Represents a piecewise linear interpolation of a set of data points.
const std::string & variable() const
Definition: Variable.cc:99
const std::string & group() const
Definition: Variable.cc:116
Variable variable(const size_t) const
Return a given constituent "primitive" (single-channel) variable.
Definition: Variables.cc:114
oops::Variables toOopsVariables() const
Definition: Variables.cc:156
size_t size() const
Return the number of constituent Variable objects (some of which may contain multiple channels).
Definition: Variables.cc:92
constexpr int pass
Definition: QCflags.h:14
constexpr int missing
Definition: QCflags.h:20
void updateFlatData(std::vector< float > &flatArray, const Eigen::ArrayXXf &array, const boost::optional< Eigen::ArrayXXi > &profileIndex)
Update the provided flat array with values from the provided eigen array.
Eigen::ArrayXXf unravel(const std::vector< float > &flattenedArray, const boost::optional< Eigen::ArrayXXi > &profileIndex)
Return a 2D eigen array from the provided flattened vector.
Eigen::ArrayXXi deriveIndices(const ioda::ObsSpace &obsdb, const int numLevels)
Derive a mapping between the ObsSpace flat data and an unraveled 2D eigen array representation.
std::vector< int > mapDistinctValuesToDistinctInts(const std::vector< T > &values)
std::vector< T > uniqueElements(const std::vector< T > &v)
std::vector< T > extract1stLev(const std::vector< T > &flattenedArray, const boost::optional< Eigen::ArrayXXi > &profileIndex)
brief Extract 1st level data from the provided flattened vector.
Definition: RunCRTM.h:27
util::Duration abs(const util::Duration &duration)
Metadata of all observations processed by the filter.
boost::optional< Eigen::ArrayXXf > pressuresML
boost::optional< std::vector< float > > pressures
std::vector< util::DateTime > datetimes
Properties of a pair of observations checked against each other during buddy check.