UFO
BayesianBackgroundCheck.cc
Go to the documentation of this file.
1 /* * (C) Crown copyright 2021, Met Office
2  *
3  * This software is licensed under the terms of the Apache Licence Version 2.0
4  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5  */
6 
8 
9 #include <algorithm>
10 #include <cmath>
11 #include <iomanip>
12 #include <iostream>
13 #include <limits>
14 #include <set>
15 #include <string>
16 #include <vector>
17 
18 #include "ioda/ObsDataVector.h"
19 #include "ioda/ObsSpace.h"
20 
21 #include "oops/util/Logger.h"
22 
24 #include "ufo/filters/QCflags.h"
25 
27 
28 namespace ufo {
29 
30 // -----------------------------------------------------------------------------
31 /// BayesianBackgroundCheck: check observation closeness to background, accounting
32 /// for probability of gross error, i.e. that observation is bad.
33 
35  ioda::ObsSpace & obsdb,
36  const Parameters_ & parameters,
37  std::shared_ptr<ioda::ObsDataVector<int> > flags,
38  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
39  : FilterBase(obsdb, parameters, flags, obserr), parameters_(parameters)
40 {
41  oops::Log::trace() << "BayesianBackgroundCheck constructor" << std::endl;
42  allvars_ += Variables(filtervars_, "HofX");
43  allvars_ += Variables(filtervars_, "QCFlags");
44  for (size_t i = 0; i < filtervars_.size(); ++i) {
46  }
47 }
48 
49 // -----------------------------------------------------------------------------
50 
52  oops::Log::trace() << "BayesianBackgroundCheck destructed" << std::endl;
53 }
54 
55 // -----------------------------------------------------------------------------
56 /// Return the name of the variable containing the background error estimate of the
57 /// specified filter variable.
58 
60  return Variable(filterVariable.variable() + "_background_error@ObsDiag");
61 }
62 
63 // -----------------------------------------------------------------------------
64 /// Reduce a vector to only the elements for which j_reduced=true.
65 
66 template <class T>
67 std::vector<T> BayesianBackgroundCheck::reduceVector(const std::vector<T> &vector_full,
68  const std::vector<size_t> &j_reduced) const {
69  size_t reduced_size = j_reduced.size();
70  std::vector<T> vector_reduced(reduced_size);
71  for (size_t i_reduced=0; i_reduced < reduced_size; ++i_reduced) {
72  vector_reduced[i_reduced] = vector_full[j_reduced[i_reduced]];
73  }
74  return vector_reduced;
75 }
76 
77 // -----------------------------------------------------------------------------
78 /// Copy a reduced vector back into the correct indices of the full vector.
79 
80 template <class T>
81 void BayesianBackgroundCheck::unreduceVector(const std::vector<T> &vector_reduced,
82  std::vector<T> &vector_full,
83  const std::vector<size_t> &j_reduced) const {
84  size_t reduced_size = vector_reduced.size();
85  for (size_t i_reduced=0; i_reduced < reduced_size; ++i_reduced) {
86  vector_full[j_reduced[i_reduced]] = vector_reduced[i_reduced];
87  }
88 }
89 
90 // -----------------------------------------------------------------------------
91 /// Apply the Bayesian background check filter.
92 
93 void BayesianBackgroundCheck::applyFilter(const std::vector<bool> & apply,
94  const Variables & filtervars,
95  std::vector<std::vector<bool>> & flagged) const {
96  oops::Log::trace() << "BayesianBackgroundCheck postFilter" << std::endl;
97  const oops::Variables observed = obsdb_.obsvariables();
98 
99  oops::Log::debug() << "BayesianBackgroundCheck obserr: " << *obserr_ << std::endl;
100 
101  ioda::ObsDataVector<float> obs(obsdb_, filtervars.toOopsVariables(), "ObsValue");
102 
103  Variables varhofx(filtervars_, "HofX");
104  Variables varflags(filtervars_, "QCFlags");
105 
106  // Probability density of bad observations, PdBad:
107  const std::vector<float> PdBad(obsdb_.nlocs(), parameters_.PdBad.value());
108  // NOT profiles averaged to model levels (single-level anyway):
109  const bool ModelLevels = false;
110 
111  bool previousVariableWasFirstComponentOfTwo = false;
112  bool thisVariableIsFirstComponentOfTwo = false;
113  // Loop through all filter variables. .yaml will say if it's 2-component.
114  // If so, it skips the first component and then processes both together when
115  // (previousVariableWasFirstComponentOfTwo=True).
116  // Otherwise it processes the scalar variable.
117  for (size_t filterVarIndex = 0; filterVarIndex < filtervars.size(); ++filterVarIndex) {
118  if (filtervars[filterVarIndex].options().getBool("first_component_of_two", false)) {
119  previousVariableWasFirstComponentOfTwo = true;
120  } else {
121  std::string varname1, varname2;
122  size_t iv1, iv2;
123  // H(x):
124  std::vector<float> hofx1, hofx2;
125  // H(x) error:
126  std::vector<float> hofxerr;
127  data_.get(backgrErrVariable(filtervars[filterVarIndex]), hofxerr);
128  // PGE:
129  std::vector<float> PGE1(obsdb_.nlocs(), parameters_.PGE.value());
130  // after-check PGE:
131  std::vector<float> PGEBd1(obsdb_.nlocs());
132  // QC flags:
133  std::vector<int> qcflags1(obsdb_.nlocs());
134  std::vector<float> firstComponentObVal, secondComponentObVal;
135  // make note of conditions on apply and flags_:
136  std::vector<bool> applycondition(obsdb_.nlocs(), false);
137  // index mapping between full and reduced vectors:
138  std::vector<size_t> j_reduced;
139 
140  if (previousVariableWasFirstComponentOfTwo) {
141  varname1 = filtervars.variable(filterVarIndex-1).variable();
142  varname2 = filtervars.variable(filterVarIndex).variable();
143  iv1 = observed.find(varname1);
144  iv2 = observed.find(varname2);
145  // H(x):
146  data_.get(varhofx.variable(filterVarIndex-1), hofx1);
147  data_.get(varhofx.variable(filterVarIndex), hofx2);
148  // observation values:
149  firstComponentObVal = obs[filterVarIndex-1];
150  secondComponentObVal = obs[filterVarIndex];
151  // QC flags (all zeros, or read from file if possible):
152  if (obsdb_.has("QCFlags", varname1)) {
153  data_.get(varflags.variable(filterVarIndex-1), qcflags1);
154  oops::Log::debug() << "Got qcflags1 from file: " << qcflags1 << std::endl;
155  }
156  for (size_t jobs=0; jobs < obsdb_.nlocs(); ++jobs) {
157  if (apply[jobs] && (*flags_)[iv1][jobs] == QCflags::pass
158  && (*flags_)[iv2][jobs] == QCflags::pass) {
159  applycondition[jobs] = true;
160  j_reduced.push_back(jobs);
161  }
162  }
163  } else {
164  varname1 = filtervars.variable(filterVarIndex).variable();
165  iv1 = observed.find(varname1);
166  // H(x):
167  data_.get(varhofx.variable(filterVarIndex), hofx1);
168  // observation values:
169  firstComponentObVal = obs[filterVarIndex];
170  // QC flags (all zeros, or read from file if possible):
171  if (obsdb_.has("QCFlags", varname1)) {
172  data_.get(varflags.variable(filterVarIndex), qcflags1);
173  oops::Log::debug() << "Got qcflags1 from file: " << qcflags1 << std::endl;
174  }
175  for (size_t jobs=0; jobs < obsdb_.nlocs(); ++jobs) {
176  if (apply[jobs] && (*flags_)[iv1][jobs] == QCflags::pass) {
177  applycondition[jobs] = true;
178  j_reduced.push_back(jobs);
179  }
180  }
181  }
182 
183  // create reduced vectors, copied from full ones, fulfilling applycondition:
184  std::vector<float> firstComponentObVal_reduced = reduceVector(firstComponentObVal,
185  j_reduced);
186  std::vector<float> ObsErr_reduced = reduceVector((*obserr_)[iv1], j_reduced);
187  std::vector<float> hofx1_reduced = reduceVector(hofx1, j_reduced);
188  std::vector<float> hofxerr_reduced = reduceVector(hofxerr, j_reduced);
189  std::vector<float> PdBad_reduced = reduceVector(PdBad, j_reduced);
190  std::vector<int> qcflags1_reduced = reduceVector(qcflags1, j_reduced);
191  std::vector<float> PGE1_reduced = reduceVector(PGE1, j_reduced);
192  std::vector<float> PGEBd1_reduced = reduceVector(PGEBd1, j_reduced);
193  std::vector<float> secondComponentObVal_reduced;
194  std::vector<float> hofx2_reduced;
195  if (previousVariableWasFirstComponentOfTwo) {
196  secondComponentObVal_reduced = reduceVector(secondComponentObVal, j_reduced);
197  hofx2_reduced = reduceVector(hofx2, j_reduced);
198  }
199 
201  firstComponentObVal_reduced,
202  ObsErr_reduced,
203  hofx1_reduced,
204  hofxerr_reduced,
205  PdBad_reduced,
206  ModelLevels,
207  qcflags1_reduced,
208  PGE1_reduced,
209  PGEBd1_reduced,
210  -1,
211  previousVariableWasFirstComponentOfTwo?
212  &secondComponentObVal_reduced : nullptr,
213  previousVariableWasFirstComponentOfTwo?
214  &hofx2_reduced : nullptr);
215  // write PGE-updated values from reduced vectors back into full ones:
216  unreduceVector(qcflags1_reduced, qcflags1, j_reduced);
217  unreduceVector(PGE1_reduced, PGE1, j_reduced);
218  unreduceVector(PGEBd1_reduced, PGEBd1, j_reduced);
219 
220  // Save PGE to obsdb
221  obsdb_.put_db("GrossErrorProbabilityTotal", varname1, PGE1); // 'packed' PGE
222  obsdb_.put_db("GrossErrorProbabilityBuddyCheck", varname1, PGEBd1); // for buddy check
223  // Save QC flags to obsdb
224  obsdb_.put_db("QCFlags", varname1, qcflags1); // Met Office QC flags, not flagged or *flags_
225 
226  if (previousVariableWasFirstComponentOfTwo) {
227  // Save PGE to obsdb
228  std::vector<float> &PGE2 = PGE1; // in old OPS, PGE same for both components of 2-vector
229  // after-check PGE:
230  std::vector<float> &PGEBd2 = PGEBd1; // may assign different to each component in future
231  obsdb_.put_db("GrossErrorProbabilityTotal", varname2, PGE2);
232  obsdb_.put_db("GrossErrorProbabilityBuddyCheck", varname2, PGEBd2);
233  // Save QC flags to obsdb
234  std::vector<int> &qcflags2 = qcflags1; // in old OPS, flags same for both components
235  obsdb_.put_db("QCFlags", varname2, qcflags2);
236  // Set flagged, for 2nd component:
237  for (size_t jobs=0; jobs < obsdb_.nlocs(); ++jobs) {
238  if (qcflags1[jobs] & ufo::MetOfficeQCFlags::Elem::BackRejectFlag) {
239  flagged[filterVarIndex-1][jobs] = true;
240  }
241  if (qcflags2[jobs] & ufo::MetOfficeQCFlags::Elem::BackRejectFlag) {
242  flagged[filterVarIndex][jobs] = true;
243  }
244  oops::Log::debug() << "flagged(1)[" << jobs << "]: "
245  << flagged[filterVarIndex-1][jobs] << std::endl;
246  oops::Log::debug() << "flagged(2)[" << jobs << "]: "
247  << flagged[filterVarIndex][jobs] << std::endl;
248  }
249  } else {
250  // Set flagged, for scalar:
251  for (size_t jobs=0; jobs < obsdb_.nlocs(); ++jobs) {
252  if (qcflags1[jobs] & ufo::MetOfficeQCFlags::Elem::BackRejectFlag) {
253  flagged[filterVarIndex][jobs] = true;
254  }
255  oops::Log::debug() << "flagged[" << jobs << "]: "
256  << flagged[filterVarIndex][jobs] << std::endl;
257  }
258  }
259  previousVariableWasFirstComponentOfTwo = false;
260  }
261  }
262 }
263 
264 // -----------------------------------------------------------------------------
265 
266 void BayesianBackgroundCheck::print(std::ostream & os) const {
267  os << "BayesianBackgroundCheck: config = " << parameters_ << std::endl;
268 }
269 
270 // -----------------------------------------------------------------------------
271 
272 } // namespace ufo
std::vector< T > reduceVector(const std::vector< T > &vector_full, const std::vector< size_t > &j_reduced) const
Reduce a vector to only the elements for which j_reduced=true.
void applyFilter(const std::vector< bool > &apply, const Variables &filtervars, std::vector< std::vector< bool >> &flagged) const override
Apply Bayesian background check filter. Return flagged=true for rejected obs.
void unreduceVector(const std::vector< T > &vector_reduced, std::vector< T > &vector_full, const std::vector< size_t > &j_reduced) const
Copy a reduced vector back into the correct indices of the full vector.
Variable backgrErrVariable(const Variable &filterVariable) const
Return the name of the variable containing the background error estimate of the specified filter vari...
void print(std::ostream &) const override
BayesianBackgroundCheck(ioda::ObsSpace &obsdb, const Parameters_ &parameters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
Parameters controlling the operation of the BayesianBackgroundCheck filter.
oops::RequiredParameter< float > PGE
ProbabilityOfGrossErrorParameters PGEParameters
Parameters related to PGE calculations.
oops::RequiredParameter< float > PdBad
Base class for UFO QC filters.
Definition: FilterBase.h:45
ufo::Variables filtervars_
Definition: FilterBase.h:60
void get(const Variable &varname, std::vector< float > &values) const
Fills a std::vector with values of the specified variable.
ufo::Variables allvars_
std::shared_ptr< ioda::ObsDataVector< float > > obserr_
ioda::ObsSpace & obsdb_
std::shared_ptr< ioda::ObsDataVector< int > > flags_
const std::string & variable() const
Definition: Variable.cc:99
Variable variable(const size_t) const
Return a given constituent "primitive" (single-channel) variable.
Definition: Variables.cc:114
oops::Variables toOopsVariables() const
Definition: Variables.cc:156
size_t size() const
Return the number of constituent Variable objects (some of which may contain multiple channels).
Definition: Variables.cc:92
@ BackRejectFlag
PGE>0.5 after backgr check.
constexpr int pass
Definition: QCflags.h:14
Definition: RunCRTM.h:27
void BayesianPGEUpdate(const ProbabilityOfGrossErrorParameters &options, const std::vector< float > &obsVal, const std::vector< float > &obsErr, const std::vector< float > &bkgVal, const std::vector< float > &bkgErr, const std::vector< float > &PdBad, const bool ModelLevels, std::vector< int > &flags, std::vector< float > &PGE, std::vector< float > &PGEBd, float ErrVarMax, const std::vector< float > *obsVal2, const std::vector< float > *bkgVal2)
Bayesian update of probability of gross error (PGE)