UFO
ProfileDataHandler.cc
Go to the documentation of this file.
1 /*
2  * (C) Crown copyright 2020, Met Office
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #include "oops/util/missingValues.h"
9 
10 #include "ufo/GeoVaLs.h"
11 #include "ufo/ObsDiagnostics.h"
12 
18 
19 namespace ufo {
21  const DataHandlerParameters &options,
22  const std::vector <bool> &apply,
23  const Variables &filtervars,
24  std::vector<std::vector<bool>> &flagged)
25  : obsdb_(data.obsspace()),
26  geovals_(data.getGeoVaLs()),
27  obsdiags_(data.getObsDiags()),
28  options_(options),
29  filtervars_(filtervars),
30  flagged_(flagged)
31  {
32  profileIndices_.reset(new ProfileIndices(obsdb_, options, apply));
34  }
35 
37  {
38  profileData_.clear();
39  GeoVaLData_.clear();
40  obsDiagData_.clear();
41  }
42 
44  {
46  profileIndices_->updateNextProfileIndices();
47  }
48 
50  {
51  // Set final report flags in this profile.
53 
54  // Modify 'flagged' vector for each filter variable based on check results.
55  setFlagged();
56 
57  // If any variables in the current profile were modified by the checks,
58  // the equivalent variables in the entire sample are set to the modified values.
60  }
61 
63  {
64  entireSampleDataHandler_->writeQuantitiesToObsdb();
65  }
66 
67  void ProfileDataHandler::getProfileIndicesInEntireSample(const std::string& groupname)
68  {
70  const size_t entriesPerProfile = options_.getEntriesPerProfile(groupname);
71  // If the number of entries per profile was not specified, use the indices
72  // that were obtained by sorting and grouping the record numbers.
73  if (entriesPerProfile == 0) {
74  profileIndicesInEntireSample_ = profileIndices_->getProfileIndices();
75  } else {
76  // Otherwise increment the indices sequentially, starting at the
77  // relevant position.
78  profileIndicesInEntireSample_.resize(entriesPerProfile);
79  std::iota(profileIndicesInEntireSample_.begin(),
81  profileIndices_->getProfileNumCurrent() * entriesPerProfile);
82  }
83  }
84 
86  {
87  for (const auto &it_profile : profileData_) {
88  std::string fullname = it_profile.first;
89  std::string varname;
90  std::string groupname;
91  ufo::splitVarGroup(fullname, varname, groupname);
92 
93  if (groupname == "QCFlags" ||
94  groupname == "ModelLevelsFlags" ||
95  groupname == "ModelLevelsQCFlags" ||
96  groupname == "ModelRhoLevelsFlags" ||
97  groupname == "Counters") {
98  const std::vector <int>& profileData = get<int>(fullname);
100  std::vector <int>& entireSampleData = entireSampleDataHandler_->get<int>(fullname);
101  size_t idx = 0;
102  for (const auto& profileIndex : profileIndicesInEntireSample_) {
103  updateValueIfPresent(profileData, idx, entireSampleData, profileIndex);
104  idx++;
105  }
106  } else if (groupname == "Corrections" ||
107  groupname == "DerivedValue" ||
108  groupname == "GrossErrorProbability" ||
109  groupname == "GrossErrorProbabilityBuddyCheck" ||
110  groupname == "ModelLevelsDerivedValue" ||
111  groupname == "ModelRhoLevelsDerivedValue" ||
113  const std::vector <float>& profileData = get<float>(fullname);
115  std::vector <float>& entireSampleData = entireSampleDataHandler_->get<float>(fullname);
116  size_t idx = 0;
117  for (const auto& profileIndex : profileIndicesInEntireSample_) {
118  updateValueIfPresent(profileData, idx, entireSampleData, profileIndex);
119  idx++;
120  }
121  }
122  }
123  }
124 
126  {
127  std::vector <int> &ReportFlags = get<int>(ufo::VariableNames::qcflags_observation_report);
128  const std::vector <int> &NumAnyErrors = get<int>(ufo::VariableNames::counter_NumAnyErrors);
129  if (!NumAnyErrors.empty() && NumAnyErrors[0] > options_.nErrorsFail.value()) {
130  oops::Log::debug() << " " << NumAnyErrors[0]
131  << " errors detected, whole profile rejected" << std::endl;
132  for (size_t jlev = 0; jlev < ReportFlags.size(); ++jlev) {
134  }
135  }
136  }
137 
139  {
140  oops::Log::debug() << "Flagging observations" << std::endl;
141 
142  for (const auto& it_profile : profileData_) {
143  std::string fullname = it_profile.first;
144  std::string varname;
145  std::string groupname;
146  ufo::splitVarGroup(fullname, varname, groupname);
147 
148  if (groupname == "QCFlags") {
149  oops::Log::debug() << " " << fullname << std::endl;
150 
151  // Obtain QC flags
152  const std::vector <int> &Flags = get<int>(fullname);
153  if (Flags.empty()) continue;
155 
156  // Determine index of varname in the filter variables.
157  // If it is not present then the variable will not be flagged individually.
158  size_t idxvar = filtervars_.size();
159  for (size_t idx = 0; idx < idxvar; ++idx) {
160  if (filtervars_[idx].variable() == varname) {
161  idxvar = idx;
162  break;
163  }
164  }
165 
166  // If varname is observation_report then all filter variables will be rejected.
167  bool isObservationReport = varname == "observation_report";
168 
169  // Index of elements in this profile.
170  size_t idxprof = 0;
171  // Loop over indices of elements in entire profile sample.
172  for (const auto& profileIndex : profileIndicesInEntireSample_) {
173  // Flag all filter variables if the whole observation has been rejected.
174  if (isObservationReport &&
176  oops::Log::debug() << " Reject all variables, index " << profileIndex << std::endl;
177  for (size_t jvar = 0; jvar < filtervars_.size(); ++jvar)
178  flagged_[jvar][profileIndex] = true;
179  // Move to next element in the profile.
180  idxprof++;
181  continue;
182  }
183  // Flag variable if its specific value has been rejected.
184  if (idxvar < filtervars_.size() &&
186  oops::Log::debug() << " Reject " << varname
187  << ", index " << profileIndex << std::endl;
188  flagged_[idxvar][profileIndex] = true;
189  }
190  idxprof++;
191  }
192  }
193  }
194  }
195 
197  (const std::string & variableName) const
198  {
199  // Obtain the map between non-default vertical coordinates and variable names.
200  const auto & alternativeVerticalCoordinate = options_.alternativeVerticalCoordinate.value();
201  auto it_altCoord = alternativeVerticalCoordinate.find(variableName);
202  if (it_altCoord != alternativeVerticalCoordinate.end()) {
203  // This variable has an associated alternative vertical coordinate.
204  return it_altCoord->second;
205  } else {
206  // This variable uses the default vertical coordinate.
207  return options_.defaultVerticalCoordinate.value();
208  }
209  }
210 
211  std::vector <float>& ProfileDataHandler::getGeoVaLVector(const std::string &variableName)
212  {
213  auto it_GeoVaLData = GeoVaLData_.find(variableName);
214  if (it_GeoVaLData != GeoVaLData_.end()) {
215  // If the GeoVaL vector is already present, return it.
216  return it_GeoVaLData->second;
217  } else {
218  std::vector <float> vec_GeoVaL_column;
219  // Only fill the GeoVaL vector if the required GeoVaLs are present
220  // and there is at least one observation location.
221  if (geovals_ &&
222  obsdb_.nlocs() > 0 &&
223  geovals_->has(variableName)) {
224  // Locations at which to retrieve the GeoVaL.
225  const std::vector<std::size_t> slant_path_location =
227  *geovals_,
228  profileIndices_->getProfileIndices(),
229  this->getAssociatedVerticalCoordinate(variableName));
230  // Vector storing GeoVaL data for current profile.
231  vec_GeoVaL_column.assign(geovals_->nlevs(variableName), 0.0);
232  // Check the number of entries in the slant path location vector is equal
233  // to the number of entries in the GeoVaL for this variable.
234  // If not, the GeoVaL at the first location in the profile is used;
235  // in other words, drift is not accounted for.
236  // todo(ctgh): revisit this choice in a future PR.
237  if (slant_path_location.size() == vec_GeoVaL_column.size()) {
238  std::vector<float> vec_GeoVaL_loc(geovals_->nlevs(variableName));
239  // Take the GeoVaL at each slant path location and copy the relevant
240  // value from each GeoVaL into the output vector.
241  for (std::size_t mlev = 0; mlev < slant_path_location.size(); ++mlev) {
242  const std::size_t jloc = slant_path_location[mlev];
243  geovals_->getAtLocation(vec_GeoVaL_loc, variableName, jloc);
244  vec_GeoVaL_column[mlev] = vec_GeoVaL_loc[mlev];
245  }
246  } else {
247  // Take the GeoVaL at the first location.
248  const std::size_t jloc = profileIndices_->getProfileIndices()[0];
249  geovals_->getAtLocation(vec_GeoVaL_column, variableName, jloc);
250  }
251  }
252  // Add GeoVaL vector to map (even if it is empty).
253  GeoVaLData_.emplace(variableName, std::move(vec_GeoVaL_column));
254  return GeoVaLData_[variableName];
255  }
256  }
257 
258  std::vector <float>& ProfileDataHandler::getObsDiag(const std::string &fullname)
259  {
260  auto it_obsDiagData = obsDiagData_.find(fullname);
261  if (it_obsDiagData != obsDiagData_.end()) {
262  // If the ObsDiag vector is already present, return it.
263  return it_obsDiagData->second;
264  } else {
265  std::string varname;
266  std::string groupname;
267  ufo::splitVarGroup(fullname, varname, groupname);
268  std::vector <float> vec_ObsDiag;
269  // Attempt to retrieve variable vector from entire sample.
270  // If it is not present, the vector will remain empty.
271  std::vector <float> &vec_all = entireSampleDataHandler_->get<float>(fullname);
272  // If the vector is empty, attempt to fill it from the ObsDiags
273  // (if they are present and have the required variable).
274  if (vec_all.empty() &&
275  obsdiags_ &&
276  obsdb_.nlocs() > 0 &&
277  obsdiags_->has(varname)) {
278  vec_all.assign(obsdb_.nlocs(), util::missingValue(1.0f));
279  obsdiags_->get(vec_all, varname);
280  }
281  // If the ObsDiags vector for the entire sample is not empty,
282  // fill the values for this profile.
283  if (!vec_all.empty()) {
285  for (const auto& profileIndex : profileIndicesInEntireSample_)
286  vec_ObsDiag.emplace_back(vec_all[profileIndex]);
287  }
288  // Add ObsDiag vector to map (even if it is empty).
289  obsDiagData_.emplace(fullname, std::move(vec_ObsDiag));
290  return obsDiagData_[fullname];
291  }
292  }
293 
294  std::vector <ProfileDataHolder> ProfileDataHandler::produceProfileVector
295  (const std::vector <std::string> &variableNamesInt,
296  const std::vector <std::string> &variableNamesFloat,
297  const std::vector <std::string> &variableNamesString,
298  const std::vector <std::string> &variableNamesGeoVaLs,
299  const std::vector <std::string> &variableNamesObsDiags)
300  {
301  profileIndices_->reset();
302  std::vector <ProfileDataHolder> profiles;
303  oops::Log::debug() << "Filling vector of profiles" << std::endl;
304  for (size_t jprof = 0; jprof < obsdb_.nrecs(); ++jprof) {
306  ProfileDataHolder profile(*this);
307  profile.fill(variableNamesInt,
308  variableNamesFloat,
309  variableNamesString,
310  variableNamesGeoVaLs,
311  variableNamesObsDiags);
312  profiles.emplace_back(profile);
313  }
314  return profiles;
315  }
316 
317  void ProfileDataHandler::updateAllProfiles(std::vector <ProfileDataHolder> &profiles)
318  {
319  this->resetProfileIndices();
320  for (size_t jprof = 0; jprof < obsdb_.nrecs(); ++jprof) {
321  this->initialiseNextProfile();
322  auto& profile = profiles[jprof];
323  // Move values from profile to this object.
324  profile.moveValuesToHandler();
325  // Update information, including the 'flagged' vector, for this profile.
326  this->updateProfileInformation();
327  }
328  }
329 } // namespace ufo
Options controlling the operation of the EntireSampleDataHandler and ProfileDataHandler classes.
oops::Parameter< int > nErrorsFail
Number of errors, accumulated over checks, that cause the observation to have failed.
size_t getEntriesPerProfile(const std::string &groupname) const
Determine number of entries per profile for a variable group.
oops::Parameter< std::string > defaultVerticalCoordinate
oops::Parameter< std::map< std::string, std::string > > alternativeVerticalCoordinate
Retrieve and store data for entire sample. This class uses lazy loading; vectors of variables are ret...
size_t nlevs(const std::string &var) const
Return number of levels for a specified variable.
Definition: GeoVaLs.cc:310
bool has(const std::string &var) const
void getAtLocation(std::vector< double > &, const std::string &, const int) const
Get GeoVaLs at a specified location.
Definition: GeoVaLs.cc:380
void get(std::vector< float > &, const std::string &) const
bool has(const std::string &var) const
ObsFilterData provides access to all data related to an ObsFilter.
ProfileDataHandler(const ObsFilterData &data, const DataHandlerParameters &options, const std::vector< bool > &apply, const Variables &filtervars, std::vector< std::vector< bool >> &flagged)
const DataHandlerParameters & options_
Configurable parameters.
void updateAllProfiles(std::vector< ProfileDataHolder > &profiles)
Read values from a collection of profiles and update information related to each one.
std::unique_ptr< EntireSampleDataHandler > entireSampleDataHandler_
Class that handles the entire data sample.
std::vector< float > & getGeoVaLVector(const std::string &variableName)
Get GeoVaLs for a particular profile.
const Variables & filtervars_
Filter variables.
const ObsDiagnostics *const obsdiags_
ObsDiags loaded by the filter.
std::unordered_map< std::string, std::vector< float > > obsDiagData_
Container of ObsDiags in the current profile.
std::vector< ProfileDataHolder > produceProfileVector(const std::vector< std::string > &variableNamesInt, const std::vector< std::string > &variableNamesFloat, const std::vector< std::string > &variableNamesString, const std::vector< std::string > &variableNamesGeoVaLs, const std::vector< std::string > &variableNamesObsDiags)
Produce a vector of all profiles, loading the requested variables into each one.
std::unique_ptr< ProfileIndices > profileIndices_
Class that handles profile indices.
std::vector< float > & getObsDiag(const std::string &variableName)
Get ObsDiags for a particular profile.
std::unordered_map< std::string, std::vector< float > > GeoVaLData_
Container of GeoVaLs in the current profile.
ioda::ObsSpace & obsdb_
Observation database.
void updateValueIfPresent(const std::vector< T > &vecIn, const size_t &idxIn, std::vector< T > &vecOut, const size_t &idxOut)
Transfer values from one vector to another (as long as neither is empty).
void setFinalReportFlags()
Set final report flags based on the NumAnyErrors counter.
std::vector< std::vector< bool > > & flagged_
Flagged values.
void getProfileIndicesInEntireSample(const std::string &groupname)
Get indices in entire sample corresponding to current profile.
std::string getAssociatedVerticalCoordinate(const std::string &variableName) const
std::vector< size_t > profileIndicesInEntireSample_
Indices in the entire data sample that correspond to the current profile.
const GeoVaLs *const geovals_
GeoVaLs loaded by the filter.
std::unordered_map< std::string, boost::variant< std::vector< int >, std::vector< float >, std::vector< std::string > > > profileData_
Container of each variable in the current profile.
Profile data holder class.
Determine indices of observations making up individual profiles. The indices are computed with respec...
size_t size() const
Return the number of constituent Variable objects (some of which may contain multiple channels).
Definition: Variables.cc:92
@ FinalRejectReport
One of flags 1-6 set.
@ FinalRejectFlag
Final QC flag.
constexpr int profile
Definition: QCflags.h:34
Definition: RunCRTM.h:27
void splitVarGroup(const std::string &vargrp, std::string &var, std::string &grp)
Definition: StringUtils.cc:27
std::vector< std::size_t > getSlantPathLocations(const ioda::ObsSpace &odb, const GeoVaLs &gv, const std::vector< std::size_t > &locs, const std::string &modelVerticalCoord, const int itermax)
static constexpr const char *const qcflags_observation_report
Definition: VariableNames.h:98
static constexpr const char *const counter_NumAnyErrors
static constexpr const char *const obs_air_pressure
Definition: VariableNames.h:18