UFO
ObsAccessor.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2020 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <memory>
11 #include <string>
12 #include <vector>
13 
14 #include "ioda/distribution/InefficientDistribution.h"
15 #include "ioda/ObsSpace.h"
16 #include "ufo/filters/QCflags.h"
18 
19 namespace ufo {
20 
21 namespace {
22 
23 template <typename VariableType>
24 std::vector<VariableType> getVariableFromObsSpaceImpl(
25  const std::string &group, const std::string &variable,
26  const ioda::ObsSpace &obsdb, const ioda::Distribution &obsDistribution) {
27  std::vector<VariableType> result(obsdb.nlocs());
28  obsdb.get_db(group, variable, result);
29  obsDistribution.allGatherv(result);
30  return result;
31 }
32 
/// \brief Return the vector of elements of \p categories with indices \p validObsIds.
///
/// The output has one element per entry of \p validObsIds, in the same order; an index may
/// appear more than once, in which case the corresponding element is repeated.
///
/// \param categories   Source values.
/// \param validObsIds  Indices into \p categories. Every index must be smaller than
///                     categories.size(); this is not checked.
/// \return Copies of the selected elements.
template <typename T>
std::vector<T> getValidObservationCategories(const std::vector<T> &categories,
                                             const std::vector<size_t> &validObsIds) {
  std::vector<T> validObsCategories;
  // Reserve once and append copies: avoids default-constructing every element only to
  // overwrite it, and does not require T to be default-constructible.
  validObsCategories.reserve(validObsIds.size());
  for (const size_t obsId : validObsIds)
    validObsCategories.push_back(categories[obsId]);
  return validObsCategories;
}
43 
/// Read the category variable \p variable from \p obsdb into a vector of obsdb.nlocs() elements,
/// pass it through \p obsDistribution.allGatherv(), pick the entries with indices \p validObsIds
/// and hand them to \p splitter.groupBy().
///
/// \tparam VariableType Element type used to read the category variable.
44 template <typename VariableType>
46  const Variable &variable,
47  const std::vector<size_t> &validObsIds,
48  const ioda::ObsSpace &obsdb,
49  const ioda::Distribution &obsDistribution,
50  RecursiveSplitter &splitter) {
51  std::vector<VariableType> obsCategories(obsdb.nlocs());
52  obsdb.get_db(variable.group(), variable.variable(), obsCategories);
53  obsDistribution.allGatherv(obsCategories);
54 
  // Keep only the categories of the observations selected by validObsIds.
55  const std::vector<VariableType> validObsCategories = getValidObservationCategories(
56  obsCategories, validObsIds);
57 
58  splitter.groupBy(validObsCategories);
59 }
60 
61 } // namespace
62 
/// Store a pointer to \p obsdb together with the grouping mode and the optional category
/// variable, then choose the distribution object kept in obsDistribution_: either a newly
/// created ioda::InefficientDistribution or the distribution returned by obsdb.distribution().
// NOTE(review): the condition that opens the branch below is not present in this listing, so
// it is not described here; only the two outcomes are visible.
63 ObsAccessor::ObsAccessor(const ioda::ObsSpace &obsdb,
64  GroupBy groupBy,
65  boost::optional<Variable> categoryVariable)
66  : obsdb_(&obsdb), groupBy_(groupBy), categoryVariable_(categoryVariable)
67 {
70 
72  // Each record is held by a single process, so there's no need to exchange data between
73  // processes and we can use an InefficientDistribution rather than the distribution taken from
74  // obsdb_. Which in this case is *efficient*!
75  eckit::LocalConfiguration emptyConfig;
76  obsDistribution_ = std::make_shared<ioda::InefficientDistribution>(obsdb_->comm(),
77  emptyConfig);
78  oops::Log::trace() << "ObservationAccessor: no MPI communication necessary" << std::endl;
79  } else {
  // Otherwise reuse the distribution object provided by the observation space.
80  obsDistribution_ = obsdb.distribution();
81  }
82 }
83 
/// Create an ObsAccessor for \p obsdb with grouping mode GroupBy::NOTHING and no category
/// variable.
85  const ioda::ObsSpace &obsdb) {
86  return ObsAccessor(obsdb, GroupBy::NOTHING, boost::none);
87 }
88 
/// Create an ObsAccessor for \p obsdb with grouping mode GroupBy::RECORD_ID and no category
/// variable.
90  const ioda::ObsSpace &obsdb) {
91  return ObsAccessor(obsdb, GroupBy::RECORD_ID, boost::none);
92 }
93 
/// Create an ObsAccessor for \p obsdb with grouping mode GroupBy::VARIABLE, using \p variable
/// as the category variable.
95  const ioda::ObsSpace &obsdb, const Variable &variable) {
96  return ObsAccessor(obsdb, GroupBy::VARIABLE, variable);
97 }
98 
100  const std::vector<bool> &apply, const ioda::ObsDataVector<int> &flags,
101  const ufo::Variables &filtervars, bool validIfAnyFilterVariablePassedQC) const {
102  // TODO(wsmigaj): use std::vector<unsigned char> to save space
103  std::vector<int> globalApply(apply.size());
104  std::vector<ioda::ObsDataRow<int>> filterVariableFlags;
105  // Select flags for respective filtervars
106  for (size_t ivar = 0; ivar < filtervars.nvars(); ++ivar) {
107  std::string filterVariableName = filtervars.variable(ivar).variable();
108  auto it = std::find(flags.varnames().variables().begin(), flags.varnames().variables().end(),
109  filterVariableName);
110  filterVariableFlags.push_back(flags[*it]);
111  }
112  for (size_t obsId = 0; obsId < apply.size(); ++obsId)
113  globalApply[obsId] = apply[obsId]
114  && isValid(filterVariableFlags, obsId, validIfAnyFilterVariablePassedQC);
115  obsDistribution_->allGatherv(globalApply);
116 
117  std::vector<size_t> validObsIds;
118  for (size_t obsId = 0; obsId < globalApply.size(); ++obsId)
119  if (globalApply[obsId])
120  validObsIds.push_back(obsId);
121 
122  return validObsIds;
123 }
124 
/// Convert \p apply to a vector of ints, pass it through obsDistribution_->allGatherv() and
/// return the indices of the non-zero entries in increasing order.
126  const std::vector<bool> &apply) const {
127  // TODO(wsmigaj): use std::vector<unsigned char> to save space
128  std::vector<int> globalApply(apply.begin(), apply.end());
129  obsDistribution_->allGatherv(globalApply);
130 
  // Collect the indices of all entries that are set after the gather.
131  std::vector<size_t> validObsIds;
132  for (size_t obsId = 0; obsId < globalApply.size(); ++obsId)
133  if (globalApply[obsId])
134  validObsIds.push_back(obsId);
135 
136  return validObsIds;
137 }
138 
/// Return variable \p variable of group \p group as a vector of int, obtained from
/// getVariableFromObsSpaceImpl<int>() with this accessor's ObsSpace and distribution.
140  const std::string &group, const std::string &variable) const {
141  return getVariableFromObsSpaceImpl<int>(group, variable, *obsdb_, *obsDistribution_);
142 }
143 
/// Return variable \p variable of group \p group as a vector of float, obtained from
/// getVariableFromObsSpaceImpl<float>() with this accessor's ObsSpace and distribution.
145  const std::string &group, const std::string &variable) const {
146  return getVariableFromObsSpaceImpl<float>(group, variable, *obsdb_, *obsDistribution_);
147 }
148 
/// Return variable \p variable of group \p group as a vector of double, obtained from
/// getVariableFromObsSpaceImpl<double>() with this accessor's ObsSpace and distribution.
150  const std::string &group, const std::string &variable) const {
151  return getVariableFromObsSpaceImpl<double>(group, variable, *obsdb_, *obsDistribution_);
152 }
153 
/// Return variable \p variable of group \p group as a vector of std::string, obtained from
/// getVariableFromObsSpaceImpl<std::string>() with this accessor's ObsSpace and distribution.
155  const std::string &group, const std::string &variable) const {
156  return getVariableFromObsSpaceImpl<std::string>(group, variable, *obsdb_, *obsDistribution_);
157 }
158 
/// Return variable \p variable of group \p group as a vector of util::DateTime, obtained from
/// getVariableFromObsSpaceImpl<util::DateTime>() with this accessor's ObsSpace and distribution.
160  const std::string &group, const std::string &variable) const {
161  return getVariableFromObsSpaceImpl<util::DateTime>(group, variable, *obsdb_, *obsDistribution_);
162 }
163 
164 std::vector<size_t> ObsAccessor::getRecordIds() const {
165  std::vector<size_t> recordIds = obsdb_->recnum();
166  obsDistribution_->allGatherv(recordIds);
167  return recordIds;
168 }
169 
/// Return the value of obsdb_->globalNumLocs().
171  return obsdb_->globalNumLocs();
172 }
173 
174 bool ObsAccessor::isValid(const std::vector<ioda::ObsDataRow<int>> &flags, size_t obsId,
175  bool validIfAnyFilterVariablePassedQC) const {
176  bool obIsNotFlagged;
177  if (validIfAnyFilterVariablePassedQC) {
178  obIsNotFlagged = false;
179  for (size_t irow = 0; irow < flags.size(); ++irow) {
180  if (flags[irow][obsId] == QCflags::pass) {
181  obIsNotFlagged = true;
182  break;
183  }
184  }
185  } else {
186  obIsNotFlagged = true;
187  for (size_t irow = 0; irow < flags.size(); ++irow) {
188  if (flags[irow][obsId] != QCflags::pass) {
189  obIsNotFlagged = false;
190  break;
191  }
192  }
193  }
194  return obIsNotFlagged;
195 }
196 
/// Create a RecursiveSplitter over validObsIds.size() elements (forwarding
/// \p opsCompatibilityMode to its constructor), group the elements according to groupBy_ and
/// return the splitter.
198  const std::vector<size_t> &validObsIds, bool opsCompatibilityMode) const {
199  RecursiveSplitter splitter(validObsIds.size(), opsCompatibilityMode);
200  switch (groupBy_) {
201  case GroupBy::NOTHING:
202  // Nothing to do
203  break;
204  case GroupBy::RECORD_ID:
  // Group by the record numbers of the valid observations.
205  groupObservationsByRecordNumber(validObsIds, splitter);
206  break;
207  case GroupBy::VARIABLE:
  // Group by the values of the category variable at the valid observations.
208  groupObservationsByCategoryVariable(validObsIds, splitter);
209  break;
210  }
211  return splitter;
212 }
213 
214 void ObsAccessor::groupObservationsByRecordNumber(const std::vector<size_t> &validObsIds,
215  RecursiveSplitter &splitter) const {
216  const std::vector<size_t> &obsCategories = obsdb_->recnum();
217  std::vector<size_t> validObsCategories = getValidObservationCategories(
218  obsCategories, validObsIds);
219  splitter.groupBy(validObsCategories);
220 }
221 
/// Group the observations tracked by \p splitter by the values of the category variable,
/// dispatching on the variable's data type as reported by obsdb_->dtype(). Integer and string
/// variables are supported; any other type results in an eckit::UserError.
// NOTE(review): categoryVariable_ is dereferenced without a check here; presumably this is only
// reached when the accessor was created with a category variable -- confirm against callers.
223  const std::vector<size_t> &validObsIds,
224  RecursiveSplitter &splitter) const {
225  switch (obsdb_->dtype(categoryVariable_->group(), categoryVariable_->variable())) {
226  case ioda::ObsDtype::Integer:
227  groupObservationsByVariableImpl<int>(*categoryVariable_, validObsIds,
228  *obsdb_, *obsDistribution_, splitter);
229  break;
230 
231  case ioda::ObsDtype::String:
232  groupObservationsByVariableImpl<std::string>(*categoryVariable_, validObsIds,
233  *obsdb_, *obsDistribution_, splitter);
234  break;
235 
236  default:
237  throw eckit::UserError(
238  categoryVariable_->variable() + "@" + categoryVariable_->group() +
239  " is neither an integer nor a string variable", Here());
240  }
241 }
242 
/// For every location index below obsdb_->nlocs(), look up the index returned by
/// obsDistribution_->globalUniqueConsecutiveLocationIndex() and, if \p isRejected is set at
/// that index, set the location's entry to true in every vector of \p flagged.
///
/// Each vector in \p flagged must have obsdb_->nlocs() elements (checked with ASSERT).
// NOTE(review): the size of isRejected is not checked; it is indexed with the value returned by
// globalUniqueConsecutiveLocationIndex(), so it presumably must cover all such indices.
244  const std::vector<bool> &isRejected, std::vector<std::vector<bool> > &flagged) const {
245  const size_t localNumObs = obsdb_->nlocs();
246  for (const std::vector<bool> & variableFlagged : flagged)
247  ASSERT(variableFlagged.size() == localNumObs);
248 
249  for (size_t localObsId = 0; localObsId < localNumObs; ++localObsId) {
250  const size_t globalObsId =
251  obsDistribution_->globalUniqueConsecutiveLocationIndex(localObsId);
252  if (isRejected[globalObsId]) {
  // Mark this location as flagged for every variable.
253  for (std::vector<bool> & variableFlagged : flagged)
254  variableFlagged[localObsId] = true;
255  }
256  }
257 }
258 
/// Return true if a category variable is set, its group is "MetaData" and its name equals the
/// first entry of obsdb_->obs_group_vars(). If obs_group_vars() is empty the comparison is made
/// against an empty string. Only the first grouping variable is considered.
260  std::vector<std::string> groupingVars = obsdb_->obs_group_vars();
261  std::string groupingVar;
262  if (groupingVars.size() > 0) {
263  groupingVar = groupingVars[0];
264  }
265  return categoryVariable_ != boost::none &&
266  categoryVariable_->variable() == groupingVar &&
267  categoryVariable_->group() == "MetaData";
268 }
269 
270 } // namespace ufo
This class provides access to observations that may be held on multiple MPI ranks.
Definition: ObsAccessor.h:56
RecursiveSplitter splitObservationsIntoIndependentGroups(const std::vector< size_t > &validObsIds, bool opsCompatibilityMode=false) const
Definition: ObsAccessor.cc:197
std::vector< std::string > getStringVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:154
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const ioda::ObsDataVector< int > &flags, const Variables &filtervars, bool validIfAnyFilterVariablePassedQC=true) const
Return the IDs of observation locations that should be treated as valid by a filter.
Definition: ObsAccessor.cc:99
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
Definition: ObsAccessor.cc:139
static ObsAccessor toObservationsSplitIntoIndependentGroupsByRecordId(const ioda::ObsSpace &obsdb)
Create an accessor to the collection of observations held in obsdb, assuming that each record can be ...
Definition: ObsAccessor.cc:89
void groupObservationsByCategoryVariable(const std::vector< size_t > &validObsIds, RecursiveSplitter &splitter) const
Definition: ObsAccessor.cc:222
const ioda::ObsSpace * obsdb_
Definition: ObsAccessor.h:225
static ObsAccessor toAllObservations(const ioda::ObsSpace &obsdb)
Create an accessor to observations from the observation space obsdb, assuming that the whole set of o...
Definition: ObsAccessor.cc:84
boost::optional< Variable > categoryVariable_
Definition: ObsAccessor.h:229
std::vector< size_t > getRecordIds() const
Return the vector of IDs of the records that successive observation locations belong to.
Definition: ObsAccessor.cc:164
static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(const ioda::ObsSpace &obsdb, const Variable &variable)
Create an accessor to the collection of observations held in obsdb, assuming that observations with d...
Definition: ObsAccessor.cc:94
std::vector< float > getFloatVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:144
ObsAccessor(const ObsAccessor &)=delete
void groupObservationsByRecordNumber(const std::vector< size_t > &validObsIds, RecursiveSplitter &splitter) const
Definition: ObsAccessor.cc:214
bool wereRecordsGroupedByCategoryVariable() const
Definition: ObsAccessor.cc:259
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:159
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
Definition: ObsAccessor.cc:243
bool isValid(const std::vector< ioda::ObsDataRow< int >> &flags, size_t ObsId, bool validIfAnyFilterVariablePassedQC) const
Return true if filtered variable(s) have passed QC, otherwise false.
Definition: ObsAccessor.cc:174
std::shared_ptr< const ioda::Distribution > obsDistribution_
Definition: ObsAccessor.h:226
std::vector< double > getDoubleVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:149
size_t totalNumObservations() const
Definition: ObsAccessor.cc:170
Partitions an array into groups of elements equivalent according to certain criteria.
void groupBy(const std::vector< size_t > &categories)
Split existing equivalence classes according to a new criterion.
const std::string & variable() const
Definition: Variable.cc:99
const std::string & group() const
Definition: Variable.cc:116
size_t nvars() const
Return the number of constituent "primitive" (single-channel) variables.
Definition: Variables.cc:104
Variable variable(const size_t) const
Return a given constituent "primitive" (single-channel) variable.
Definition: Variables.cc:114
constexpr int pass
Definition: QCflags.h:14
std::vector< VariableType > getVariableFromObsSpaceImpl(const std::string &group, const std::string &variable, const ioda::ObsSpace &obsdb, const ioda::Distribution &obsDistribution)
Definition: ObsAccessor.cc:24
void groupObservationsByVariableImpl(const Variable &variable, const std::vector< size_t > &validObsIds, const ioda::ObsSpace &obsdb, const ioda::Distribution &obsDistribution, RecursiveSplitter &splitter)
Definition: ObsAccessor.cc:45
std::vector< T > getValidObservationCategories(const std::vector< T > &categories, const std::vector< size_t > &validObsIds)
Return the vector of elements of categories with indices validObsIds.
Definition: ObsAccessor.cc:35
Definition: RunCRTM.h:27