UFO
ObsAccessor.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2020 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef UFO_FILTERS_OBSACCESSOR_H_
9 #define UFO_FILTERS_OBSACCESSOR_H_
10 
11 #include <memory>
12 #include <string>
13 #include <vector>
14 
15 #include <boost/optional.hpp>
16 
17 #include "ioda/ObsDataVector.h"
18 #include "oops/util/DateTime.h"
19 #include "ufo/filters/Variable.h"
20 #include "ufo/filters/Variables.h"
21 
// Forward declarations of the ioda types referenced by the declarations in this header.
22 namespace ioda {
23 class Distribution;
24 template <typename DATATYPE> class ObsDataVector;
25 class ObsSpace;
26 }
27 
28 namespace ufo {
29 
// Forward declaration; RecursiveSplitter appears in this header only in function declarations.
30 class RecursiveSplitter;
31 
32 /// \brief This class provides access to observations that may be held on multiple MPI ranks.
33 ///
34 /// It is used by filters that may be configured to
35 ///
36 /// * process observations held on all MPI ranks as a single group
37 /// * process observations from each record (by definition, held on a single MPI rank)
38 /// independently from all others
39 /// * process observations with each distinct value of a particular variable (held on a single MPI
40 /// rank if this variable was used to group observations into records or on multiple MPI ranks
41 /// if not) independently from all others.
42 ///
43 /// Depending on which of these cases applies, create an ObsAccessor object by calling the
44 /// ObsAccessor::toAllObservations(),
45 /// ObsAccessor::toObservationsSplitIntoIndependentGroupsByRecordId() or the
46 /// ObsAccessor::toObservationsSplitIntoIndependentGroupsByVariable() static function. The
47 /// ObsAccessor will then determine whether each independent group consists of
48 /// observations held only on a single MPI rank. If so, methods such as getValidObservationIds() and
49 /// getIntVariableFromObsSpace() will return vectors constructed from data held only on the current
50 /// MPI rank (without any MPI communication); otherwise, these vectors will be constructed from
51 /// data obtained from all MPI ranks.
52 ///
53 /// Call splitObservationsIntoIndependentGroups() to construct a RecursiveSplitter object whose
54 /// groups() method will return groups of observations that can be processed independently from
55 /// each other (according to the criterion specified when the ObsAccessor was constructed).
56 class ObsAccessor {
57  public:
    // Special member functions: copying is disabled (deleted copy constructor and copy
    // assignment); the move constructor is defaulted.
58  ~ObsAccessor() = default;
59  ObsAccessor(const ObsAccessor &) = delete;
60  ObsAccessor(ObsAccessor &&) = default;
61  ObsAccessor & operator=(const ObsAccessor &) = delete;
    // NOTE(review): listing line 62 is missing from this extract. The member index of this page
    // lists `ObsAccessor & operator=(ObsAccessor &&)=default`, which presumably belongs here --
    // confirm against the original header.
63 
    // NOTE(review): the first line of the declaration documented below (listing line 67) is
    // missing from this extract; per the member index of this page it reads
    // `static ObsAccessor toAllObservations(` -- confirm against the original header.
64  /// \brief Create an accessor to observations from the observation space \p obsdb, assuming that
65  /// the whole set of observations held on all MPI ranks must be processed together as a single
66  /// group.
68  const ioda::ObsSpace &obsdb);
69 
    // NOTE(review): the first line of the declaration documented below (listing line 72) is
    // missing from this extract; per the member index of this page it reads
    // `static ObsAccessor toObservationsSplitIntoIndependentGroupsByRecordId(` -- confirm.
70  /// \brief Create an accessor to the collection of observations held in \p obsdb, assuming that
71  /// each record can be processed independently.
73  const ioda::ObsSpace &obsdb);
74 
    // NOTE(review): the first line of the declaration documented below (listing line 77) is
    // missing from this extract; per the member index of this page it reads
    // `static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(` -- confirm.
75  /// \brief Create an accessor to the collection of observations held in \p obsdb, assuming that
76  /// observations with different values of the variable \p variable can be processed independently.
78  const ioda::ObsSpace &obsdb, const Variable &variable);
79 
80  /// \brief Return the IDs of observation locations that should be treated as valid by a filter.
81  ///
82  /// \param apply
83  /// Vector whose ith element is set to true if ith observation location held on the current
84  /// MPI rank was selected by the \c where clause in the filter's configuration.
85  ///
86  /// \param flags
87  /// An ObsDataVector holding the QC flags (set by any filters run previously)
88  /// of observations held on the current MPI rank.
89  ///
90  /// \param filtervars
91  /// List of filter variables.
92  ///
93  /// \param validIfAnyFilterVariablePassedQC
94  /// Boolean switch to treat an observation as valid if any filter variable has not been
95  /// rejected. By default this is true; if false, the observation is only treated as valid
96  /// if all filter variables have passed QC.
97  ///
98  /// An observation location is treated as valid if (a) it has been selected by the \c where
99  /// clause and (b) its QC flag(s) for (some/all) filtered variable(s) are set to \c pass
100  /// (see below).
101  ///
102  /// If each independent group of observations is stored entirely on a single MPI rank, the
103  /// returned vector contains local IDs of valid observation locations held on the current rank
104  /// only. Otherwise the vector contains global IDs of valid locations held on all ranks, with IDs
105  /// from 0 to nlocs(0) - 1 corresponding to locations held on rank 0, IDs from nlocs(0) to
106  /// nlocs(0) + nlocs(1) - 1 corresponding to locations held on rank 1 and so on, where nlocs(i)
107  /// denotes the number of locations held on ith rank.
108  ///
109  /// If there is more than one filtered variable, and their QC flags differ, there is a choice
110  /// as to whether to treat observation locations as valid (i) where none of the filtered variables
111  /// have so far been rejected, or (ii) where at least one of these variables has not yet been
112  /// rejected. The latter choice (ii) is the default, configurable via the switch
113  /// \c validIfAnyFilterVariablePassedQC.
114  std::vector<size_t> getValidObservationIds(const std::vector<bool> &apply,
115  const ioda::ObsDataVector<int> &flags,
116  const Variables &filtervars,
117  bool validIfAnyFilterVariablePassedQC = true) const;
118 
119  /// \brief Return the IDs of both flagged and unflagged observation locations selected by the
120  /// where clause.
121  ///
122  /// \param apply
123  /// Vector whose ith element is set to true if ith observation location held on the current
124  /// MPI rank was selected by the \c where clause in the filter's configuration.
125  ///
126  /// An observation location is treated as valid if it has been selected by the \c where
127  /// clause.
128  ///
129  /// If each independent group of observations is stored entirely on a single MPI rank, the
130  /// returned vector contains local IDs of valid observation locations held on the current rank
131  /// only. Otherwise the vector contains global IDs of valid locations held on all ranks, with IDs
132  /// from 0 to nlocs(0) - 1 corresponding to locations held on rank 0, IDs from nlocs(0) to
133  /// nlocs(0) + nlocs(1) - 1 corresponding to locations held on rank 1 and so on, where nlocs(i)
134  /// denotes the number of locations held on ith rank.
135  std::vector<size_t> getValidObservationIds(const std::vector<bool> &apply) const;
136 
    // NOTE(review): only the Int getter carries a documentation block. The Float, Double, String
    // and DateTime getters declared after it take the same (group, variable) arguments and
    // presumably follow the same local-vs-global rule for their element types -- confirm in
    // ObsAccessor.cc.
137  /// \brief Return the values of the specified variable at successive observation locations.
138  ///
139  /// If each independent group of observations is stored entirely on a single MPI rank, the
140  /// returned vector contains values observed at locations held on the current rank only.
141  /// Otherwise the vector is a concatenation of vectors obtained on all ranks.
142  std::vector<int> getIntVariableFromObsSpace(const std::string &group,
143  const std::string &variable) const;
144  std::vector<float> getFloatVariableFromObsSpace(const std::string &group,
145  const std::string &variable) const;
146  std::vector<double> getDoubleVariableFromObsSpace(const std::string &group,
147  const std::string &variable) const;
148  std::vector<std::string> getStringVariableFromObsSpace(const std::string &group,
149  const std::string &variable) const;
150  std::vector<util::DateTime> getDateTimeVariableFromObsSpace(const std::string &group,
151  const std::string &variable) const;
152 
153  /// \brief Return the IDs of the records that successive observation locations belong to.
154  ///
155  /// If each independent group of observations is stored entirely on a single MPI rank, the
156  /// returned vector contains record IDs of observation locations held on the current rank
157  /// only. Otherwise the vector is a concatenation of vectors obtained on all ranks.
158  std::vector<size_t> getRecordIds() const;
159 
    /// \brief Return the number of observation locations (local or global, as described below).
    ///
160  /// If each independent group of observations is stored entirely on a single MPI rank, return the
161  /// number of observation locations held on the current rank. Otherwise return the total number
162  /// of observation locations held on all ranks.
163  size_t totalNumObservations() const;
164 
    // NOTE(review): the first line of the declaration documented below (listing line 173) is
    // missing from this extract; per the member index of this page it reads
    // `RecursiveSplitter splitObservationsIntoIndependentGroups(` -- confirm against the original.
165  /// Construct a RecursiveSplitter object whose groups() method will return groups of observations
166  /// that can be processed independently from each other (according to the criterion specified when
167  /// the ObsAccessor was constructed).
168  ///
169  /// \param validObsIds
170  /// Indices of valid observations.
171  /// \param opsCompatibilityMode
172  /// Parameter to pass to the RecursiveSplitter's constructor.
174  const std::vector<size_t> &validObsIds, bool opsCompatibilityMode = false) const;
175 
176  /// \brief Update flags of observations held on the current MPI rank.
177  ///
178  /// \param isRejected
179  /// A vector of length totalNumObservations() whose ith element indicates if ith observation
180  /// should be rejected.
181  ///
182  /// \param[in,out] flagged
183  /// A vector of vectors, each with as many elements as there are observation locations on the
184  /// current MPI rank. On output, flagged[i][j] will be set to true for each i if the element of
185  /// isRejected corresponding to jth observation location on the current rank is true.
186  void flagRejectedObservations(const std::vector<bool> &isRejected,
187  std::vector<std::vector<bool> > &flagged) const;
188 
189  private:
    /// Criterion by which observations are split into independent groups; the enumerators mirror
    /// the three factory functions (all observations together, by record ID, by variable).
190  enum class GroupBy { NOTHING, RECORD_ID, VARIABLE };
191 
192  /// Private constructor. Construct instances of this class by calling toAllObservations(),
193  /// toObservationsSplitIntoIndependentGroupsByRecordId() or
194  /// toObservationsSplitIntoIndependentGroupsByVariable() instead.
195  ObsAccessor(const ioda::ObsSpace &obsdb,
196  GroupBy groupBy,
197  boost::optional<Variable> categoryVariable);
198 
    // NOTE(review): listing line 199 is missing from this extract. The member index of this page
    // lists `bool wereRecordsGroupedByCategoryVariable() const`, which presumably belongs here --
    // confirm against the original header.
200 
    // NOTE(review): the two helpers below are undocumented in this header. Both receive the valid
    // observation IDs and a RecursiveSplitter to update; presumably they are called from
    // splitObservationsIntoIndependentGroups() depending on the grouping criterion -- confirm in
    // ObsAccessor.cc.
201  void groupObservationsByRecordNumber(const std::vector<size_t> &validObsIds,
202  RecursiveSplitter &splitter) const;
203 
204  void groupObservationsByCategoryVariable(const std::vector<size_t> &validObsIds,
205  RecursiveSplitter &splitter) const;
206 
207  /// \brief Return true if filtered variable(s) have passed QC, otherwise false.
208  ///
209  /// \param flags
210  /// A vector of ObsDataRow<int> objects holding the QC flags for the subset of simulated variables
211  /// present in the list of filtered variables.
212  ///
213  /// \param ObsId
214  /// Index of observation location.
215  ///
216  /// \param validIfAnyFilterVariablePassedQC
217  /// Boolean variable to decide how to treat observation locations where QC flags of filtered
218  /// variables differ.
219  /// If true, consider that observation has passed QC if any filtered variable has passed QC.
220  /// If false, consider that observation has passed QC only if all filtered variables passed QC.
221  bool isValid(const std::vector<ioda::ObsDataRow<int>> &flags, size_t ObsId,
222  bool validIfAnyFilterVariablePassedQC) const;
223 
    // Data members.
224  private:
    // NOTE(review): obsdb_ is a raw pointer; presumably it refers to the ObsSpace passed to the
    // constructor and is not owned by this class -- confirm in ObsAccessor.cc.
225  const ioda::ObsSpace *obsdb_;
226  std::shared_ptr<const ioda::Distribution> obsDistribution_;
227 
    // NOTE(review): listing line 228 is missing from this extract and is not named in the member
    // index; presumably it declares a member holding the GroupBy criterion passed to the
    // constructor -- confirm against the original header.
229  boost::optional<Variable> categoryVariable_;
230 };
231 
232 } // namespace ufo
233 
234 #endif // UFO_FILTERS_OBSACCESSOR_H_
This class provides access to observations that may be held on multiple MPI ranks.
Definition: ObsAccessor.h:56
RecursiveSplitter splitObservationsIntoIndependentGroups(const std::vector< size_t > &validObsIds, bool opsCompatibilityMode=false) const
Definition: ObsAccessor.cc:197
std::vector< std::string > getStringVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:154
std::vector< size_t > getValidObservationIds(const std::vector< bool > &apply, const ioda::ObsDataVector< int > &flags, const Variables &filtervars, bool validIfAnyFilterVariablePassedQC=true) const
Return the IDs of observation locations that should be treated as valid by a filter.
Definition: ObsAccessor.cc:99
ObsAccessor & operator=(ObsAccessor &&)=default
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
Definition: ObsAccessor.cc:139
static ObsAccessor toObservationsSplitIntoIndependentGroupsByRecordId(const ioda::ObsSpace &obsdb)
Create an accessor to the collection of observations held in obsdb, assuming that each record can be ...
Definition: ObsAccessor.cc:89
ObsAccessor(ObsAccessor &&)=default
void groupObservationsByCategoryVariable(const std::vector< size_t > &validObsIds, RecursiveSplitter &splitter) const
Definition: ObsAccessor.cc:222
const ioda::ObsSpace * obsdb_
Definition: ObsAccessor.h:225
static ObsAccessor toAllObservations(const ioda::ObsSpace &obsdb)
Create an accessor to observations from the observation space obsdb, assuming that the whole set of o...
Definition: ObsAccessor.cc:84
boost::optional< Variable > categoryVariable_
Definition: ObsAccessor.h:229
std::vector< size_t > getRecordIds() const
Return the vector of IDs of records successive observation locations belong to.
Definition: ObsAccessor.cc:164
static ObsAccessor toObservationsSplitIntoIndependentGroupsByVariable(const ioda::ObsSpace &obsdb, const Variable &variable)
Create an accessor to the collection of observations held in obsdb, assuming that observations with d...
Definition: ObsAccessor.cc:94
std::vector< float > getFloatVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:144
ObsAccessor(const ObsAccessor &)=delete
ObsAccessor & operator=(const ObsAccessor &)=delete
void groupObservationsByRecordNumber(const std::vector< size_t > &validObsIds, RecursiveSplitter &splitter) const
Definition: ObsAccessor.cc:214
bool wereRecordsGroupedByCategoryVariable() const
Definition: ObsAccessor.cc:259
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:159
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
Definition: ObsAccessor.cc:243
bool isValid(const std::vector< ioda::ObsDataRow< int >> &flags, size_t ObsId, bool validIfAnyFilterVariablePassedQC) const
Return true if filtered variable(s) have passed QC, otherwise false.
Definition: ObsAccessor.cc:174
std::shared_ptr< const ioda::Distribution > obsDistribution_
Definition: ObsAccessor.h:226
~ObsAccessor()=default
std::vector< double > getDoubleVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:149
size_t totalNumObservations() const
Definition: ObsAccessor.cc:170
Partitions an array into groups of elements equivalent according to certain criteria.
Forward declarations.
Definition: ObsAodExt.h:25
Definition: RunCRTM.h:27