UFO
EntireSampleDataHandler.h
Go to the documentation of this file.
1 /*
2  * (C) Crown copyright 2020, Met Office
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
9 #define UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
10 
11 #include <algorithm>
12 #include <cmath>
13 #include <memory>
14 #include <ostream>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 #include <unordered_map>
19 #include <utility>
20 #include <vector>
21 
22 #include "boost/variant.hpp"
23 
24 #include "ioda/ObsDataVector.h"
25 #include "ioda/ObsSpace.h"
26 
27 #include "oops/util/missingValues.h"
28 
30 
32 #include "ufo/utils/StringUtils.h"
33 
34 namespace ioda {
35  class ObsSpace;
36 }
37 
38 namespace ufo {
39  /// \brief Retrieve and store data for entire sample.
40  /// This class uses lazy loading; vectors of variables are retrieved once requested
41  /// and cached after that.
42  /// Variables in certain groups are optional, meaning that if they are not present on
43  /// the obsdb they will be filled with a default value if requested.
45  public:
46  EntireSampleDataHandler(ioda::ObsSpace &obsdb,
47  const DataHandlerParameters &options);
48 
49  /// Retrieve a vector containing the requested variable for the entire data sample.
50  /// -# If the variable has previously been placed in a vector, return the vector.
51  /// -# If the variable is present in the input data set, fill the vector with those values.
52  /// -# If the variable is not present in the input data set, and 'optional' is true,
53  /// fill the vector with zeros.
54  /// -# If the variable is not present in the input data set, and 'optional' is false,
55  /// do not fill the vector.
56  /// Also store the name of the variable, enabling it to be retrieved later.
57  template <typename T>
58  std::vector<T>& get(const std::string &fullname)
59  {
60  // Determine variable and group names, optional, and number of entries per profile.
61  std::string varname;
62  std::string groupname;
63  ufo::splitVarGroup(fullname, varname, groupname);
64  const bool optional = options_.getOptional(groupname);
65  const size_t entriesPerProfile = options_.getEntriesPerProfile(groupname);
66 
67  std::vector <T> vec_all; // Vector storing data for entire sample.
68  auto it_entireSampleData = entireSampleData_.find(fullname);
69  if (it_entireSampleData != entireSampleData_.end()) {
70  // If the vector is already present, return it.
71  // If the type T is incorrect then boost::get will return an exception.
72  // Provide additional information if that occurs.
73  try {
74  return boost::get<std::vector<T>> (it_entireSampleData->second);
75  } catch (boost::bad_get) {
76  throw eckit::BadParameter("Template parameter passed to boost::get for " +
77  fullname + " probably has the wrong type", Here());
78  }
79  } else if (obsdb_.has(groupname, varname) || optional) {
80  // Initially fill the vector with the default value for the type T.
81  if (entriesPerProfile == 0) {
82  vec_all.assign(obsdb_.nlocs(), defaultValue(vec_all, groupname));
83  } else {
84  vec_all.assign(entriesPerProfile * obsdb_.nrecs(), defaultValue(vec_all, groupname));
85  }
86  // Retrieve variable from the obsdb if present, overwriting the default value.
87  if (obsdb_.has(groupname, varname)) obsdb_.get_db(groupname, varname, vec_all);
88  }
89 
90  // Add vector to map.
91  entireSampleData_.emplace(fullname, std::move(vec_all));
92  return boost::get<std::vector<T>> (entireSampleData_[fullname]);
93  }
94 
95  /// Write various quantities to the obsdb so they can be used in future QC checks.
96  /// The particular variables written out are hardcoded but this could be changed to a
97  /// configurable list if required.
99 
100  /// Initialise vector in the entire sample for a variable that is not currently
101  /// stored. Fill the vector with the default value for the data type.
102  template <typename T>
103  void initialiseVector(const std::string fullname)
104  {
105  auto it_entireSampleData = entireSampleData_.find(fullname);
106  if (it_entireSampleData == entireSampleData_.end() ||
107  (it_entireSampleData != entireSampleData_.end() &&
108  get<T>(fullname).size() == 0)) {
109  std::string varname;
110  std::string groupname;
111  ufo::splitVarGroup(fullname, varname, groupname);
112  const size_t entriesPerProfile = options_.getEntriesPerProfile(groupname);
113  std::vector <T> vec_all; // Vector storing data for entire sample.
114  if (entriesPerProfile == 0) {
115  vec_all.assign(obsdb_.nlocs(), defaultValue(vec_all, groupname));
116  } else {
117  vec_all.assign(entriesPerProfile * obsdb_.nrecs(),
118  defaultValue(vec_all, groupname));
119  }
120  entireSampleData_[fullname] = vec_all;
121  }
122  }
123 
124  private: // functions
125  /// Put entire data vector on obsdb.
126  template <typename T>
127  void putDataVector(const std::string &fullname,
128  const std::vector <T> &datavec)
129  {
130  // Do not store the vector if it is empty.
131  if (datavec.empty()) return;
132 
133  std::string varname;
134  std::string groupname;
135  ufo::splitVarGroup(fullname, varname, groupname);
136  obsdb_.put_db(groupname, varname, datavec);
137  }
138 
139  private: // variables
140  /// Observation database.
141  ioda::ObsSpace &obsdb_;
142 
143  /// Configurable parameters.
145 
146  /// Default value used to fill vector of integers.
147  int defaultValue(const std::vector <int> &vec, const std::string &groupname);
148 
149  /// Default value used to fill vector of floats.
150  float defaultValue(const std::vector <float> &vec, const std::string &groupname);
151 
152  /// Default value used to fill vector of strings.
153  std::string defaultValue(const std::vector <std::string> &vec, const std::string &groupname);
154 
155  /// Container of each variable in the entire data set.
156  std::unordered_map <std::string, boost::variant
157  <std::vector <int>, std::vector <float>, std::vector <std::string>>> entireSampleData_;
158 
159  /// Missing value (int)
160  const int missingValueInt = util::missingValue(missingValueInt);
161 
162  /// Missing value (float)
163  const float missingValueFloat = util::missingValue(missingValueFloat);
164 
165  /// Missing value (string)
166  const std::string missingValueString = util::missingValue(missingValueString);
167  };
168 } // namespace ufo
169 
170 #endif // UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
Options controlling the operation of the EntireSampleDataHandler and ProfileDataHandler classes.
size_t getEntriesPerProfile(const std::string &groupname) const
Determine number of entries per profile for a variable group.
bool getOptional(const std::string &groupname) const
Determine whether a variable group is optional or not.
Retrieve and store data for entire sample. This class uses lazy loading; vectors of variables are ret...
const DataHandlerParameters & options_
Configurable parameters.
ioda::ObsSpace & obsdb_
Observation database.
void putDataVector(const std::string &fullname, const std::vector< T > &datavec)
Put entire data vector on obsdb.
std::vector< T > & get(const std::string &fullname)
void initialiseVector(const std::string fullname)
std::unordered_map< std::string, boost::variant< std::vector< int >, std::vector< float >, std::vector< std::string > > > entireSampleData_
Container of each variable in the entire data set.
EntireSampleDataHandler(ioda::ObsSpace &obsdb, const DataHandlerParameters &options)
const float missingValueFloat
Missing value (float)
int defaultValue(const std::vector< int > &vec, const std::string &groupname)
Default value used to fill vector of integers.
const std::string missingValueString
Missing value (string)
const int missingValueInt
Missing value (int)
Forward declarations.
Definition: ObsAodExt.h:25
Definition: RunCRTM.h:27
void splitVarGroup(const std::string &vargrp, std::string &var, std::string &grp)
Definition: StringUtils.cc:27