UFO
EntireSampleDataHandler.h
Go to the documentation of this file.
1 /*
2  * (C) Crown copyright 2020, Met Office
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
9 #define UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
10 
11 #include <algorithm>
12 #include <cmath>
13 #include <memory>
14 #include <ostream>
15 #include <sstream>
16 #include <string>
17 #include <tuple>
18 #include <unordered_map>
19 #include <utility>
20 #include <vector>
21 
22 #include "boost/variant.hpp"
23 
24 #include "ioda/ObsDataVector.h"
25 #include "ioda/ObsSpace.h"
26 
28 
30 #include "ufo/utils/StringUtils.h"
31 
32 namespace ioda {
33  class ObsSpace;
34 }
35 
36 namespace ufo {
37  /// \brief Retrieve and store data for entire sample.
38  /// This class uses lazy loading; vectors of variables are retrieved once requested
39  /// and cached after that.
40  /// Variables in certain groups are optional, meaning that if they are not present on
41  /// the obsdb they will be filled with a default value if requested.
43  public:
44  EntireSampleDataHandler(ioda::ObsSpace &obsdb,
45  const DataHandlerParameters &options);
46 
47  /// Retrieve a vector containing the requested variable for the entire data sample.
48  /// -# If the variable has previously been placed in a vector, return the vector.
49  /// -# If the variable is present in the input data set, fill the vector with those values.
50  /// -# If the variable is not present in the input data set, and 'optional' is true,
51  /// fill the vector with zeros.
52  /// -# If the variable is not present in the input data set, and 'optional' is false,
53  /// do not fill the vector.
54  /// Also store the name of the variable, enabling it to be retrieved later.
55  template <typename T>
56  std::vector<T>& get(const std::string &fullname)
57  {
58  // Determine variable and group names, optional, and number of entries per profile.
59  std::string varname;
60  std::string groupname;
61  ufo::splitVarGroup(fullname, varname, groupname);
62  bool optional = options_.getOptional(groupname);
63  size_t entriesPerProfile = options_.getEntriesPerProfile(groupname);
64 
65  std::vector <T> vec_all; // Vector storing data for entire sample.
66  if (entireSampleData_.find(fullname) != entireSampleData_.end()) {
67  // If the vector is already present, return it.
68  // If the type T is incorrect then boost::get will return an exception.
69  // Provide additional information if that occurs.
70  try {
71  return boost::get<std::vector<T>> (entireSampleData_[fullname]);
72  } catch (boost::bad_get) {
73  throw eckit::BadParameter("Template parameter passed to boost::get "
74  "probably has the wrong type", Here());
75  }
76  } else if (obsdb_.has(groupname, varname) || optional) {
77  // Initially fill the vector with the default value for the type T.
78  if (entriesPerProfile == 0) {
79  vec_all.assign(obsdb_.nlocs(), defaultValue(vec_all));
80  } else {
81  vec_all.assign(entriesPerProfile * obsdb_.nrecs(), defaultValue(vec_all));
82  }
83  // Retrieve variable from the obsdb if present, overwriting the default value.
84  if (obsdb_.has(groupname, varname)) obsdb_.get_db(groupname, varname, vec_all);
85  }
86 
87  // If the vector contains entirely missing values, clear it.
88  T missingValue; // Missing value for type T.
89  if (std::is_same<T, int>::value)
90  missingValue = util::missingValue(1);
91  else if (std::is_same<T, float>::value)
92  missingValue = util::missingValue(1.0f);
93  bool allMissing = true; // Signifies all elements in the vector are missing.
94  for (size_t idx = 0; allMissing && idx < vec_all.size(); ++idx)
95  allMissing = vec_all[idx] == missingValue;
96  if (allMissing) {
97  oops::Log::debug() << "All elements of " << fullname << " are missing" << std::endl;
98  vec_all.clear();
99  }
100 
101  // Add vector to map (even if it is empty).
102  entireSampleData_.emplace(fullname, std::move(vec_all));
103  return boost::get<std::vector<T>> (entireSampleData_[fullname]);
104  }
105 
106  /// Write various quantities to the obsdb so they can be used in future QC checks.
107  /// The particular variables written out are hardcoded but this could be changed to a
108  /// configurable list if required.
109  void writeQuantitiesToObsdb();
110 
111  private:
112  /// Put entire data vector on obsdb.
113  template <typename T>
114  void putDataVector(const std::string &fullname,
115  const std::vector <T> &datavec)
116  {
117  // Do not store the vector if it is empty.
118  if (datavec.empty()) return;
119 
120  std::string varname;
121  std::string groupname;
122  ufo::splitVarGroup(fullname, varname, groupname);
123  obsdb_.put_db(groupname, varname, datavec);
124  }
125 
126  /// Observation database.
127  ioda::ObsSpace &obsdb_;
128 
129  /// Configurable parameters.
131 
/// Fill value for a vector of integers.
/// The argument is not read; it only selects this overload.
int defaultValue(const std::vector <int> &vec)
{
  const int fillValue = 0;
  return fillValue;
}
134 
/// Fill value for a vector of floats.
/// The argument is not read; it only selects this overload.
float defaultValue(const std::vector <float> &vec)
{
  const float fillValue = 0.0f;
  return fillValue;
}
137 
/// Fill value for a vector of strings (the empty string).
/// The argument is not read; it only selects this overload.
std::string defaultValue(const std::vector <std::string> &vec)
{
  return std::string();
}
140 
141  /// Container of each variable in the entire data set.
142  std::unordered_map <std::string, boost::variant
143  <std::vector <int>, std::vector <float>, std::vector <std::string>>> entireSampleData_;
144  };
145 } // namespace ufo
146 
147 #endif // UFO_PROFILE_ENTIRESAMPLEDATAHANDLER_H_
ufo::EntireSampleDataHandler::defaultValue
std::string defaultValue(const std::vector< std::string > &vec)
Default value used to fill vector of strings.
Definition: EntireSampleDataHandler.h:139
MetOfficeQCFlags.h
DataHandlerParameters.h
ufo::EntireSampleDataHandler::defaultValue
float defaultValue(const std::vector< float > &vec)
Default value used to fill vector of floats.
Definition: EntireSampleDataHandler.h:136
ufo::splitVarGroup
void splitVarGroup(const std::string &vargrp, std::string &var, std::string &grp)
Definition: StringUtils.cc:19
ufo_radiancerttov_utils_mod::debug
logical, public debug
Definition: ufo_radiancerttov_utils_mod.F90:100
ufo::EntireSampleDataHandler::EntireSampleDataHandler
EntireSampleDataHandler(ioda::ObsSpace &obsdb, const DataHandlerParameters &options)
Definition: EntireSampleDataHandler.cc:14
ufo::EntireSampleDataHandler::obsdb_
ioda::ObsSpace & obsdb_
Observation database.
Definition: EntireSampleDataHandler.h:127
ioda
Definition: ObsAtmSfcInterp.h:24
ufo
Definition: RunCRTM.h:27
ufo::EntireSampleDataHandler::defaultValue
int defaultValue(const std::vector< int > &vec)
Default value used to fill vector of integers.
Definition: EntireSampleDataHandler.h:133
ufo::DataHandlerParameters
Options controlling the operation of the EntireSampleDataHandler and ProfileDataHandler classes.
Definition: DataHandlerParameters.h:25
ufo::EntireSampleDataHandler
Retrieve and store data for entire sample. This class uses lazy loading; vectors of variables are ret...
Definition: EntireSampleDataHandler.h:42
ufo::EntireSampleDataHandler::writeQuantitiesToObsdb
void writeQuantitiesToObsdb()
Definition: EntireSampleDataHandler.cc:20
ufo::DataHandlerParameters::getOptional
bool getOptional(const std::string &groupname) const
Determine whether a variable group is optional or not.
Definition: DataHandlerParameters.h:30
StringUtils.h
ufo::EntireSampleDataHandler::entireSampleData_
std::unordered_map< std::string, boost::variant< std::vector< int >, std::vector< float >, std::vector< std::string > > > entireSampleData_
Container of each variable in the entire data set.
Definition: EntireSampleDataHandler.h:143
ufo::DataHandlerParameters::getEntriesPerProfile
size_t getEntriesPerProfile(const std::string &groupname) const
Determine number of entries per profile for a variable group.
Definition: DataHandlerParameters.h:40
ufo::EntireSampleDataHandler::options_
const DataHandlerParameters & options_
Configurable parameters.
Definition: EntireSampleDataHandler.h:130
ufo::EntireSampleDataHandler::putDataVector
void putDataVector(const std::string &fullname, const std::vector< T > &datavec)
Put entire data vector on obsdb.
Definition: EntireSampleDataHandler.h:114
ufo::EntireSampleDataHandler::get
std::vector< T > & get(const std::string &fullname)
Definition: EntireSampleDataHandler.h:56