IODA
ObsData.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef CORE_OBSDATA_H_
9 #define CORE_OBSDATA_H_
10 
11 #include <map>
12 #include <memory>
13 #include <ostream>
14 #include <set>
15 #include <string>
16 #include <utility>
17 #include <vector>
18 
19 #include "eckit/container/KDTree.h"
20 #include "eckit/geometry/Point2.h"
21 #include "eckit/geometry/Point3.h"
22 #include "eckit/geometry/UnitSphere.h"
23 #include "eckit/mpi/Comm.h"
24 #include "oops/base/Variables.h"
25 #include "oops/util/DateTime.h"
26 #include "oops/util/Logger.h"
27 #include "oops/util/Printable.h"
28 #include "ioda/core/IodaUtils.h"
29 
30 #include "ioda/core/ObsSpaceContainer.h"
31 #include "ioda/distribution/Distribution.h"
32 #include "ioda/io/IodaIO.h"
33 
34 // Forward declarations
35 namespace eckit {
36  class Configuration;
37 }
38 
39 namespace ioda {
40  class ObsVector;
41 
42 //-------------------------------------------------------------------------------------
/*!
 * \brief Small keyed lookup table used for obs grouping.
 *
 * \details Thin wrapper around a std::map that associates a grouping key with a
 *          std::size_t value. The key type is a template parameter so the same
 *          wrapper can be instantiated for different key types.
 *
 * \tparam KeyType type of the grouping key; must be usable as a std::map key
 */
template <typename KeyType>
class ObsGroupingMap {
 public:
  /*! \details Returns true when Key is already present in the map */
  bool has(const KeyType & Key) const {
    return (obs_grouping_map_.find(Key) != obs_grouping_map_.end());
  }

  /*!
   * \details Adds the (Key, Val) pair to the map. If Key is already present the
   *          existing value is kept and Val is ignored.
   */
  void insert(const KeyType & Key, const std::size_t Val) {
    obs_grouping_map_.emplace(Key, Val);
  }

  /*!
   * \details Returns the value stored for Key. Throws std::out_of_range when Key
   *          is not present, so call has() first if the key may be missing.
   */
  std::size_t at(const KeyType & Key) const {
    return obs_grouping_map_.at(Key);
  }

 private:
  /*! \brief underlying key-to-value storage */
  std::map<KeyType, std::size_t> obs_grouping_map_;
};
61 
/// Enum type for obs variable data types
enum class ObsDtype {
  None,      ///< no data type (NOTE(review): presumably "variable not found" - confirm in ObsData::dtype)
  Float,     ///< floating point data
  Integer,   ///< integer data
  String,    ///< string data
  DateTime   ///< date/time data
};
70 
71 /// Observation Data
72 /*!
73  * \brief Observation data class for IODA
74  *
75  * \details This class handles the memory store of observation data. It handles the transfer
76  * of data between memory and files, the distribution of obs data across multiple
77  * process elements, the filtering out of obs data that is outside the DA timing
78  * window, the transfer of data between UFO, OOPS and IODA, and data type
79  * conversion that is "missing value aware".
80  *
81  * During the DA run, all data transfers are done in memory. The only time file I/O is
82  * invoked is during the constructor (read from the file into the obs container) and
83  * optionally during the destructor (write from obs container into the file).
84  *
85  * \author Stephen Herbener, Xin Zhang (JCSDA)
86  */
87 class ObsData : public util::Printable {
88  public:
89  typedef std::map<std::size_t, std::vector<std::size_t>> RecIdxMap;
90  typedef RecIdxMap::const_iterator RecIdxIter;
91  struct TreeTrait {
92  typedef eckit::geometry::Point3 Point;
93  typedef double Payload;
94  };
95  typedef eckit::KDTreeMemory<TreeTrait> KDTree;
96 
97  ObsData(const eckit::Configuration &, const eckit::mpi::Comm &,
98  const util::DateTime &, const util::DateTime &, const eckit::mpi::Comm &);
99  /*!
100  * \details Copy constructor for an ObsData object.
101  */
102  ObsData(const ObsData &);
103  ~ObsData();
104 
105  std::size_t gnlocs() const;
106  std::size_t nlocs() const;
107  std::size_t nrecs() const;
108  std::size_t nvars() const;
109  const std::vector<std::size_t> & recnum() const;
110  const std::vector<std::size_t> & index() const;
111 
112  bool has(const std::string &, const std::string &) const;
113  ObsDtype dtype(const std::string &, const std::string &) const;
114 
115  std::string obs_group_var() const;
116  std::string obs_sort_var() const;
117  std::string obs_sort_order() const;
118 
119  void get_db(const std::string & group, const std::string & name,
120  std::vector<int> & vdata) const;
121  void get_db(const std::string & group, const std::string & name,
122  std::vector<float> & vdata) const;
123  void get_db(const std::string & group, const std::string & name,
124  std::vector<double> & vdata) const;
125  void get_db(const std::string & group, const std::string & name,
126  std::vector<std::string> & vdata) const;
127  void get_db(const std::string & group, const std::string & name,
128  std::vector<util::DateTime> & vdata) const;
129 
130  void put_db(const std::string & group, const std::string & name,
131  const std::vector<int> & vdata);
132  void put_db(const std::string & group, const std::string & name,
133  const std::vector<float> & vdata);
134  void put_db(const std::string & group, const std::string & name,
135  const std::vector<double> & vdata);
136  void put_db(const std::string & group, const std::string & name,
137  const std::vector<std::string> & vdata);
138  void put_db(const std::string & group, const std::string & name,
139  const std::vector<util::DateTime> & vdata);
140 
141  KDTree & getKDTree();
142 
143  const RecIdxIter recidx_begin() const;
144  const RecIdxIter recidx_end() const;
145  bool recidx_has(const std::size_t RecNum) const;
146  std::size_t recidx_recnum(const RecIdxIter & Irec) const;
147  const std::vector<std::size_t> & recidx_vector(const RecIdxIter & Irec) const;
148  const std::vector<std::size_t> & recidx_vector(const std::size_t RecNum) const;
149  std::vector<std::size_t> recidx_all_recnums() const;
150 
151  /*! \details This method will return the name of the obs type being stored */
152  const std::string & obsname() const {return obsname_;}
153  /*! \details This method will return the handle to the configuration */
154  const eckit::Configuration & getConfig() const {return config_;}
155  /*! \details This method will return the start of the DA timing window */
156  const util::DateTime & windowStart() const {return winbgn_;}
157  /*! \details This method will return the end of the DA timing window */
158  const util::DateTime & windowEnd() const {return winend_;}
159  /*! \details This method will return the associated MPI communicator */
160  const eckit::mpi::Comm & comm() const {return commMPI_;}
161 
162  void printJo(const ObsVector &, const ObsVector &); // to be removed
163 
164  const oops::Variables & obsvariables() const {return obsvars_;}
165  const std::shared_ptr<Distribution> distribution() const { return dist_;}
166 
167  private:
168  void print(std::ostream &) const;
169 
171 
172  // Initialize the database with auto-generated locations
173  void generateDistribution(const eckit::Configuration &);
174  void genDistRandom(const eckit::Configuration & conf, std::vector<float> & Lats,
175  std::vector<float> & Lons, std::vector<util::DateTime> & Dtimes);
176  void genDistList(const eckit::Configuration & conf, std::vector<float> & Lats,
177  std::vector<float> & Lons, std::vector<util::DateTime> & Dtimes);
178 
179  // Initialize the database from the input file
180  void InitFromFile(const std::string & filename, const std::size_t MaxFrameSize);
181  std::vector<std::size_t> GenFrameIndexRecNums(const std::unique_ptr<IodaIO> & FileIO,
182  const std::size_t FrameStart, const std::size_t FrameSize);
183  bool InsideTimingWindow(const util::DateTime & ObsDt);
184  void BuildSortedObsGroups();
185  void createKDTree();
186 
187  template<typename VarType>
188  std::vector<VarType> ApplyIndex(const std::vector<VarType> & FullData,
189  const std::vector<std::size_t> & FullShape,
190  const std::vector<std::size_t> & Index,
191  std::vector<std::size_t> & IndexedShape) const;
192 
193  static std::string DesiredVarType(std::string & GroupName, std::string & FileVarType);
194 
195  // Dump the database into the output file
196  void SaveToFile(const std::string & file_name, const std::size_t MaxFrameSize);
197 
198  /*! \brief name of obs space */
199  std::string obsname_;
200 
201  /*! \brief Configuration file */
202  const eckit::LocalConfiguration config_;
203 
204  /*! \brief Beginning of DA timing window */
205  const util::DateTime winbgn_;
206 
207  /*! \brief End of DA timing window */
208  const util::DateTime winend_;
209 
210  /*! \brief MPI communicator */
211  const eckit::mpi::Comm & commMPI_;
212 
213  /*! \brief KD Tree */
214  std::shared_ptr<KDTree> kd_;
215 
216  /*! \brief total number of locations */
217  std::size_t gnlocs_;
218 
219  /*! \brief number of locations on this domain */
220  std::size_t nlocs_;
221 
222  /*! \brief number of variables */
223  std::size_t nvars_;
224 
225  /*! \brief number of records */
226  std::size_t nrecs_;
227 
228  /*! \brief flag, file has variables with unexpected data types */
230 
231  /*! \brief flag, file has variables with excess dimensions */
233 
234  /*! \brief path to input file */
235  std::string filein_;
236 
237  /*! \brief path to output file */
238  std::string fileout_;
239 
240  /*! \brief max frame size for input file */
241  std::size_t in_max_frame_size_;
242 
243  /*! \brief max frame size for output file */
244  std::size_t out_max_frame_size_;
245 
246  /*! \brief indexes of locations to extract from the input obs file */
247  std::vector<std::size_t> indx_;
248 
249  /*! \brief record numbers associated with the location indexes */
250  std::vector<std::size_t> recnums_;
251 
252  /*! \brief profile ordering */
254 
255  /*! \brief Multi-index containers */
260 
261  /*! \brief Observation "variables" to be simulated */
262  oops::Variables obsvars_;
263 
264  /*! \brief Distribution type */
265  std::string distname_;
266 
267  /*! \brief Variable that location grouping is based upon */
268  std::string obs_group_variable_;
269 
270  /*! \brief Variable that location group sorting is based upon */
271  std::string obs_sort_variable_;
272 
273  /*! \brief Sort order for obs grouping */
274  std::string obs_sort_order_;
275 
276  /*! \brief MPI distribution object */
277  std::shared_ptr<Distribution> dist_;
278 
279  /*! \brief maps for obs grouping via integer, float or string values */
283 
284  /*! \brief next available record number */
285  std::size_t next_rec_num_;
286 
287  /*! \brief unique record numbers */
288  std::set<std::size_t> unique_rec_nums_;
289 };
290 
291 } // namespace ioda
292 
293 #endif // CORE_OBSDATA_H_
ioda::ObsData::InitFromFile
void InitFromFile(const std::string &filename, const std::size_t MaxFrameSize)
Definition: ObsData.cc:679
ioda::ObsData::float_obs_grouping_
ObsGroupingMap< float > float_obs_grouping_
Definition: ObsData.h:281
ioda::ObsData::dtype
ObsDtype dtype(const std::string &, const std::string &) const
Definition: ObsData.cc:289
ioda::ObsData::winbgn_
const util::DateTime winbgn_
Beginning of DA timing window.
Definition: ObsData.h:205
ioda::ObsData::dist_
std::shared_ptr< Distribution > dist_
MPI distribution object.
Definition: ObsData.h:277
ioda::ObsData::put_db
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata)
transfer data from vdata to the obs container
Definition: ObsData.cc:238
ioda::ObsData::winend_
const util::DateTime winend_
End of DA timing window.
Definition: ObsData.h:208
ioda::ObsData::BuildSortedObsGroups
void BuildSortedObsGroups()
Definition: ObsData.cc:948
ioda::ObsGroupingMap::at
std::size_t at(const KeyType Key)
Definition: ObsData.h:54
ioda::ObsData::obs_group_variable_
std::string obs_group_variable_
Variable that location grouping is based upon.
Definition: ObsData.h:268
ioda::ObsData::~ObsData
~ObsData()
Definition: ObsData.cc:167
ioda::ObsData::datetime_database_
ObsSpaceContainer< util::DateTime > datetime_database_
Definition: ObsData.h:259
ioda::ObsGroupingMap::has
bool has(const KeyType Key)
Definition: ObsData.h:46
ioda::ObsData::comm
const eckit::mpi::Comm & comm() const
Definition: ObsData.h:160
ioda::ObsData::recnum
const std::vector< std::size_t > & recnum() const
Definition: ObsData.cc:380
ioda::ObsData::indx_
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
Definition: ObsData.h:247
ioda::ObsData::recidx_end
const RecIdxIter recidx_end() const
Definition: ObsData.cc:407
ioda::ObsData::recidx_recnum
std::size_t recidx_recnum(const RecIdxIter &Irec) const
Definition: ObsData.cc:426
ioda::ObsData::generateDistribution
void generateDistribution(const eckit::Configuration &)
Definition: ObsData.cc:482
ioda::ObsData::has
bool has(const std::string &, const std::string &) const
Definition: ObsData.cc:278
ioda::ObsData::nvars
std::size_t nvars() const
Definition: ObsData.cc:371
ioda::ObsDtype::Integer
@ Integer
ioda::ObsData
Observation Data.
Definition: ObsData.h:87
ioda::ObsData::nrecs
std::size_t nrecs() const
Definition: ObsData.cc:361
ioda::ObsData::recnums_
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
Definition: ObsData.h:250
ioda::ObsData::string_obs_grouping_
ObsGroupingMap< std::string > string_obs_grouping_
Definition: ObsData.h:282
ioda::ObsData::windowEnd
const util::DateTime & windowEnd() const
Definition: ObsData.h:158
ioda::ObsData::gnlocs
std::size_t gnlocs() const
Definition: ObsData.cc:339
ioda::ObsData::recidx_begin
const RecIdxIter recidx_begin() const
Definition: ObsData.cc:398
ioda::ObsData::obsname
const std::string & obsname() const
Definition: ObsData.h:152
ioda
Definition: IodaUtils.cc:13
ioda::ObsData::distname_
std::string distname_
Distribution type.
Definition: ObsData.h:265
ioda::ObsData::printJo
void printJo(const ObsVector &, const ObsVector &)
Definition: ObsData.cc:1201
ioda::ObsData::string_database_
ObsSpaceContainer< std::string > string_database_
Definition: ObsData.h:258
ioda::ObsData::kd_
std::shared_ptr< KDTree > kd_
KD Tree.
Definition: ObsData.h:214
ioda::ObsData::genDistList
void genDistList(const eckit::Configuration &conf, std::vector< float > &Lats, std::vector< float > &Lons, std::vector< util::DateTime > &Dtimes)
Definition: ObsData.cc:628
ioda::ObsDtype::String
@ String
ioda::ObsData::obs_sort_variable_
std::string obs_sort_variable_
Variable that location group sorting is based upon.
Definition: ObsData.h:271
ioda::ObsData::getConfig
const eckit::Configuration & getConfig() const
Definition: ObsData.h:154
ioda::ObsSpaceContainer< int >
ioda::ObsVector
ObsVector class to handle vectors in observation space for IODA.
Definition: src/ObsVector.h:34
ioda::ObsGroupingMap
Definition: ObsData.h:44
ioda::ObsData::filein_
std::string filein_
path to input file
Definition: ObsData.h:235
ioda::ObsData::recidx_all_recnums
std::vector< std::size_t > recidx_all_recnums() const
Definition: ObsData.cc:460
ioda::ObsData::getKDTree
KDTree & getKDTree()
Definition: ObsData.cc:1241
ioda::ObsData::ObsData
ObsData(const ObsData &)
eckit
Definition: LocalObsSpaceParameters.h:24
ioda::ObsGroupingMap::insert
void insert(const KeyType Key, const std::size_t Val)
Definition: ObsData.h:50
ioda::ObsData::ApplyIndex
std::vector< VarType > ApplyIndex(const std::vector< VarType > &FullData, const std::vector< std::size_t > &FullShape, const std::vector< std::size_t > &Index, std::vector< std::size_t > &IndexedShape) const
Definition: ObsData.cc:1155
ioda::ObsData::recidx_vector
const std::vector< std::size_t > & recidx_vector(const RecIdxIter &Irec) const
Definition: ObsData.cc:435
ioda::ObsData::obsvariables
const oops::Variables & obsvariables() const
Definition: ObsData.h:164
ioda::ObsData::obs_sort_order
std::string obs_sort_order() const
Definition: ObsData.cc:327
ioda::ObsData::operator=
ObsData & operator=(const ObsData &)
ioda::ObsData::get_db
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata) const
transfer data from the obs container to vdata
Definition: ObsData.cc:191
ioda::ObsData::in_max_frame_size_
std::size_t in_max_frame_size_
max frame size for input file
Definition: ObsData.h:241
ioda::ObsData::nlocs
std::size_t nlocs() const
Definition: ObsData.cc:351
ioda::ObsData::file_excess_dims_
bool file_excess_dims_
flag, file has variables with excess dimensions
Definition: ObsData.h:232
ioda::ObsData::int_obs_grouping_
ObsGroupingMap< int > int_obs_grouping_
maps for obs grouping via integer, float or string values
Definition: ObsData.h:280
ioda::ObsData::obs_sort_var
std::string obs_sort_var() const
Definition: ObsData.cc:318
ioda::ObsData::recidx_has
bool recidx_has(const std::size_t RecNum) const
Definition: ObsData.cc:416
ioda::ObsData::nlocs_
std::size_t nlocs_
number of locations on this domain
Definition: ObsData.h:220
ioda::ObsData::RecIdxIter
RecIdxMap::const_iterator RecIdxIter
Definition: ObsData.h:90
ioda::ObsData::obs_sort_order_
std::string obs_sort_order_
Sort order for obs grouping.
Definition: ObsData.h:274
ioda::ObsDtype
ObsDtype
Definition: ObsData.h:63
ioda::ObsData::config_
const eckit::LocalConfiguration config_
Configuration file.
Definition: ObsData.h:202
ioda::ObsData::next_rec_num_
std::size_t next_rec_num_
next available record number
Definition: ObsData.h:285
ioda::ObsData::int_database_
ObsSpaceContainer< int > int_database_
Multi-index containers.
Definition: ObsData.h:256
ioda::ObsData::TreeTrait::Point
eckit::geometry::Point3 Point
Definition: ObsData.h:92
ioda::ObsData::KDTree
eckit::KDTreeMemory< TreeTrait > KDTree
Definition: ObsData.h:95
ioda::ObsData::obs_group_var
std::string obs_group_var() const
Definition: ObsData.cc:309
ioda::ObsData::file_unexpected_dtypes_
bool file_unexpected_dtypes_
flag, file has variables with unexpected data types
Definition: ObsData.h:229
ioda::ObsData::out_max_frame_size_
std::size_t out_max_frame_size_
max frame size for output file
Definition: ObsData.h:244
ioda::ObsData::GenFrameIndexRecNums
std::vector< std::size_t > GenFrameIndexRecNums(const std::unique_ptr< IodaIO > &FileIO, const std::size_t FrameStart, const std::size_t FrameSize)
Definition: ObsData.cc:799
ioda::ObsData::print
void print(std::ostream &) const
Definition: ObsData.cc:663
ioda::ObsData::createKDTree
void createKDTree()
Definition: ObsData.cc:1209
ioda::ObsData::nvars_
std::size_t nvars_
number of variables
Definition: ObsData.h:223
ioda::ObsDtype::None
@ None
ioda::ObsData::InsideTimingWindow
bool InsideTimingWindow(const util::DateTime &ObsDt)
Definition: ObsData.cc:938
ioda::ObsData::TreeTrait
Definition: ObsData.h:91
ioda::ObsData::recidx_
RecIdxMap recidx_
profile ordering
Definition: ObsData.h:253
ioda::ObsData::windowStart
const util::DateTime & windowStart() const
Definition: ObsData.h:156
ioda::ObsData::RecIdxMap
std::map< std::size_t, std::vector< std::size_t > > RecIdxMap
Definition: ObsData.h:89
ioda::ObsData::unique_rec_nums_
std::set< std::size_t > unique_rec_nums_
unique record numbers
Definition: ObsData.h:288
ioda::ObsData::genDistRandom
void genDistRandom(const eckit::Configuration &conf, std::vector< float > &Lats, std::vector< float > &Lons, std::vector< util::DateTime > &Dtimes)
Definition: ObsData.cc:531
ioda::ObsData::fileout_
std::string fileout_
path to output file
Definition: ObsData.h:238
ioda::ObsData::obsname_
std::string obsname_
name of obs space
Definition: ObsData.h:199
ioda::ObsGroupingMap::obs_grouping_map_
std::map< KeyType, std::size_t > obs_grouping_map_
Definition: ObsData.h:59
ioda::ObsData::float_database_
ObsSpaceContainer< float > float_database_
Definition: ObsData.h:257
ioda::ObsData::index
const std::vector< std::size_t > & index() const
Definition: ObsData.cc:389
ioda::ObsData::commMPI_
const eckit::mpi::Comm & commMPI_
MPI communicator.
Definition: ObsData.h:211
ioda::ObsData::distribution
const std::shared_ptr< Distribution > distribution() const
Definition: ObsData.h:165
ioda::ObsData::nrecs_
std::size_t nrecs_
number of records
Definition: ObsData.h:226
ioda::ObsData::DesiredVarType
static std::string DesiredVarType(std::string &GroupName, std::string &FileVarType)
Definition: ObsData.cc:1180
ioda::ObsData::obsvars_
oops::Variables obsvars_
Observation "variables" to be simulated.
Definition: ObsData.h:262
ioda::ObsData::SaveToFile
void SaveToFile(const std::string &file_name, const std::size_t MaxFrameSize)
Definition: ObsData.cc:1004
ioda::ObsData::ObsData
ObsData(const eckit::Configuration &, const eckit::mpi::Comm &, const util::DateTime &, const util::DateTime &, const eckit::mpi::Comm &)
Definition: ObsData.cc:51
ioda::ObsDtype::Float
@ Float
ioda::ObsData::TreeTrait::Payload
double Payload
Definition: ObsData.h:93
ioda::ObsData::gnlocs_
std::size_t gnlocs_
total number of locations
Definition: ObsData.h:217