IODA
src/ObsSpace.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2021 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef OBSSPACE_H_
9 #define OBSSPACE_H_
10 
11 #include <functional>
12 #include <map>
13 #include <memory>
14 #include <numeric>
15 #include <ostream>
16 #include <set>
17 #include <string>
18 #include <type_traits>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "eckit/exception/Exceptions.h"
24 #include "eckit/mpi/Comm.h"
25 
26 #include "oops/base/ObsSpaceBase.h"
27 #include "oops/base/Variables.h"
28 #include "oops/util/DateTime.h"
29 #include "oops/util/Logger.h"
30 #include "ioda/core/IodaUtils.h"
31 #include "ioda/distribution/Distribution.h"
32 #include "ioda/Engines/Factory.h"
33 #include "ioda/Misc/Dimensions.h"
34 #include "ioda/ObsGroup.h"
35 #include "ioda/ObsSpaceParameters.h"
36 #include "ioda/Variables/Fill.h"
37 
38 // Forward declarations
39 namespace eckit {
40  class Configuration;
41 }
42 
43 namespace ioda {
44  class ObsFrameRead;
45  class ObsVector;
46 
47  //-------------------------------------------------------------------------------------
48  // Enum type for obs variable data types (this is the type returned by ObsSpace::dtype())
49  enum class ObsDtype {
50  None,
51  Float,
52  Integer,
53  String,
54  DateTime
55  };
56 
57  // Enum type for obs dimension ids
58  // The first two dimension names for now are nlocs and nchans. This will likely expand
59  // in the future, so make sure that this enum class and the following initializer
60  // function (presumably the ObsDimInfo constructor -- TODO confirm) stay in sync.
61  enum class ObsDimensionId {
62  Nlocs,   // id of the nlocs dimension (queried by ObsSpace::nlocs())
63  Nchans   // id of the nchans dimension (queried by ObsSpace::nchans())
64  };
65 
66  class ObsDimInfo {  // lookup tables relating dimension ids (ObsDimensionId), names and sizes
67  public:
68  ObsDimInfo();
69 
70  /// \brief return the standard id value (ObsDimensionId) for the given dimension name dimName
71  ObsDimensionId get_dim_id(const std::string & dimName) const;
72 
73  /// \brief return the dimension name for the given dimension id dimId
74  std::string get_dim_name(const ObsDimensionId dimId) const;
75 
76  /// \brief return the dimension size for the given dimension id dimId
77  std::size_t get_dim_size(const ObsDimensionId dimId) const;
78 
79  /// \brief set the dimension size for the given dimension id dimId to dimSize
80  void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize);
81 
82  private:
83  /// \brief map going from dim id to dim name
84  std::map<ObsDimensionId, std::string> dim_id_name_;
85 
86  /// \brief map going from dim id to dim size
87  std::map<ObsDimensionId, std::size_t> dim_id_size_;
88 
89  /// \brief map going from dim name to id
90  std::map<std::string, ObsDimensionId> dim_name_id_;
91  };
92 
93  /// @brief Template handlers for implicit variable conversion.
94  /// @tparam Type is the source type of the data.
95  template <class Type>
96  struct ConvertType {
97  /// @brief The type that data should be converted to upon write.
98  typedef Type to_type;   // primary template: to_type is Type itself, i.e. no conversion
99  };
100  template<>
101  struct ConvertType<double> {
102  typedef float to_type;   // specialization: data whose source type is double is converted to float
103  };
104 
105  /// \brief Observation data class for IODA
106  ///
107  /// \details This class handles the memory store of observation data. It handles
108  /// the transfer of data between memory and files, the distribution of obs data
109  /// across multiple process elements, the filtering out of obs data that is outside
110  /// the DA timing window, the transfer of data between UFO, OOPS and IODA, and data type
111  /// conversion that is "missing value aware".
112  ///
113  /// During the DA run, all data transfers are done in memory. The only time file I/O is
114  /// invoked is during the constructor (read from the file into the obs container) and
115  /// optionally during the destructor (write from obs container into the file).
116  class ObsSpace : public oops::ObsSpaceBase {
117  public:
118  //---------------------------- typedefs -------------------------------
119  typedef std::map<std::size_t, std::vector<std::size_t>> RecIdxMap;
120  typedef RecIdxMap::const_iterator RecIdxIter;
122 
123  //---------------------------- functions ------------------------------
124  /// \brief Config based constructor for an ObsSpace object.
125  ///
126  /// \details This constructor will read in from the obs file and transfer the
127  /// variables into the obs container. Obs falling outside the DA timing window,
128  /// specified by bgn and end, will be discarded before storing them in the
129  /// obs container.
130  ///
131  /// \param params Configuration parameters (an instance of ObsTopLevelParameters)
132  /// \param comm MPI communicator for model grouping
133  /// \param bgn DateTime object holding the start of the DA timing window
134  /// \param end DateTime object holding the end of the DA timing window
135  /// \param timeComm MPI communicator for ensemble
136  ObsSpace(const Parameters_ & params, const eckit::mpi::Comm & comm,
137  const util::DateTime & bgn, const util::DateTime & end,
138  const eckit::mpi::Comm & timeComm);
139  ObsSpace(const ObsSpace &);
140  virtual ~ObsSpace() {}
141 
142  /// \details This method will return the start of the DA timing window
143  const util::DateTime & windowStart() const {return winbgn_;}
144 
145  /// \details This method will return the end of the DA timing window
146  const util::DateTime & windowEnd() const {return winend_;}
147 
148  /// \details This method will return the associated MPI communicator
149  const eckit::mpi::Comm & comm() const {return commMPI_;}
150 
151  /// \details This method will return the associated parameters
152  const ObsSpaceParameters & params() const {return obs_params_;}
153 
154  /// \brief save the obs space data into a file (if obsdataout specified)
155  /// \details This function will save the obs space data into a file, but only if
156  /// the obsdataout parameter is specified in the YAML configuration.
157  /// Note that this function will do nothing if the obsdataout specification
158  /// is not present.
159  ///
160  /// The purpose of this save function is to fix an issue where the hdf5
161  /// library closes the file (via a C API) during the time when the
162  /// ObsSpace destructor (C++) is still writing to that file. These
163  /// actions can sometimes get out of sync since they are being triggered
164  /// from different sources during the clean up after a job completes.
165  void save();
166 
167  /// \brief return the total number of locations in the corresponding obs spaces
168  /// across all MPI tasks
169  std::size_t globalNumLocs() const {return gnlocs_;}
170 
171  /// \brief return number of locations from obs source that were outside the time window
173 
174  /// \brief return the number of locations in the obs space.
175  /// Note that nlocs may be smaller than global unique nlocs due to distribution of obs
176  /// across multiple process elements.
177  inline size_t nlocs() const { return get_dim_size(ObsDimensionId::Nlocs); }
178 
179  /// \brief return the number of channels in the container. If this is not a radiance
180  /// obs type, then this will return zero.
181  inline size_t nchans() const { return get_dim_size(ObsDimensionId::Nchans); }
182 
183  /// \brief return the number of records in the obs space container
184  /// \details This is the number of sets of locations after applying the
185  /// optional grouping.
186  std::size_t nrecs() const {return nrecs_;}
187 
188  /// \brief return the number of variables in the obs space container.
189  /// "Variables" refers to the quantities that can be assimilated as opposed to meta data.
190  std::size_t nvars() const;
191 
192  /// \brief return the standard dimension name for the given dimension id
193  std::string get_dim_name(const ObsDimensionId dimId) const {
194  return dim_info_.get_dim_name(dimId);
195  }
196 
197  /// \brief return the standard dimension size for the given dimension id
198  std::size_t get_dim_size(const ObsDimensionId dimId) const {
199  return dim_info_.get_dim_size(dimId);
200  }
201 
202  /// \brief return the standard dimension id for the given dimension name
203  ObsDimensionId get_dim_id(const std::string & dimName) const {
204  return dim_info_.get_dim_id(dimName);
205  }
206 
207  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.group variables
208  const std::vector<std::string> & obs_group_vars() const;
209 
210  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort variable
211  std::string obs_sort_var() const;
212 
213  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort order
214  std::string obs_sort_order() const;
215 
216  /// \brief return the name of the obs type being stored
217  const std::string & obsname() const {return obsname_;}
218 
219  /// \brief return the name of the MPI distribution
220  std::string distname() const {return obs_params_.top_level_.distName;}
221 
222  /// \brief return reference to the record number vector
223  const std::vector<std::size_t> & recnum() const {return recnums_;}
224 
225  /// \brief return reference to the index vector
226  /// \details This method returns a reference to the index vector
227  /// data member. This is for read only access.
228  /// The returned vector has length nlocs() and contains the original indices of
229  /// locations from the input ioda file corresponding to locations stored in this
230  /// ObsSpace object -- i.e. those that were selected by the timing window filter
231  /// and the MPI distribution.
232  ///
233  /// Example 1: Suppose the RoundRobin distribution is used and there are two
234  /// MPI tasks (ranks 0 and 1). The even-numbered locations from the file will go
235  /// to rank 0, and the odd-numbered locations will go to rank 1. This means that
236  /// `ObsSpace::index()` will return the vector `0, 2, 4, 6, ...` on rank 0 and
237  /// `1, 3, 5, 7, ...` on rank 1.
238  ///
239  /// Example 2: Suppose MPI is not used and the file contains 10 locations in total,
240  /// but locations 2, 3 and 7 are outside the DA timing window. In this case,
241  /// `ObsSpace::index()` will return `0, 1, 4, 5, 6, 8, 9`.
242  const std::vector<std::size_t> & index() const {return indx_;}
243 
244  /// \brief return true if variable `name` exists in group `group` or (unless `skipDerived`
245  /// is set to true) `"Derived" + group`.
246  bool has(const std::string & group, const std::string & name,
247  bool skipDerived = false) const;
248 
249  /// \brief return data type for group/variable
250  /// \param group Group name containing the variable
251  /// \param name Variable name
252  /// \param skipDerived
253  /// By default, this function will look for the variable `name` in the group `"Derived" +
254  /// group` first and only if it doesn't exist will it look in the group `group`. Set this
255  /// parameter to `true` to look only in the group `group`.
256  ObsDtype dtype(const std::string & group, const std::string & name,
257  bool skipDerived = false) const;
258 
259  /// \brief transfer data from the obs container to vdata
260  ///
261  /// \details The following get_db methods are the same except for the data type
262  /// of the data being transferred (integer, float, double, string, DateTime). The
263  /// caller needs to allocate the memory that the vdata parameter points to
264  ///
265  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
266  /// \param name Name of container variable
267  /// \param vdata Vector where container data is being transferred to
268  /// \param chanSelect Channel selection (list of channel numbers)
269  /// \param skipDerived
270  /// By default, this function will look for the variable `name` in the group `"Derived" +
271  /// group` first and only if it doesn't exist will it look in the group `group`. Set this
272  /// parameter to `true` to look only in the group `group`.
273  void get_db(const std::string & group, const std::string & name,
274  std::vector<int> & vdata,
275  const std::vector<int> & chanSelect = { },
276  bool skipDerived = false) const;
277  void get_db(const std::string & group, const std::string & name,
278  std::vector<float> & vdata,
279  const std::vector<int> & chanSelect = { },
280  bool skipDerived = false) const;
281  void get_db(const std::string & group, const std::string & name,
282  std::vector<double> & vdata,
283  const std::vector<int> & chanSelect = { },
284  bool skipDerived = false) const;
285  void get_db(const std::string & group, const std::string & name,
286  std::vector<std::string> & vdata,
287  const std::vector<int> & chanSelect = { },
288  bool skipDerived = false) const;
289  void get_db(const std::string & group, const std::string & name,
290  std::vector<util::DateTime> & vdata,
291  const std::vector<int> & chanSelect = { },
292  bool skipDerived = false) const;
293 
294  /// \brief transfer data from vdata to the obs container
295  ///
296  /// \details The following put_db methods are the same except for the data type
297  /// of the data being transferred (integer, float, double, string, DateTime). The
298  /// caller needs to allocate and assign the memory that the vdata parameter points to.
299  ///
300  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
301  /// \param name Name of container variable
302  /// \param vdata Vector where container data is being transferred from
303  /// \param dimList Vector of dimension names (for creating variable if needed)
304  void put_db(const std::string & group, const std::string & name,
305  const std::vector<int> & vdata,
306  const std::vector<std::string> & dimList = { "nlocs" });
307  void put_db(const std::string & group, const std::string & name,
308  const std::vector<float> & vdata,
309  const std::vector<std::string> & dimList = { "nlocs" });
310  void put_db(const std::string & group, const std::string & name,
311  const std::vector<double> & vdata,
312  const std::vector<std::string> & dimList = { "nlocs" });
313  void put_db(const std::string & group, const std::string & name,
314  const std::vector<std::string> & vdata,
315  const std::vector<std::string> & dimList = { "nlocs" });
316  void put_db(const std::string & group, const std::string & name,
317  const std::vector<util::DateTime> & vdata,
318  const std::vector<std::string> & dimList = { "nlocs" });
319 
320  /// \brief Return the begin iterator associated with the recidx_ data member
321  const RecIdxIter recidx_begin() const;
322 
323  /// \brief Return the end iterator associated with the recidx_ data member
324  const RecIdxIter recidx_end() const;
325 
326  /// \brief true if given record number exists in the recidx_ data member
327  /// \param recNum Record number being searched for
328  bool recidx_has(const std::size_t recNum) const;
329 
330  /// \brief true if the groups in the recidx data member are sorted
331  bool obsAreSorted() const { return recidx_is_sorted_; }
332 
333  /// \brief return record number pointed to by the given iterator
334  /// \param irec Iterator into the recidx_ data member
335  std::size_t recidx_recnum(const RecIdxIter & irec) const;
336 
337  /// \brief return record number vector pointed to by the given iterator
338  /// \param irec Iterator into the recidx_ data member
339  const std::vector<std::size_t> & recidx_vector(const RecIdxIter & irec) const;
340 
341  /// \brief return record number vector selected by the given record number
342  /// \param recNum Record number being searched for
343  const std::vector<std::size_t> & recidx_vector(const std::size_t recNum) const;
344 
345  /// \brief return all record numbers from the recidx_ data member
346  std::vector<std::size_t> recidx_all_recnums() const;
347 
348  /// \brief return the collection of all simulated variables
349  const oops::Variables & obsvariables() const {return obsvars_;}
350 
351  /// \brief return the collection of simulated variables loaded from the input file
352  const oops::Variables & initial_obsvariables() const
353  { return obs_params_.top_level_.simVars; }
354 
355  /// \brief return the collection of derived simulated variables (variables computed
356  /// after loading the input file)
357  const oops::Variables & derived_obsvariables() const
359 
360  /// \brief return MPI distribution object
361  std::shared_ptr<const Distribution> distribution() const { return dist_;}
362 
363  private:
364  // ----------------------------- private data members ---------------------------
365  /// \brief Beginning of DA timing window
366  const util::DateTime winbgn_;
367 
368  /// \brief End of DA timing window
369  const util::DateTime winend_;
370 
371  /// \brief MPI communicator
372  const eckit::mpi::Comm & commMPI_;
373 
374  /// \brief total number of locations
375  std::size_t gnlocs_;
376 
377  /// \brief number of nlocs from the obs source that are outside the time window
378  std::size_t gnlocs_outside_timewindow_;
379 
380  /// \brief number of records
381  std::size_t nrecs_;
382 
383  /// \brief dimension information for variables in this obs space
384  ObsDimInfo dim_info_;
385 
386  /// \brief map to go from channel number (not necessarily consecutive)
387  /// to channel index (consecutive, starting from zero).
388  std::map<int, int> chan_num_to_index_;
389 
390  /// \brief observation data store
392 
393  /// \brief obs io parameters
395 
396  /// \brief name of obs space
397  std::string obsname_;
398 
399  /// \brief Observation "variables" to be simulated
400  oops::Variables obsvars_;
401 
402  /// \brief MPI distribution object
403  std::shared_ptr<const Distribution> dist_;
404 
405  /// \brief indexes of locations to extract from the input obs file
406  std::vector<std::size_t> indx_;
407 
408  /// \brief record numbers associated with the location indexes
409  std::vector<std::size_t> recnums_;
410 
411  /// \brief profile ordering
412  RecIdxMap recidx_;
413 
414  /// \brief indicator whether the data in recidx_ is sorted
416 
417  /// \brief map showing association of dim names with each variable name
418  VarDimMap dims_attached_to_vars_;
419 
420  /// \brief cache for frontend selection
421  std::map<std::vector<std::string>, Selection> known_fe_selections_;
422 
423  /// \brief cache for backend selection
424  std::map<std::vector<std::string>, Selection> known_be_selections_;
425 
426  /// \brief disable the "=" operator
427  ObsSpace & operator= (const ObsSpace &) = delete;
428 
429  // ----------------------------- private functions ------------------------------
430  /// \brief print function for oops::Printable class
431  /// \param os output stream
432  void print(std::ostream & os) const;
433 
434  /// \brief Initialize the database from a source (ObsFrame object)
435  /// \param obsFrame obs source object
436  void createObsGroupFromObsFrame(ObsFrameRead & obsFrame);
437 
438  /// \brief Extend the ObsSpace according to the method requested in
439  /// the configuration file.
440  /// \param params object containing specs for extending the ObsSpace
441  void extendObsSpace(const ObsExtendParameters & params);
442 
443  /// \brief For each simulated variable that doesn't have an accompanying array
444  /// in the ObsError or DerivedObsError group, create one, fill it with missing values
445  /// and add it to the DerivedObsError group.
446  void createMissingObsErrors();
447 
448  /// \brief Dump the database into the output file
449  void saveToFile();
450 
451  /// \brief Create the recidx data structure holding sorted record groups
452  /// \details This method will construct a data structure that holds the
453  /// location order within each group sorted by the values of the specified
454  /// sort variable.
455  void buildSortedObsGroups();
456 
457  /// \brief Create the recidx data structure with unsorted record groups
458  /// \details This method will initialize the recidx structure without
459  /// any particular ordering of the record groups.
460  void buildRecIdxUnsorted();
461 
462  /// \brief initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
463  /// \param obsFrame obs source object
464  void initFromObsSource(ObsFrameRead & obsFrame);
465 
466  /// \brief resize along nlocs dimension
467  /// \param nlocsSize new size to either append or reset
468  /// \param append when true append nlocsSize to current size, otherwise reset size
469  void resizeNlocs(const Dimensions_t nlocsSize, const bool append);
470 
471  /// \brief read in values for variable from obs source
472  /// \param obsFrame obs frame object
473  /// \param varName Name of variable in obs source object
474  /// \param varValues values for variable
475  template<typename VarType>
476  bool readObsSource(ObsFrameRead & obsFrame,
477  const std::string & varName, std::vector<VarType> & varValues);
478 
479  /// \brief store a variable in the obs_group_ object
480  /// \tparam VarType element type of the varValues vector
481  /// \param varName Name of obs_group_ variable for obs_group_ object
482  /// \param varValues Values for obs_group_ variable
483  /// \param frameStart is the start of the ObsFrame
484  /// \param frameCount is the size of the ObsFrame
485  template<typename VarType>
486  void storeVar(const std::string & varName, std::vector<VarType> & varValues,
487  const Dimensions_t frameStart, const Dimensions_t frameCount);
488 
489  /// \brief get fill value for use in the obs_group_ object (the value util::missingValue yields for DataType)
490  template<typename DataType>
491  DataType getFillValue() {
492  DataType fillVal = util::missingValue(fillVal);  // NOTE(review): fillVal appears in its own initializer; presumably only its type is used -- confirm
493  return fillVal;
494  }
495 
496  /// \brief load a variable from the obs_group_ object
497  /// \details This function will load data from the obs_group_ object into
498  /// the memory buffer (vector) varValues. The chanSelect parameter
499  /// is only used when the variable is 2D radiance data (nlocs X nchans),
500  /// and contains a list of channel numbers to be selected from the
501  /// obs_group_ variable.
502  /// \param group Name of Group in obs_group_
503  /// \param name Name of Variable in group
504  /// \param chanSelect Vector of channel numbers for selection
505  /// \param varValues memory buffer that receives the data loaded from the obs_group_ variable
506  /// \param skipDerived
507  /// By default, this function will search for the variable `name` both in the group
508  /// `group` and `"Derived" + group`. Set this parameter to `true` to search only in the
509  /// group `group`.
510  template<typename VarType>
511  void loadVar(const std::string & group, const std::string & name,
512  const std::vector<int> & chanSelect,
513  std::vector<VarType> & varValues, bool skipDerived = false) const;
514 
515  /// \brief save a variable to the obs_group_ object
516  /// \param group Name of Group in obs_group_
517  /// \param name Name of Variable in group.
518  /// \param varValues values to be saved
519  /// \param dimList Vector of dimension names (for creating variable if needed)
520  ///
521  /// If the group `group` does not contain a variable with the specified name, but this name
522  /// has the form <string>_<integer> and `obs_group_` contains an `nchans` dimension, this
523  /// function will save `varValues` in the slice of variable <string> corresponding to
524  /// channel <integer>. If channel <integer> does not exist or the variable <string> already
525  /// exists but is not associated with the `nchans` dimension, an exception will be thrown.
526  template<typename VarType>
527  void saveVar(const std::string & group, std::string name,
528  const std::vector<VarType> & varValues,
529  const std::vector<std::string> & dimList);
530 
531  /// \brief Create selections of slices of the variable \p variable along dimension
532  /// \p nchansDimIndex corresponding to channels \p channels.
533  ///
534  /// \returns The number of elements in each selection.
535  std::size_t createChannelSelections(const Variable & variable,
536  std::size_t nchansDimIndex,
537  const std::vector<int> & channels,
538  Selection & memSelect,
539  Selection & obsGroupSelect) const;
540 
541  /// \brief create set of variables from source variables and lists
542  /// \param srcVarContainer Has_Variables object from source
543  /// \param destVarContainer Has_Variables object from destination
544  /// \param dimsAttachedToVars Map containing list of attached dims for each variable
545  void createVariables(const Has_Variables & srcVarContainer,
546  Has_Variables & destVarContainer,
547  const VarDimMap & dimsAttachedToVars);
548 
549  /// \brief open an obs_group_ variable, create the variable if necessary
550  template<typename VarType>
551  Variable openCreateVar(const std::string & varName,
552  const std::vector<std::string> & varDimList) {
553  Variable var;
554  if (obs_group_.vars.exists(varName)) {
555  var = obs_group_.vars.open(varName);
556  } else {
557  // Create a vector of the dimension variables
558  std::vector<Variable> varDims;
559  for (auto & dimName : varDimList) {
560  varDims.push_back(obs_group_.vars.open(dimName));
561  }
562 
563  // Create the variable. Use the JEDI internal missing value marks for
564  // fill values.
565  VarType fillVal = this->getFillValue<VarType>();
567  params.chunk = true;
568  params.compressWithGZIP();
569  params.setFillValue<VarType>(fillVal);
570 
571  var = obs_group_.vars.createWithScales<VarType>(varName, varDims, params);
572  }
573  return var;
574  }
575 
576  /// \brief fill in the channel number to channel index map
577  void fillChanNumToIndexMap();
578 
579  /// \brief split off the channel number suffix from a given variable name
580  /// \details If the given variable name does not exist, the chanSelect vector
581  /// is empty, and the given variable name has a suffix matching
582  /// "_[0-9][0-9]*" (ie, a numeric suffix), then this routine will strip
583  /// off the channel number from the name and place that channel number
584  /// into the output chanSelectToUse vector. The new name will be returned
585  /// in the nameToUse string.
586  /// This is being done for backward compatibility until the ufo Variables
587  /// class and its clients are modified to handle a single variable name
588  /// and a vector of channel numbers.
589  /// \param group Name of Group in obs_group_
590  /// \param name Name of Variable in group
591  /// \param chanSelect Vector of channel numbers for selection
592  /// \param nameToUse (output) Name of Variable after splitting off the channel number
     /// \param chanSelectToUse (output) Vector that receives the channel number split off from name
593  /// \param skipDerived
594  /// By default, this function will search for the variable `name` both in the group
595  /// `group` and `"Derived" + group`. Set this parameter to `true` to search only in the
596  /// group `group`.
597  void splitChanSuffix(const std::string & group, const std::string & name,
598  const std::vector<int> & chanSelect, std::string & nameToUse,
599  std::vector<int> & chanSelectToUse,
600  bool skipDerived = false) const;
601 
602  /// \brief Extend the given variable
603  /// \param extendVar database variable to be extended
604  /// \param upperBoundOnGlobalNumOriginalRecs upper bound, across all processors,
605  /// of the number of records in the original ObsSpace.
606  template <typename DataType>
607  void extendVariable(Variable & extendVar, const size_t upperBoundOnGlobalNumOriginalRecs);
608  };
609 
610 } // namespace ioda
611 
612 #endif // OBSSPACE_H_
Describe the dimensions of a ioda::Attribute or ioda::Variable.
Definitions for setting up backends with file and memory I/O.
Fill value getters and setters.
Interfaces for ioda::ObsGroup and related classes.
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
Definition: ObsSpace.cc:92
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
Definition: ObsSpace.cc:84
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
Definition: ObsSpace.cc:80
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
Definition: src/ObsSpace.h:87
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
Definition: ObsSpace.cc:88
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
Definition: src/ObsSpace.h:90
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
Definition: src/ObsSpace.h:84
Implementation of ObsFrameRead class.
An ObsGroup is a specialization of a ioda::Group. It provides convenience functions and guarantees th...
Definition: ObsGroup.h:32
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
Definition: ObsSpace.cc:1182
void storeVar(const std::string &varName, std::vector< VarType > &varValues, const Dimensions_t frameStart, const Dimensions_t frameCount)
store a variable in the obs_group_ object
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
Definition: ObsSpace.cc:923
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the standard dimension size for the given dimension id
Definition: src/ObsSpace.h:198
void saveToFile()
Dump the database into the output file.
Definition: ObsSpace.cc:1066
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
Definition: src/ObsSpace.h:223
void put_db(const std::string &group, const std::string &name, const std::vector< util::DateTime > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
virtual ~ObsSpace()
Definition: src/ObsSpace.h:140
std::shared_ptr< const Distribution > dist_
MPI distribution object.
Definition: src/ObsSpace.h:403
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
transfer data from the obs container to vdata
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
Definition: src/ObsSpace.h:378
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
Definition: ObsSpace.cc:1058
ObsDimInfo dim_info_
dimension information for variables in this obs space
Definition: src/ObsSpace.h:384
void get_db(const std::string &group, const std::string &name, std::vector< float > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
void get_db(const std::string &group, const std::string &name, std::vector< std::string > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
size_t nchans() const
return the number of channels in the container. If this is not a radiance obs type,...
Definition: src/ObsSpace.h:181
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
Definition: src/ObsSpace.h:418
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
Definition: src/ObsSpace.h:193
std::size_t nrecs() const
return the number of records in the obs space container
Definition: src/ObsSpace.h:186
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame ojbect)
Definition: ObsSpace.cc:446
std::string obsname_
name of obs space
Definition: src/ObsSpace.h:397
ObsTopLevelParameters Parameters_
Definition: src/ObsSpace.h:121
std::size_t nrecs_
number of records
Definition: src/ObsSpace.h:381
DataType getFillValue()
get fill value for use in the obs_group_ object
Definition: src/ObsSpace.h:491
void print(std::ostream &os) const
print function for oops::Printable class
Definition: ObsSpace.cc:436
void get_db(const std::string &group, const std::string &name, std::vector< double > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
void put_db(const std::string &group, const std::string &name, const std::vector< float > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
const std::vector< std::size_t > & index() const
return reference to the index vector
Definition: src/ObsSpace.h:242
void put_db(const std::string &group, const std::string &name, const std::vector< std::string > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
ObsSpace(const ObsSpace &)
oops::Variables obsvars_
Observation "variables" to be simulated.
Definition: src/ObsSpace.h:400
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
Definition: src/ObsSpace.h:388
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
const eckit::mpi::Comm & comm() const
Definition: src/ObsSpace.h:149
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
Definition: ObsSpace.cc:579
void loadVar(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::vector< VarType > &varValues, bool skipDerived=false) const
load a variable from the obs_group_ object
RecIdxMap::const_iterator RecIdxIter
Definition: src/ObsSpace.h:120
void createMissingObsErrors()
For each simulated variable that doesn't have an accompanying array in the ObsError or DerivedObsErro...
Definition: ObsSpace.cc:1316
RecIdxMap recidx_
profile ordering
Definition: src/ObsSpace.h:412
std::shared_ptr< const Distribution > distribution() const
return MPI distribution object
Definition: src/ObsSpace.h:361
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard dimension id for the given dimension name
Definition: src/ObsSpace.h:203
ObsSpaceParameters obs_params_
obs io parameters
Definition: src/ObsSpace.h:394
std::string distname() const
return the name of the MPI distribution
Definition: src/ObsSpace.h:220
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
Definition: ObsSpace.cc:1009
size_t nlocs() const
return the number of locations in the obs space. Note that nlocs may be smaller than global unique nl...
Definition: src/ObsSpace.h:177
const util::DateTime winbgn_
Beginning of DA timing window.
Definition: src/ObsSpace.h:366
void get_db(const std::string &group, const std::string &name, std::vector< util::DateTime > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
ObsGroup obs_group_
observation data store
Definition: src/ObsSpace.h:391
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
Definition: src/ObsSpace.h:406
const util::DateTime winend_
End of DA timing window.
Definition: src/ObsSpace.h:369
const util::DateTime & windowStart() const
Definition: src/ObsSpace.h:143
std::size_t nvars() const
return the number of variables in the obs space container. "Variables" refers to the quantities that ...
bool obsAreSorted() const
true if the groups in the recidx data member are sorted
Definition: src/ObsSpace.h:331
const ObsSpaceParameters & params() const
Definition: src/ObsSpace.h:152
Variable openCreateVar(const std::string &varName, const std::vector< std::string > &varDimList)
open an obs_group_ variable, create the variable if necessary
Definition: src/ObsSpace.h:551
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
Definition: ObsSpace.cc:684
const oops::Variables & derived_obsvariables() const
return the collection of derived simulated variables (variables computed after loading the input file...
Definition: src/ObsSpace.h:357
bool readObsSource(ObsFrameRead &obsFrame, const std::string &varName, std::vector< VarType > &varValues)
read in values for variable from obs source
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable variable along dimension nchansDimIndex corresponding to ...
Definition: ObsSpace.cc:808
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
Definition: src/ObsSpace.h:415
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Definition: src/ObsSpace.h:172
const eckit::mpi::Comm & commMPI_
MPI communicator.
Definition: src/ObsSpace.h:372
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
Definition: src/ObsSpace.h:169
std::size_t gnlocs_
total number of locations
Definition: src/ObsSpace.h:375
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
Definition: src/ObsSpace.h:424
void put_db(const std::string &group, const std::string &name, const std::vector< double > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
const oops::Variables & obsvariables() const
return the collection of all simulated variables
Definition: src/ObsSpace.h:349
std::map< std::size_t, std::vector< std::size_t > > RecIdxMap
Definition: src/ObsSpace.h:119
const oops::Variables & initial_obsvariables() const
return the collection of simulated variables loaded from the input file
Definition: src/ObsSpace.h:352
void extendVariable(Variable &extendVar, const size_t upperBoundOnGlobalNumOriginalRecs)
Extend the given variable.
const std::string & obsname() const
return the name of the obs type being stored
Definition: src/ObsSpace.h:217
const util::DateTime & windowEnd() const
Definition: src/ObsSpace.h:146
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
Definition: src/ObsSpace.h:421
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
Definition: src/ObsSpace.h:409
ObsTopLevelParameters top_level_
sub groups of parameters
oops::RequiredParameter< oops::Variables > simVars
simulated variables
oops::Parameter< oops::Variables > derivedSimVars
oops::Parameter< std::string > distName
name of MPI distribution
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Definition: Selection.h:48
Represents the "type" (i.e. integer, string, float) of a piece of data.
Definition: Type.h:123
Variables store data!
Definition: Variable.h:680
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
ObsDimensionId
Definition: src/ObsSpace.h:61
Template handlers for implicit variable conversion.
Definition: src/ObsSpace.h:96
Type to_type
The type that data should be converted to upon write.
Definition: src/ObsSpace.h:98
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57