IODA Bundle
ioda/src/ObsSpace.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2021 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef OBSSPACE_H_
9 #define OBSSPACE_H_
10 
11 #include <functional>
12 #include <map>
13 #include <memory>
14 #include <numeric>
15 #include <ostream>
16 #include <set>
17 #include <string>
18 #include <type_traits>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "eckit/exception/Exceptions.h"
24 #include "eckit/mpi/Comm.h"
25 
26 #include "oops/base/ObsSpaceBase.h"
27 #include "oops/base/Variables.h"
28 #include "oops/util/DateTime.h"
29 #include "oops/util/Logger.h"
30 #include "ioda/core/IodaUtils.h"
31 #include "ioda/distribution/Distribution.h"
32 #include "ioda/Engines/Factory.h"
33 #include "ioda/Misc/Dimensions.h"
34 #include "ioda/ObsGroup.h"
35 #include "ioda/ObsSpaceParameters.h"
36 #include "ioda/Variables/Fill.h"
37 
38 // Forward declarations
39 namespace eckit {
40  class Configuration;
41 }
42 
43 namespace ioda {
44  class ObsFrameRead;
45  class ObsVector;
46 
47  //-------------------------------------------------------------------------------------
48  // Enum type for obs variable data types (the type reported by ObsSpace::dtype)
49  enum class ObsDtype {
50  None,      // presumably "no/unknown type" -- TODO confirm against ObsSpace::dtype
51  Float,
52  Integer,
53  String,
54  DateTime
55  };
56 
57  // Enum type for obs dimension ids
58  // The first two dimension names for now are nlocs and nchans. This will likely expand
59  // in the future, so make sure that this enum class and the following initializer
60  // function (presumably the ObsDimInfo constructor -- TODO confirm) stay in sync.
61  enum class ObsDimensionId {
62  Nlocs,   // dimension whose size is reported by ObsSpace::nlocs()
63  Nchans   // dimension whose size is reported by ObsSpace::nchans()
64  };
65 
66  class ObsDimInfo {  // lookup tables for obs dimensions: id <-> name, and id -> size
67  public:
68  ObsDimInfo();  // defined out of line; presumably fills the maps below -- TODO confirm
69 
70  /// \brief return the standard id value for the given dimension name
71  ObsDimensionId get_dim_id(const std::string & dimName) const;
72 
73  /// \brief return the dimension name for the given dimension id
74  std::string get_dim_name(const ObsDimensionId dimId) const;
75 
76  /// \brief return the dimension size for the given dimension id
77  std::size_t get_dim_size(const ObsDimensionId dimId) const;
78 
79  /// \brief set the dimension size for the given dimension id
80  void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize);
81 
82  private:
83  /// \brief map going from dim id to dim name
84  std::map<ObsDimensionId, std::string> dim_id_name_;
85 
86  /// \brief map going from dim id to dim size
87  std::map<ObsDimensionId, std::size_t> dim_id_size_;
88 
89  /// \brief map going from dim name to dim id
90  std::map<std::string, ObsDimensionId> dim_name_id_;
91  };
92 
93  /// @brief Template handlers for implicit variable conversion.
94  /// @tparam Type is the source type of the data.
95  template <class Type>
96  struct ConvertType {  // primary template: no conversion, to_type is Type itself
97  /// @brief The type that data should be converted to upon write.
98  typedef Type to_type;
99  };
100  template<>
101  struct ConvertType<double> {  // specialization: double is mapped to float
102  typedef float to_type;
103  };
104 
105  /// \brief Observation data class for IODA
106  ///
107  /// \details This class handles the memory store of observation data. It handles
108  /// the transfer of data between memory and files, the distribution of obs data
109  /// across multiple process elements, the filtering out of obs data that is outside
110  /// the DA timing window, the transfer of data between UFO, OOPS and IODA, and data type
111  /// conversion that is "missing value aware".
112  ///
113  /// During the DA run, all data transfers are done in memory. The only time file I/O is
114  /// invoked is during the constructor (read from the file into the obs container) and
115  /// optionally when save() is called (write from obs container into the file).
116  class ObsSpace : public oops::ObsSpaceBase {
117  public:
118  //---------------------------- typedefs -------------------------------
119  typedef std::map<std::size_t, std::vector<std::size_t>> RecIdxMap;
120  typedef RecIdxMap::const_iterator RecIdxIter;
122 
123  //---------------------------- functions ------------------------------
124  /// \brief Config based constructor for an ObsSpace object.
125  ///
126  /// \details This constructor will read in from the obs file and transfer the
127  /// variables into the obs container. Obs falling outside the DA timing window,
128  /// specified by bgn and end, will be discarded before storing them in the
129  /// obs container.
130  ///
131  /// \param params Configuration parameters (an instance of ObsTopLevelParameters)
132  /// \param comm MPI communicator for model grouping
133  /// \param bgn DateTime object holding the start of the DA timing window
134  /// \param end DateTime object holding the end of the DA timing window
135  /// \param timeComm MPI communicator for ensemble
136  ObsSpace(const Parameters_ & params, const eckit::mpi::Comm & comm,
137  const util::DateTime & bgn, const util::DateTime & end,
138  const eckit::mpi::Comm & timeComm);
139  ObsSpace(const ObsSpace &);
140  virtual ~ObsSpace() {}
141 
142  /// \details This method will return the start of the DA timing window
143  const util::DateTime & windowStart() const {return winbgn_;}
144 
145  /// \details This method will return the end of the DA timing window
146  const util::DateTime & windowEnd() const {return winend_;}
147 
148  /// \details This method will return the associated MPI communicator
149  const eckit::mpi::Comm & comm() const {return commMPI_;}
150 
151  /// \details This method will return the associated parameters
152  const ObsSpaceParameters & params() const {return obs_params_;}
153 
154  /// \brief save the obs space data into a file (if obsdataout specified)
155  /// \details This function will save the obs space data into a file, but only if
156  /// the obsdataout parameter is specified in the YAML configuration.
157  /// Note that this function will do nothing if the obsdataout specification
158  /// is not present.
159  ///
160  /// The purpose of this save function is to fix an issue where the hdf5
161  /// library closes the file (via a C API) during the time when the
162  /// ObsSpace destructor (C++) is still writing to that file. These
163  /// actions can sometimes get out of sync since they are being triggered
164  /// from different sources during the clean up after a job completes.
165  void save();
166 
167  /// \brief return the total number of locations in the corresponding obs spaces
168  /// across all MPI tasks
169  std::size_t globalNumLocs() const {return gnlocs_;}
170 
171  /// \brief return number of locations from obs source that were outside the time window
173 
174  /// \brief return the number of locations in the obs space.
175  /// Note that nlocs may be smaller than global unique nlocs due to distribution of obs
176  /// across multiple process elements.
177  inline size_t nlocs() const { return get_dim_size(ObsDimensionId::Nlocs); }
178 
179  /// \brief return the number of channels in the container. If this is not a radiance
180  /// obs type, then this will return zero.
181  inline size_t nchans() const { return get_dim_size(ObsDimensionId::Nchans); }
182 
183  /// \brief return the number of records in the obs space container
184  /// \details This is the number of sets of locations after applying the
185  /// optional grouping.
186  std::size_t nrecs() const {return nrecs_;}
187 
188  /// \brief return the number of variables in the obs space container.
189  /// "Variables" refers to the quantities that can be assimilated as opposed to meta data.
190  std::size_t nvars() const;
191 
192  /// \brief return the standard dimension name for the given dimension id
193  std::string get_dim_name(const ObsDimensionId dimId) const {
194  return dim_info_.get_dim_name(dimId);
195  }
196 
197  /// \brief return the standard dimension size for the given dimension id
198  std::size_t get_dim_size(const ObsDimensionId dimId) const {
199  return dim_info_.get_dim_size(dimId);
200  }
201 
202  /// \brief return the standard dimension id for the given dimension name
203  ObsDimensionId get_dim_id(const std::string & dimName) const {
204  return dim_info_.get_dim_id(dimName);
205  }
206 
207  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.group variables
208  const std::vector<std::string> & obs_group_vars() const;
209 
210  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort variable
211  std::string obs_sort_var() const;
212 
213  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort order
214  std::string obs_sort_order() const;
215 
216  /// \brief return the name of the obs type being stored
217  const std::string & obsname() const {return obsname_;}
218 
219  /// \brief return the name of the MPI distribution
220  std::string distname() const {return obs_params_.top_level_.distName;}
221 
222  /// \brief return reference to the record number vector
223  const std::vector<std::size_t> & recnum() const {return recnums_;}
224 
225  /// \brief return reference to the index vector
226  /// \details This method returns a reference to the index vector
227  /// data member. This is for read only access.
228  /// The returned vector has length nlocs() and contains the original indices of
229  /// locations from the input ioda file corresponding to locations stored in this
230  /// ObsSpace object -- i.e. those that were selected by the timing window filter
231  /// and the MPI distribution.
232  ///
233  /// Example 1: Suppose the RoundRobin distribution is used and there are two
234  /// MPI tasks (ranks 0 and 1). The even-numbered locations from the file will go
235  /// to rank 0, and the odd-numbered locations will go to rank 1. This means that
236  /// `ObsSpace::index()` will return the vector `0, 2, 4, 6, ...` on rank 0 and
237  /// `1, 3, 5, 7, ...` on rank 1.
238  ///
239  /// Example 2: Suppose MPI is not used and the file contains 10 locations in total,
240  /// but locations 2, 3 and 7 are outside the DA timing window. In this case,
241  /// `ObsSpace::index()` will return `0, 1, 4, 5, 6, 8, 9`.
242  const std::vector<std::size_t> & index() const {return indx_;}
243 
244  /// \brief return true if variable `name` exists in group `group` or (unless `skipDerived`
245  /// is set to true) `"Derived" + group`.
246  bool has(const std::string & group, const std::string & name,
247  bool skipDerived = false) const;
248 
249  /// \brief return data type for group/variable
250  /// \param group Group name containing the variable
251  /// \param name Variable name
252  /// \param skipDerived
253  /// By default, this function will look for the variable `name` in the group `"Derived" +
254  /// group` first and only if it doesn't exist will it look in the group `group`. Set this
255  /// parameter to `true` to look only in the group `group`.
256  ObsDtype dtype(const std::string & group, const std::string & name,
257  bool skipDerived = false) const;
258 
259  /// \brief transfer data from the obs container to vdata
260  ///
261  /// \details The following get_db methods are the same except for the data type
262  /// of the data being transferred (integer, float, double, string, DateTime). The
263  /// caller needs to allocate the memory that the vdata parameter points to.
264  ///
265  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
266  /// \param name Name of container variable
267  /// \param vdata Vector where container data is being transferred to
268  /// \param chanSelect Channel selection (list of channel numbers)
269  /// \param skipDerived
270  /// By default, this function will look for the variable `name` in the group `"Derived" +
271  /// group` first and only if it doesn't exist will it look in the group `group`. Set this
272  /// parameter to `true` to look only in the group `group`.
273  void get_db(const std::string & group, const std::string & name,
274  std::vector<int> & vdata,
275  const std::vector<int> & chanSelect = { },
276  bool skipDerived = false) const;
277  void get_db(const std::string & group, const std::string & name,
278  std::vector<float> & vdata,
279  const std::vector<int> & chanSelect = { },
280  bool skipDerived = false) const;
281  void get_db(const std::string & group, const std::string & name,
282  std::vector<double> & vdata,
283  const std::vector<int> & chanSelect = { },
284  bool skipDerived = false) const;
285  void get_db(const std::string & group, const std::string & name,
286  std::vector<std::string> & vdata,
287  const std::vector<int> & chanSelect = { },
288  bool skipDerived = false) const;
289  void get_db(const std::string & group, const std::string & name,
290  std::vector<util::DateTime> & vdata,
291  const std::vector<int> & chanSelect = { },
292  bool skipDerived = false) const;
293 
294  /// \brief transfer data from vdata to the obs container
295  ///
296  /// \details The following put_db methods are the same except for the data type
297  /// of the data being transferred (integer, float, double, string, DateTime). The
298  /// caller needs to allocate and assign the memory that the vdata parameter points to.
299  ///
300  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
301  /// \param name Name of container variable
302  /// \param vdata Vector where container data is being transferred from
303  /// \param dimList Vector of dimension names (for creating variable if needed)
304  void put_db(const std::string & group, const std::string & name,
305  const std::vector<int> & vdata,
306  const std::vector<std::string> & dimList = { "nlocs" });
307  void put_db(const std::string & group, const std::string & name,
308  const std::vector<float> & vdata,
309  const std::vector<std::string> & dimList = { "nlocs" });
310  void put_db(const std::string & group, const std::string & name,
311  const std::vector<double> & vdata,
312  const std::vector<std::string> & dimList = { "nlocs" });
313  void put_db(const std::string & group, const std::string & name,
314  const std::vector<std::string> & vdata,
315  const std::vector<std::string> & dimList = { "nlocs" });
316  void put_db(const std::string & group, const std::string & name,
317  const std::vector<util::DateTime> & vdata,
318  const std::vector<std::string> & dimList = { "nlocs" });
319 
320  /// \brief Return the begin iterator associated with the recidx_ data member
321  const RecIdxIter recidx_begin() const;
322 
323  /// \brief Return the end iterator associated with the recidx_ data member
324  const RecIdxIter recidx_end() const;
325 
326  /// \brief true if given record number exists in the recidx_ data member
327  /// \param recNum Record number being searched for
328  bool recidx_has(const std::size_t recNum) const;
329 
330  /// \brief true if the groups in the recidx data member are sorted
331  bool obsAreSorted() const { return recidx_is_sorted_; }
332 
333  /// \brief return record number pointed to by the given iterator
334  /// \param irec Iterator into the recidx_ data member
335  std::size_t recidx_recnum(const RecIdxIter & irec) const;
336 
337  /// \brief return record number vector pointed to by the given iterator
338  /// \param irec Iterator into the recidx_ data member
339  const std::vector<std::size_t> & recidx_vector(const RecIdxIter & irec) const;
340 
341  /// \brief return record number vector selected by the given record number
342  /// \param recNum Record number being searched for
343  const std::vector<std::size_t> & recidx_vector(const std::size_t recNum) const;
344 
345  /// \brief return all record numbers from the recidx_ data member
346  std::vector<std::size_t> recidx_all_recnums() const;
347 
348  /// \brief return the collection of all simulated variables
349  const oops::Variables & obsvariables() const {return obsvars_;}
350 
351  /// \brief return the collection of simulated variables loaded from the input file
353  { return obs_params_.top_level_.simVars; }
354 
355  /// \brief return the collection of derived simulated variables (variables computed
356  /// after loading the input file)
359 
360  /// \brief return MPI distribution object
361  std::shared_ptr<const Distribution> distribution() const { return dist_;}
362 
363  private:
364  // ----------------------------- private data members ---------------------------
365  /// \brief Beginning of DA timing window
366  const util::DateTime winbgn_;
367 
368  /// \brief End of DA timing window
369  const util::DateTime winend_;
370 
371  /// \brief MPI communicator
372  const eckit::mpi::Comm & commMPI_;
373 
374  /// \brief total number of locations
375  std::size_t gnlocs_;
376 
377  /// \brief number of nlocs from the obs source that are outside the time window
379 
380  /// \brief number of records
381  std::size_t nrecs_;
382 
383  /// \brief dimension information for variables in this obs space
385 
386  /// \brief map to go from channel number (not necessarily consecutive)
387  /// to channel index (consecutive, starting from zero).
388  std::map<int, int> chan_num_to_index_;
389 
390  /// \brief observation data store
392 
393  /// \brief obs io parameters
395 
396  /// \brief name of obs space
397  std::string obsname_;
398 
399  /// \brief Observation "variables" to be simulated
401 
402  /// \brief MPI distribution object
403  std::shared_ptr<const Distribution> dist_;
404 
405  /// \brief indexes of locations to extract from the input obs file
406  std::vector<std::size_t> indx_;
407 
408  /// \brief record numbers associated with the location indexes
409  std::vector<std::size_t> recnums_;
410 
411  /// \brief profile ordering
413 
414  /// \brief indicator whether the data in recidx_ is sorted
416 
417  /// \brief map showing association of dim names with each variable name
419 
420  /// \brief cache for frontend selection
421  std::map<std::vector<std::string>, Selection> known_fe_selections_;
422 
423  /// \brief cache for backend selection
424  std::map<std::vector<std::string>, Selection> known_be_selections_;
425 
426  /// \brief disable the "=" operator
427  ObsSpace & operator= (const ObsSpace &) = delete;
428 
429  // ----------------------------- private functions ------------------------------
430  /// \brief print function for oops::Printable class
431  /// \param os output stream
432  void print(std::ostream & os) const;
433 
434  /// \brief Initialize the database from a source (ObsFrame object)
435  /// \param obsFrame obs source object
436  void createObsGroupFromObsFrame(ObsFrameRead & obsFrame);
437 
438  /// \brief Extend the ObsSpace according to the method requested in
439  /// the configuration file.
440  /// \param params object containing specs for extending the ObsSpace
442 
443  /// \brief For each simulated variable that doesn't have an accompanying array
444  /// in the ObsError or DerivedObsError group, create one, fill it with missing values
445  /// and add it to the DerivedObsError group.
446  void createMissingObsErrors();
447 
448  /// \brief Dump the database into the output file
449  void saveToFile();
450 
451  /// \brief Create the recidx data structure holding sorted record groups
452  /// \details This method will construct a data structure that holds the
453  /// location order within each group sorted by the values of the specified
454  /// sort variable.
455  void buildSortedObsGroups();
456 
457  /// \brief Create the recidx data structure with unsorted record groups
458  /// \details This method will initialize the recidx structure without
459  /// any particular ordering of the record groups.
460  void buildRecIdxUnsorted();
461 
462  /// \brief initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
463  /// \param obsFrame obs source object
464  void initFromObsSource(ObsFrameRead & obsFrame);
465 
466  /// \brief resize along nlocs dimension
467  /// \param nlocsSize new size to either append or reset
468  /// \param append when true append nlocsSize to current size, otherwise reset size
469  void resizeNlocs(const Dimensions_t nlocsSize, const bool append);
470 
471  /// \brief read in values for variable from obs source
472  /// \param obsFrame obs frame object
473  /// \param varName Name of variable in obs source object
474  /// \param varValues values for variable
475  template<typename VarType>
476  bool readObsSource(ObsFrameRead & obsFrame,
477  const std::string & varName, std::vector<VarType> & varValues);
478 
479  /// \brief store a variable in the obs_group_ object
480  /// \param obsIo obs source object
481  /// \param varName Name of obs_group_ variable for obs_group_ object
482  /// \param varValues Values for obs_group_ variable
483  /// \param frameStart is the start of the ObsFrame
484  /// \param frameCount is the size of the ObsFrame
485  template<typename VarType>
486  void storeVar(const std::string & varName, std::vector<VarType> & varValues,
487  const Dimensions_t frameStart, const Dimensions_t frameCount);
488 
489  /// \brief return util::missingValue for DataType, for use as fill value in obs_group_
490  template<typename DataType>
491  DataType getFillValue() {
492  DataType fillVal = util::missingValue(fillVal);  // arg presumably only selects the type -- TODO confirm
493  return fillVal;
494  }
495 
496  /// \brief load a variable from the obs_group_ object
497  /// \details This function will load data from the obs_group_ object into
498  /// the memory buffer (vector) varValues. The chanSelect parameter
499  /// is only used when the variable is 2D radiance data (nlocs X nchans),
500  /// and contains a list of channel numbers to be selected from the
501  /// obs_group_ variable.
502  /// \param group Name of Group in obs_group_
503  /// \param name Name of Variable in group
504  /// \param chanSelect Vector of channel numbers for selection
505  /// \param varValues memory to load from obs_group_ variable
506  /// \param skipDerived
507  /// By default, this function will search for the variable `name` both in the group
508  /// `group` and `"Derived" + group`. Set this parameter to `true` to search only in the
509  /// group `group`.
510  template<typename VarType>
511  void loadVar(const std::string & group, const std::string & name,
512  const std::vector<int> & chanSelect,
513  std::vector<VarType> & varValues, bool skipDerived = false) const;
514 
515  /// \brief save a variable to the obs_group_ object
516  /// \param group Name of Group in obs_group_
517  /// \param name Name of Variable in group.
518  /// \param varValues values to be saved
519  /// \param dimList Vector of dimension names (for creating variable if needed)
520  ///
521  /// If the group `group` does not contain a variable with the specified name, but this name
522  /// has the form <string>_<integer> and `obs_group_` contains an `nchans` dimension, this
523  /// function will save `varValues` in the slice of variable <string> corresponding to
524  /// channel <integer>. If channel <integer> does not exist or the variable <string> already
525  /// exists but is not associated with the `nchans` dimension, an exception will be thrown.
526  template<typename VarType>
527  void saveVar(const std::string & group, std::string name,
528  const std::vector<VarType> & varValues,
529  const std::vector<std::string> & dimList);
530 
531  /// \brief Create selections of slices of the variable \p variable along dimension
532  /// \p nchansDimIndex corresponding to channels \p channels.
533  ///
534  /// \returns The number of elements in each selection.
535  std::size_t createChannelSelections(const Variable & variable,
536  std::size_t nchansDimIndex,
537  const std::vector<int> & channels,
538  Selection & memSelect,
539  Selection & obsGroupSelect) const;
540 
541  /// \brief create set of variables from source variables and lists
542  /// \param srcVarContainer Has_Variables object from source
543  /// \param destVarContainer Has_Variables object from destination
544  /// \param dimsAttachedToVars Map containing list of attached dims for each variable
545  void createVariables(const Has_Variables & srcVarContainer,
546  Has_Variables & destVarContainer,
547  const VarDimMap & dimsAttachedToVars);
548 
549  /// \brief open an obs_group_ variable, create the variable if necessary
550  template<typename VarType>
551  Variable openCreateVar(const std::string & varName,
552  const std::vector<std::string> & varDimList) {
553  Variable var;
554  if (obs_group_.vars.exists(varName)) {
555  var = obs_group_.vars.open(varName);
556  } else {
557  // Create a vector of the dimension variables
558  std::vector<Variable> varDims;
559  for (auto & dimName : varDimList) {
560  varDims.push_back(obs_group_.vars.open(dimName));
561  }
562 
563  // Create the variable. Use the JEDI internal missing value marks for
564  // fill values.
565  VarType fillVal = this->getFillValue<VarType>();
567  params.chunk = true;
568  params.compressWithGZIP();
569  params.setFillValue<VarType>(fillVal);
570 
571  var = obs_group_.vars.createWithScales<VarType>(varName, varDims, params);
572  }
573  return var;
574  }
575 
576  /// \brief fill in the channel number to channel index map
577  void fillChanNumToIndexMap();
578 
579  /// \brief split off the channel number suffix from a given variable name
580  /// \details If the given variable name does not exist, the chanSelect vector
581  /// is empty, and the given variable name has a suffix matching
582  /// "_[0-9][0-9]*" (i.e., a numeric suffix), then this routine will strip
583  /// off the channel number from the name and place that channel number
584  /// into the output chanSelectToUse vector. The new name will be returned
585  /// in the nameToUse string.
586  /// This is being done for backward compatibility until the ufo Variables
587  /// class and its clients are modified to handle a single variable name
588  /// and a vector of channel numbers.
589  /// \param group Name of Group in obs_group_
590  /// \param name Name of Variable in group
591  /// \param chanSelect Vector of channel numbers for selection
592  /// \param nameToUse Name of Variable after splitting off the channel number
593  /// \param skipDerived
594  /// By default, this function will search for the variable `name` both in the group
595  /// `group` and `"Derived" + group`. Set this parameter to `true` to search only in the
596  /// group `group`.
597  void splitChanSuffix(const std::string & group, const std::string & name,
598  const std::vector<int> & chanSelect, std::string & nameToUse,
599  std::vector<int> & chanSelectToUse,
600  bool skipDerived = false) const;
601 
602  /// \brief Extend the given variable
603  /// \param extendVar database variable to be extended
604  /// \param startFill nlocs index indicating the start of the extended region
605  template <typename DataType>
606  void extendVariable(Variable & extendVar, const size_t startFill);
607  };
608 
609 } // namespace ioda
610 
611 #endif // OBSSPACE_H_
Describe the dimensions of a ioda::Attribute or ioda::Variable.
Definitions for setting up backends with file and memory I/O.
Fill value getters and setters.
Interfaces for ioda::ObsGroup and related classes.
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
Definition: ObsSpace.cc:92
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
Definition: ObsSpace.cc:84
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
Definition: ObsSpace.cc:80
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
Definition: ObsSpace.cc:88
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
Implementation of ObsFrameRead class.
An ObsGroup is a specialization of a ioda::Group. It provides convenience functions and guarantees th...
Definition: ObsGroup.h:32
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
Definition: ObsSpace.cc:1164
void storeVar(const std::string &varName, std::vector< VarType > &varValues, const Dimensions_t frameStart, const Dimensions_t frameCount)
store a variable in the obs_group_ object
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
Definition: ObsSpace.cc:923
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the standard dimension size for the given dimension id
void saveToFile()
Dump the database into the output file.
Definition: ObsSpace.cc:1066
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
void put_db(const std::string &group, const std::string &name, const std::vector< util::DateTime > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
std::shared_ptr< const Distribution > dist_
MPI distribution object.
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
transfer data from the obs container to vdata
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
Definition: ObsSpace.cc:1058
ObsDimInfo dim_info_
dimension information for variables in this obs space
void get_db(const std::string &group, const std::string &name, std::vector< float > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
void get_db(const std::string &group, const std::string &name, std::vector< std::string > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
size_t nchans() const
return the number of channels in the container. If this is not a radiance obs type,...
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
std::size_t nrecs() const
return the number of records in the obs space container
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame object)
Definition: ObsSpace.cc:446
std::string obsname_
name of obs space
ObsTopLevelParameters Parameters_
std::size_t nrecs_
number of records
DataType getFillValue()
get fill value for use in the obs_group_ object
void print(std::ostream &os) const
print function for oops::Printable class
Definition: ObsSpace.cc:436
void get_db(const std::string &group, const std::string &name, std::vector< double > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
void put_db(const std::string &group, const std::string &name, const std::vector< float > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
const std::vector< std::size_t > & index() const
return reference to the index vector
void put_db(const std::string &group, const std::string &name, const std::vector< std::string > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
ObsSpace(const ObsSpace &)
oops::Variables obsvars_
Observation "variables" to be simulated.
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
const eckit::mpi::Comm & comm() const
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
Definition: ObsSpace.cc:579
void loadVar(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::vector< VarType > &varValues, bool skipDerived=false) const
load a variable from the obs_group_ object
RecIdxMap::const_iterator RecIdxIter
void createMissingObsErrors()
For each simulated variable that doesn't have an accompanying array in the ObsError or DerivedObsErro...
Definition: ObsSpace.cc:1298
RecIdxMap recidx_
profile ordering
std::shared_ptr< const Distribution > distribution() const
return MPI distribution object
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard dimension id for the given dimension name
ObsSpaceParameters obs_params_
obs io parameters
std::string distname() const
return the name of the MPI distribution
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
Definition: ObsSpace.cc:1009
size_t nlocs() const
return the number of locations in the obs space. Note that nlocs may be smaller than global unique nl...
const util::DateTime winbgn_
Beginning of DA timing window.
void get_db(const std::string &group, const std::string &name, std::vector< util::DateTime > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
ObsGroup obs_group_
observation data store
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
const util::DateTime winend_
End of DA timing window.
const util::DateTime & windowStart() const
std::size_t nvars() const
return the number of variables in the obs space container. "Variables" refers to the quantities that ...
void extendVariable(Variable &extendVar, const size_t startFill)
Extend the given variable.
bool obsAreSorted() const
true if the groups in the recidx data member are sorted
const ObsSpaceParameters & params() const
Variable openCreateVar(const std::string &varName, const std::vector< std::string > &varDimList)
open an obs_group_ variable, create the variable if necessary
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
Definition: ObsSpace.cc:684
const oops::Variables & derived_obsvariables() const
return the collection of derived simulated variables (variables computed after loading the input file...
bool readObsSource(ObsFrameRead &obsFrame, const std::string &varName, std::vector< VarType > &varValues)
read in values for variable from obs source
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable `variable` along dimension `nchansDimIndex` corresponding to ...
Definition: ObsSpace.cc:808
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
const eckit::mpi::Comm & commMPI_
MPI communicator.
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
std::size_t gnlocs_
total number of locations
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
void put_db(const std::string &group, const std::string &name, const std::vector< double > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
const oops::Variables & obsvariables() const
return the collection of all simulated variables
std::map< std::size_t, std::vector< std::size_t > > RecIdxMap
const oops::Variables & initial_obsvariables() const
return the collection of simulated variables loaded from the input file
const std::string & obsname() const
return the name of the obs type being stored
const util::DateTime & windowEnd() const
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
ObsTopLevelParameters top_level_
sub groups of parameters
oops::RequiredParameter< oops::Variables > simVars
simulated variables
oops::Parameter< oops::Variables > derivedSimVars
oops::Parameter< std::string > distName
name of MPI distribution
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Definition: Selection.h:48
Represents the "type" (i.e. integer, string, float) of a piece of data.
Definition: Type.h:123
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
Base class for observation spaces.
Definition: ObsSpaceBase.h:31
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
character(maxvarlen) function variable(this, jj)
logical function has(this, var)
Template handlers for implicit variable conversion.
Type to_type
The type that data should be converted to upon write.
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57