IODA
src/ObsSpace.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2021 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef OBSSPACE_H_
9 #define OBSSPACE_H_
10 
11 #include <functional>
12 #include <map>
13 #include <memory>
14 #include <numeric>
15 #include <ostream>
16 #include <set>
17 #include <string>
18 #include <type_traits>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "eckit/exception/Exceptions.h"
24 #include "eckit/mpi/Comm.h"
25 
26 #include "oops/base/ObsSpaceBase.h"
27 #include "oops/base/Variables.h"
28 #include "oops/util/DateTime.h"
29 #include "oops/util/Logger.h"
30 #include "ioda/core/IodaUtils.h"
31 #include "ioda/distribution/Distribution.h"
32 #include "ioda/Engines/Factory.h"
33 #include "ioda/Misc/Dimensions.h"
34 #include "ioda/ObsGroup.h"
35 #include "ioda/ObsSpaceParameters.h"
36 #include "ioda/Variables/Fill.h"
37 
38 // Forward declarations
39 namespace eckit {
40  class Configuration;
41 }
42 
43 namespace ioda {
44  class ObsFrameRead;
45  class ObsVector;
46 
47  //-------------------------------------------------------------------------------------
48  /// \brief Data type tag for an obs variable; this is the type returned by ObsSpace::dtype().
49  enum class ObsDtype {
50  None,      ///< NOTE(review): presumably "no recognized type" -- confirm against ObsSpace::dtype()
51  Float,     ///< floating point data
52  Integer,   ///< integer data
53  String,    ///< string data
54  DateTime   ///< date/time data (NOTE(review): presumably util::DateTime -- confirm)
55  };
56 
57  /// \brief Ids for the standard obs dimensions.
58  /// The first two dimension names for now are nlocs and nchans. This will likely expand
59  /// in the future, so make sure that this enum class and the following initializer
60  /// function stay in sync (NOTE(review): presumably the ObsDimInfo constructor -- confirm).
61  enum class ObsDimensionId {
62  Nlocs,    ///< id whose size ObsSpace::nlocs() reports
63  Nchans    ///< id whose size ObsSpace::nchans() reports
64  };
65 
66  class ObsDimInfo {  // Lookup tables tying each ObsDimensionId to its name and its size.
67  public:
68  ObsDimInfo();  // NOTE(review): presumably fills the id/name tables -- defined outside this header
69 
70  /// \brief look up the standard id registered for the given dimension name (name -> id)
71  ObsDimensionId get_dim_id(const std::string & dimName) const;
72 
73  /// \brief look up the dimension name registered for the given dimension id (id -> name)
74  std::string get_dim_name(const ObsDimensionId dimId) const;
75 
76  /// \brief look up the dimension size currently recorded for the given dimension id
77  std::size_t get_dim_size(const ObsDimensionId dimId) const;
78 
79  /// \brief record a new dimension size for the given dimension id
80  void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize);
81 
82  private:
83  /// \brief map going from dim id to dim name (backs get_dim_name)
84  std::map<ObsDimensionId, std::string> dim_id_name_;
85 
86  /// \brief map going from dim id to dim size (backs get_dim_size / set_dim_size)
87  std::map<ObsDimensionId, std::size_t> dim_id_size_;
88 
89  /// \brief map going from dim name to id (backs get_dim_id)
90  std::map<std::string, ObsDimensionId> dim_name_id_;
91  };
92 
93  /// @brief Template handlers for implicit variable conversion: maps a source type to the type used on write.
94  /// @tparam Type is the source type of the data. The primary template maps every type to itself.
95  template <class Type>
96  struct ConvertType {
97  /// @brief The type that data should be converted to upon write (here: unchanged).
98  typedef Type to_type;
99  };
100  template<>  // Specialization: double data is converted to float upon write.
101  struct ConvertType<double> {
102  typedef float to_type;  ///< double maps to float
103  };
104 
105  /// \brief Observation data class for IODA
106  ///
107  /// \details This class handles the memory store of observation data. It handles
108  /// the transfer of data between memory and files, the distribution of obs data
109  /// across multiple process elements, the filtering out of obs data that is outside
110  /// the DA timing window, the transfer of data between UFO, OOPS and IODA, and data type
111  /// conversion that is "missing value aware".
112  ///
113  /// During the DA run, all data transfers are done in memory. The only time file I/O is
114  /// invoked is during the constructor (read from the file into the obs container) and
115  /// optionally during the destructor (write from obs container into the file).
116  class ObsSpace : public oops::ObsSpaceBase {
117  public:
118  //---------------------------- typedefs -------------------------------
119  typedef std::map<std::size_t, std::vector<std::size_t>> RecIdxMap;
120  typedef RecIdxMap::const_iterator RecIdxIter;
121 
122  //---------------------------- functions ------------------------------
123  /// \brief Config based constructor for an ObsSpace object.
124  ///
125  /// \details This constructor will read in from the obs file and transfer the
126  /// variables into the obs container. Obs falling outside the DA timing window,
127  /// specified by bgn and end, will be discarded before storing them in the
128  /// obs container.
129  ///
130  /// \param config eckit configuration segment holding obs types specs
131  /// \param comm MPI communicator for model grouping
132  /// \param bgn DateTime object holding the start of the DA timing window
133  /// \param end DateTime object holding the end of the DA timing window
134  /// \param timeComm MPI communicator for ensemble
135  ObsSpace(const eckit::Configuration & config, const eckit::mpi::Comm & comm,
136  const util::DateTime & bgn, const util::DateTime & end,
137  const eckit::mpi::Comm & timeComm);
138  ObsSpace(const ObsSpace &);
139  virtual ~ObsSpace() {}
140 
141  /// \details This method will return the start of the DA timing window
142  const util::DateTime & windowStart() const {return winbgn_;}
143 
144  /// \details This method will return the end of the DA timing window
145  const util::DateTime & windowEnd() const {return winend_;}
146 
147  /// \details This method will return the associated MPI communicator
148  const eckit::mpi::Comm & comm() const {return commMPI_;}
149 
150  /// \details This method will return the associated parameters
151  const ObsSpaceParameters & params() const {return obs_params_;}
152 
153  /// \brief save the obs space data into a file (if obsdataout specified)
154  /// \details This function will save the obs space data into a file, but only if
155  /// the obsdataout parameter is specified in the YAML configuration.
156  /// Note that this function will do nothing if the obsdataout specification
157  /// is not present.
158  ///
159  /// The purpose of this save function is to fix an issue where the hdf5
160  /// library closes the file (via a C API) during the time when the
161  /// ObsSpace destructor (C++) is still writing to that file. These
162  /// actions can sometimes get out of sync since they are being triggered
163  /// from different sources during the clean up after a job completes.
164  void save();
165 
166  /// \brief return the total number of locations in the corresponding obs spaces
167  /// across all MPI tasks
168  std::size_t globalNumLocs() const {return gnlocs_;}
169 
170  /// \brief return number of locations from obs source that were outside the time window
171  std::size_t globalNumLocsOutsideTimeWindow() const {return gnlocs_outside_timewindow_;}
172 
173  /// \brief return the number of locations in the obs space.
174  /// Note that nlocs may be smaller than global unique nlocs due to distribution of obs
175  /// across multiple process elements.
176  inline size_t nlocs() const { return get_dim_size(ObsDimensionId::Nlocs); }
177 
178  /// \brief return the number of channels in the container. If this is not a radiance
179  /// obs type, then this will return zero.
180  inline size_t nchans() const { return get_dim_size(ObsDimensionId::Nchans); }
181 
182  /// \brief return the number of records in the obs space container
183  /// \details This is the number of sets of locations after applying the
184  /// optional grouping.
185  std::size_t nrecs() const {return nrecs_;}
186 
187  /// \brief return the number of variables in the obs space container.
188  /// "Variables" refers to the quantities that can be assimilated as opposed to meta data.
189  std::size_t nvars() const;
190 
191  /// \brief return the standard dimension name for the given dimension id
192  std::string get_dim_name(const ObsDimensionId dimId) const {
193  return dim_info_.get_dim_name(dimId);
194  }
195 
196  /// \brief return the standard dimension size for the given dimension id
197  std::size_t get_dim_size(const ObsDimensionId dimId) const {
198  return dim_info_.get_dim_size(dimId);
199  }
200 
201  /// \brief return the standard dimension id for the given dimension name
202  ObsDimensionId get_dim_id(const std::string & dimName) const {
203  return dim_info_.get_dim_id(dimName);
204  }
205 
206  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.group variables
207  const std::vector<std::string> & obs_group_vars() const;
208 
209  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort variable
210  std::string obs_sort_var() const;
211 
212  /// \brief return YAML configuration parameter: obsdatain.obsgrouping.sort order
213  std::string obs_sort_order() const;
214 
215  /// \brief return the name of the obs type being stored
216  const std::string & obsname() const {return obsname_;}
217 
218  /// \brief return the name of the MPI distribution
219  std::string distname() const {return obs_params_.top_level_.distName;}
220 
221  /// \brief return reference to the record number vector
222  const std::vector<std::size_t> & recnum() const {return recnums_;}
223 
224  /// \brief return reference to the index vector
225  /// \details This method returns a reference to the index vector
226  /// data member. This is for read only access.
227  /// The returned vector has length nlocs() and contains the original indices of
228  /// locations from the input ioda file corresponding to locations stored in this
229  /// ObsSpace object -- i.e. those that were selected by the timing window filter
230  /// and the MPI distribution.
231  ///
232  /// Example 1: Suppose the RoundRobin distribution is used and there are two
233  /// MPI tasks (ranks 0 and 1). The even-numbered locations from the file will go
234  /// to rank 0, and the odd-numbered locations will go to rank 1. This means that
235  /// `ObsSpace::index()` will return the vector `0, 2, 4, 6, ...` on rank 0 and
236  /// `1, 3, 5, 7, ...` on rank 1.
237  ///
238  /// Example 2: Suppose MPI is not used and the file contains 10 locations in total,
239  /// but locations 2, 3 and 7 are outside the DA timing window. In this case,
240  /// `ObsSpace::index()` will return `0, 1, 4, 5, 6, 8, 9`.
241  const std::vector<std::size_t> & index() const {return indx_;}
242 
243  /// \brief return true if group/variable exists
244  bool has(const std::string & group, const std::string & name) const;
245 
246  /// \brief return data type for group/variable
247  /// \param group Group name containing the variable
248  /// \param name Variable name
249  ObsDtype dtype(const std::string & group, const std::string & name) const;
250 
251  /// \brief transfer data from the obs container to vdata
252  ///
253  /// \details The following get_db methods are the same except for the data type
254  /// of the data being transferred (integer, float, double, string, DateTime). The
255  /// caller needs to allocate the memory that the vdata parameter points to
256  ///
257  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
258  /// \param name Name of container variable
259  /// \param vdata Vector where container data is being transferred to
260  /// \param chanSelect Channel selection (list of channel numbers)
261  void get_db(const std::string & group, const std::string & name,
262  std::vector<int> & vdata,
263  const std::vector<int> & chanSelect = { }) const;
264  void get_db(const std::string & group, const std::string & name,
265  std::vector<float> & vdata,
266  const std::vector<int> & chanSelect = { }) const;
267  void get_db(const std::string & group, const std::string & name,
268  std::vector<double> & vdata,
269  const std::vector<int> & chanSelect = { }) const;
270  void get_db(const std::string & group, const std::string & name,
271  std::vector<std::string> & vdata,
272  const std::vector<int> & chanSelect = { }) const;
273  void get_db(const std::string & group, const std::string & name,
274  std::vector<util::DateTime> & vdata,
275  const std::vector<int> & chanSelect = { }) const;
276 
277  /// \brief transfer data from vdata to the obs container
278  ///
279  /// \details The following put_db methods are the same except for the data type
280  /// of the data being transferred (integer, float, double, string, DateTime). The
281  /// caller needs to allocate and assign the memory that the vdata parameter points to.
282  ///
283  /// \param group Name of container group (ObsValue, ObsError, MetaData, etc.)
284  /// \param name Name of container variable
285  /// \param vdata Vector where container data is being transferred from
286  /// \param dimList Vector of dimension names (for creating variable if needed)
287  void put_db(const std::string & group, const std::string & name,
288  const std::vector<int> & vdata,
289  const std::vector<std::string> & dimList = { "nlocs" });
290  void put_db(const std::string & group, const std::string & name,
291  const std::vector<float> & vdata,
292  const std::vector<std::string> & dimList = { "nlocs" });
293  void put_db(const std::string & group, const std::string & name,
294  const std::vector<double> & vdata,
295  const std::vector<std::string> & dimList = { "nlocs" });
296  void put_db(const std::string & group, const std::string & name,
297  const std::vector<std::string> & vdata,
298  const std::vector<std::string> & dimList = { "nlocs" });
299  void put_db(const std::string & group, const std::string & name,
300  const std::vector<util::DateTime> & vdata,
301  const std::vector<std::string> & dimList = { "nlocs" });
302 
303  /// \brief Return the begin iterator associated with the recidx_ data member
304  const RecIdxIter recidx_begin() const;
305 
306  /// \brief Return the end iterator associated with the recidx_ data member
307  const RecIdxIter recidx_end() const;
308 
309  /// \brief true if given record number exists in the recidx_ data member
310  /// \param recNum Record number being searched for
311  bool recidx_has(const std::size_t recNum) const;
312 
313  /// \brief true if the groups in the recidx data member are sorted
314  bool obsAreSorted() const { return recidx_is_sorted_; }
315 
316  /// \brief return record number pointed to by the given iterator
317  /// \param irec Iterator into the recidx_ data member
318  std::size_t recidx_recnum(const RecIdxIter & irec) const;
319 
320  /// \brief return record number vector pointed to by the given iterator
321  /// \param irec Iterator into the recidx_ data member
322  const std::vector<std::size_t> & recidx_vector(const RecIdxIter & irec) const;
323 
324  /// \brief return record number vector selected by the given record number
325  /// \param recNum Record number being searched for
326  const std::vector<std::size_t> & recidx_vector(const std::size_t recNum) const;
327 
328  /// \brief return all record numbers from the recidx_ data member
329  std::vector<std::size_t> recidx_all_recnums() const;
330 
331  /// \brief return oops variables object (simulated variables)
332  const oops::Variables & obsvariables() const {return obsvars_;}
333 
334  /// \brief return MPI distribution object
335  std::shared_ptr<const Distribution> distribution() const { return dist_;}
336 
337  private:
338  // ----------------------------- private data members ---------------------------
339  /// \brief Configuration file
340  const eckit::LocalConfiguration config_;
341 
342  /// \brief Beginning of DA timing window
343  const util::DateTime winbgn_;
344 
345  /// \brief End of DA timing window
346  const util::DateTime winend_;
347 
348  /// \brief MPI communicator
349  const eckit::mpi::Comm & commMPI_;
350 
351  /// \brief total number of locations
352  std::size_t gnlocs_;
353 
354  /// \brief number of nlocs from the obs source that are outside the time window
355  std::size_t gnlocs_outside_timewindow_;
356 
357  /// \brief number of records
358  std::size_t nrecs_;
359 
360  /// \brief dimension information for variables in this obs space
361  ObsDimInfo dim_info_;
362 
363  /// \brief map to go from channel number (not necessarily consecutive)
364  /// to channel index (consecutive, starting from zero).
365  std::map<int, int> chan_num_to_index_;
366 
367  /// \brief observation data store
368  ObsGroup obs_group_;
369 
370  /// \brief obs io parameters
371  ObsSpaceParameters obs_params_;
372 
373  /// \brief name of obs space
374  std::string obsname_;
375 
376  /// \brief Observation "variables" to be simulated
377  oops::Variables obsvars_;
378 
379  /// \brief MPI distribution object
380  std::shared_ptr<const Distribution> dist_;
381 
382  /// \brief indexes of locations to extract from the input obs file
383  std::vector<std::size_t> indx_;
384 
385  /// \brief record numbers associated with the location indexes
386  std::vector<std::size_t> recnums_;
387 
388  /// \brief profile ordering
389  RecIdxMap recidx_;
390 
391  /// \brief indicator whether the data in recidx_ is sorted
392  bool recidx_is_sorted_;
393 
394  /// \brief map showing association of dim names with each variable name
395  VarDimMap dims_attached_to_vars_;
396 
397  /// \brief cache for frontend selection
398  std::map<std::vector<std::string>, Selection> known_fe_selections_;
399 
400  /// \brief cache for backend selection
401  std::map<std::vector<std::string>, Selection> known_be_selections_;
402 
403  /// \brief disable the "=" operator
404  ObsSpace & operator= (const ObsSpace &) = delete;
405 
406  // ----------------------------- private functions ------------------------------
407  /// \brief print function for oops::Printable class
408  /// \param os output stream
409  void print(std::ostream & os) const;
410 
411  /// \brief Initialize the database from a source (ObsFrame object)
412  /// \param obsFrame obs source object
413  void createObsGroupFromObsFrame(ObsFrameRead & obsFrame);
414 
415  /// \brief Extend the ObsSpace according to the method requested in
416  /// the configuration file.
417  /// \param params object containing specs for extending the ObsSpace
418  void extendObsSpace(const ObsExtendParameters & params);
419 
420  /// \brief Dump the database into the output file
421  void saveToFile();
422 
423  /// \brief Create the recidx data structure holding sorted record groups
424  /// \details This method will construct a data structure that holds the
425  /// location order within each group sorted by the values of the specified
426  /// sort variable.
427  void buildSortedObsGroups();
428 
429  /// \brief Create the recidx data structure with unsorted record groups
430  /// \details This method will initialize the recidx structure without
431  /// any particular ordering of the record groups.
432  void buildRecIdxUnsorted();
433 
434  /// \brief initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
435  /// \param obsFrame obs source object
436  void initFromObsSource(ObsFrameRead & obsFrame);
437 
438  /// \brief resize along nlocs dimension
439  /// \param nlocsSize new size to either append or reset
440  /// \param append when true append nlocsSize to current size, otherwise reset size
441  void resizeNlocs(const Dimensions_t nlocsSize, const bool append);
442 
443  /// \brief read in values for variable from obs source
444  /// \param obsFrame obs frame object
445  /// \param varName Name of variable in obs source object
446  /// \param varValues values for variable
447  template<typename VarType>
448  bool readObsSource(ObsFrameRead & obsFrame,
449  const std::string & varName, std::vector<VarType> & varValues);
450 
451  /// \brief store a variable in the obs_group_ object
452  /// \param obsIo obs source object
453  /// \param varName Name of obs_group_ variable for obs_group_ object
454  /// \param varValues Values for obs_group_ variable
455  /// \param frameStart is the start of the ObsFrame
456  /// \param frameCount is the size of the ObsFrame
457  template<typename VarType>
458  void storeVar(const std::string & varName, std::vector<VarType> & varValues,
459  const Dimensions_t frameStart, const Dimensions_t frameCount);
460 
461  /// \brief get fill value for use in the obs_group_ object: returns the util::missingValue result for DataType
462  template<typename DataType>
463  DataType getFillValue() {
464  DataType fillVal = util::missingValue(fillVal);  // NOTE(review): fillVal is passed to its own initializer; presumably only its type matters to util::missingValue -- confirm
465  return fillVal;
466  }
467 
468  /// \brief load a variable from the obs_group_ object
469  /// \details This function will load data from the obs_group_ object into
470  /// the memory buffer (vector) varValues. The chanSelect parameter
471  /// is only used when the variable is 2D radiance data (nlocs X nchans),
472  /// and contains a list of channel numbers to be selected from the
473  /// obs_group_ variable.
474  /// \param group Name of Group in obs_group_
475  /// \param name Name of Variable in group
476  /// \param chanSelect Vector of channel numbers for selection
477  /// \param varValues memory to load from obs_group_ variable
478  template<typename VarType>
479  void loadVar(const std::string & group, const std::string & name,
480  const std::vector<int> & chanSelect,
481  std::vector<VarType> & varValues) const;
482 
483  /// \brief save a variable to the obs_group_ object
484  /// \param group Name of Group in obs_group_
485  /// \param name Name of Variable in group.
486  /// \param varValues values to be saved
487  /// \param dimList Vector of dimension names (for creating variable if needed)
488  ///
489  /// If the group `group` does not contain a variable with the specified name, but this name
490  /// has the form <string>_<integer> and `obs_group_` contains an `nchans` dimension, this
491  /// function will save `varValues` in the slice of variable <string> corresponding to
492  /// channel <integer>. If channel <integer> does not exist or the variable <string> already
493  /// exists but is not associated with the `nchans` dimension, an exception will be thrown.
494  template<typename VarType>
495  void saveVar(const std::string & group, std::string name,
496  const std::vector<VarType> & varValues,
497  const std::vector<std::string> & dimList);
498 
499  /// \brief Create selections of slices of the variable \p variable along dimension
500  /// \p nchansDimIndex corresponding to channels \p channels.
501  ///
502  /// \returns The number of elements in each selection.
503  std::size_t createChannelSelections(const Variable & variable,
504  std::size_t nchansDimIndex,
505  const std::vector<int> & channels,
506  Selection & memSelect,
507  Selection & obsGroupSelect) const;
508 
509  /// \brief create set of variables from source variables and lists
510  /// \param srcVarContainer Has_Variables object from source
511  /// \param destVarContainer Has_Variables object from destination
512  /// \param dimsAttachedToVars Map containing list of attached dims for each variable
513  void createVariables(const Has_Variables & srcVarContainer,
514  Has_Variables & destVarContainer,
515  const VarDimMap & dimsAttachedToVars);
516 
517  /// \brief open an obs_group_ variable, create the variable if necessary
518  template<typename VarType>
519  Variable openCreateVar(const std::string & varName,
520  const std::vector<std::string> & varDimList) {
521  Variable var;
522  if (obs_group_.vars.exists(varName)) {
523  var = obs_group_.vars.open(varName);
524  } else {
525  // Create a vector of the dimension variables
526  std::vector<Variable> varDims;
527  for (auto & dimName : varDimList) {
528  varDims.push_back(obs_group_.vars.open(dimName));
529  }
530 
531  // Create the variable. Use the JEDI internal missing value marks for
532  // fill values.
533  VarType fillVal = this->getFillValue<VarType>();
534  VariableCreationParameters params;
535  params.chunk = true;
536  params.compressWithGZIP();
537  params.setFillValue<VarType>(fillVal);
538 
539  var = obs_group_.vars.createWithScales<VarType>(varName, varDims, params);
540  }
541  return var;
542  }
543 
544  /// \brief fill in the channel number to channel index map
545  void fillChanNumToIndexMap();
546 
547  /// \brief split off the channel number suffix from a given variable name
548  /// \details If the given variable name does not exist, the channelSelect vector
549  /// is empty, and the given variable name has a suffix matching
550  /// "_[0-9][0-9]*" (ie, a numeric suffix), then this routine will strip
551  /// off the channel number from the name and place that channel number
552  /// into the output chanSelectToUse vector. The new name will be returned
553  /// in the nameToUse string.
554  /// This is being done for backward compatibility until the ufo Variables
555  /// class and its clients are modified to handle a single variable name
556  /// and a vector of channel numbers.
557  /// \param group Name of Group in obs_group_
558  /// \param name Name of Variable in group
559  /// \param chanSelect Vector of channel numbers for selection
560  /// \param nameToUse Name of Variable after splitting off the channel number
561  void splitChanSuffix(const std::string & group, const std::string & name,
562  const std::vector<int> & chanSelect, std::string & nameToUse,
563  std::vector<int> & chanSelectToUse) const;
564 
565  /// \brief Extend the given variable
566  /// \param extendVar database variable to be extended
567  /// \param startFill nlocs index indicating the start of the extended region
568  template <typename DataType>
569  void extendVariable(Variable & extendVar, const size_t startFill);
570  };
571 
572 } // namespace ioda
573 
574 #endif // OBSSPACE_H_
Describe the dimensions of a ioda::Attribute or ioda::Variable.
Definitions for setting up backends with file and memory I/O.
Fill value getters and setters.
Interfaces for ioda::ObsGroup and related classes.
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
Definition: ObsSpace.cc:92
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
Definition: ObsSpace.cc:84
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
Definition: ObsSpace.cc:80
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
Definition: src/ObsSpace.h:87
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
Definition: ObsSpace.cc:88
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
Definition: src/ObsSpace.h:90
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
Definition: src/ObsSpace.h:84
Implementation of ObsFrameRead class.
An ObsGroup is a specialization of a ioda::Group. It provides convenience functions and guarantees th...
Definition: ObsGroup.h:32
Observation data class for IODA.
Definition: src/ObsSpace.h:116
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
Definition: ObsSpace.cc:1094
void storeVar(const std::string &varName, std::vector< VarType > &varValues, const Dimensions_t frameStart, const Dimensions_t frameCount)
store a variable in the obs_group_ object
Definition: ObsSpace.cc:822
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
Definition: ObsSpace.cc:863
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the standard dimension size for the given dimension id
Definition: src/ObsSpace.h:197
bool has(const std::string &group, const std::string &name) const
return true if group/variable exists
Definition: ObsSpace.cc:230
void saveToFile()
Dump the database into the output file.
Definition: ObsSpace.cc:1003
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
Definition: src/ObsSpace.h:222
ObsDtype dtype(const std::string &group, const std::string &name) const
return data type for group/variable
Definition: ObsSpace.cc:240
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
Definition: ObsSpace.cc:306
virtual ~ObsSpace()
Definition: src/ObsSpace.h:139
std::shared_ptr< const Distribution > dist_
MPI distribution object.
Definition: src/ObsSpace.h:380
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
Definition: src/ObsSpace.h:355
std::vector< std::size_t > recidx_all_recnums() const
return all record numbers from the recidx_ data member
Definition: ObsSpace.cc:382
const RecIdxIter recidx_begin() const
Return the begin iterator associated with the recidx_ data member.
Definition: ObsSpace.cc:344
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
Definition: ObsSpace.cc:995
ObsDimInfo dim_info_
dimension information for variables in this obs space
Definition: src/ObsSpace.h:361
size_t nchans() const
return the number of channels in the container. If this is not a radiance obs type,...
Definition: src/ObsSpace.h:180
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
Definition: src/ObsSpace.h:395
std::size_t recidx_recnum(const RecIdxIter &irec) const
return record number pointed to by the given iterator
Definition: ObsSpace.cc:360
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
Definition: src/ObsSpace.h:192
void splitChanSuffix(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::string &nameToUse, std::vector< int > &chanSelectToUse) const
split off the channel number suffix from a given variable name
Definition: ObsSpace.cc:931
void print(std::ostream &os) const
print function for oops::Printable class
Definition: ObsSpace.cc:397
std::size_t nrecs() const
return the number of records in the obs space container
Definition: src/ObsSpace.h:185
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame object)
Definition: ObsSpace.cc:407
std::string obsname_
name of obs space
Definition: src/ObsSpace.h:374
const eckit::LocalConfiguration config_
Configuration file.
Definition: src/ObsSpace.h:340
std::size_t nrecs_
number of records
Definition: src/ObsSpace.h:358
DataType getFillValue()
get fill value for use in the obs_group_ object
Definition: src/ObsSpace.h:463
const std::vector< std::string > & obs_group_vars() const
return YAML configuration parameter: obsdatain.obsgrouping.group variables
Definition: ObsSpace.cc:210
const RecIdxIter recidx_end() const
Return the end iterator associated with the recidx_ data member.
Definition: ObsSpace.cc:349
const std::vector< std::size_t > & index() const
return reference to the index vector
Definition: src/ObsSpace.h:241
ObsSpace(const ObsSpace &)
oops::Variables obsvars_
Observation "variables" to be simulated.
Definition: src/ObsSpace.h:377
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
Definition: src/ObsSpace.h:365
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
Definition: ObsSpace.cc:706
const eckit::mpi::Comm & comm() const
Definition: src/ObsSpace.h:148
ObsSpace & operator=(const ObsSpace &)=delete
disable the "=" operator
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
Definition: ObsSpace.cc:540
const std::vector< std::size_t > & recidx_vector(const RecIdxIter &irec) const
return record number vector pointed to by the given iterator
Definition: ObsSpace.cc:365
RecIdxMap::const_iterator RecIdxIter
Definition: src/ObsSpace.h:120
RecIdxMap recidx_
profile ordering
Definition: src/ObsSpace.h:389
std::shared_ptr< const Distribution > distribution() const
return MPI distribution object
Definition: src/ObsSpace.h:335
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard dimension id for the given dimension name
Definition: src/ObsSpace.h:202
ObsSpaceParameters obs_params_
obs io parameters
Definition: src/ObsSpace.h:371
std::string distname() const
return the name of the MPI distribution
Definition: src/ObsSpace.h:219
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
Definition: ObsSpace.cc:946
void loadVar(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::vector< VarType > &varValues) const
load a variable from the obs_group_ object
Definition: ObsSpace.cc:660
size_t nlocs() const
return the number of locations in the obs space. Note that nlocs may be smaller than global unique nl...
Definition: src/ObsSpace.h:176
const util::DateTime winbgn_
Beginning of DA timing window.
Definition: src/ObsSpace.h:343
ObsGroup obs_group_
observation data store
Definition: src/ObsSpace.h:368
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
Definition: src/ObsSpace.h:383
const util::DateTime winend_
End of DA timing window.
Definition: src/ObsSpace.h:346
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }) const
transfer data from the obs container to vdata
Definition: ObsSpace.cc:270
const util::DateTime & windowStart() const
Definition: src/ObsSpace.h:142
void fillChanNumToIndexMap()
fill in the channel number to channel index map
Definition: ObsSpace.cc:906
std::size_t nvars() const
return the number of variables in the obs space container. "Variables" refers to the quantities that ...
Definition: ObsSpace.cc:189
void extendVariable(Variable &extendVar, const size_t startFill)
Extend the given variable.
Definition: ObsSpace.cc:1075
bool obsAreSorted() const
true if the groups in the recidx data member are sorted
Definition: src/ObsSpace.h:314
const ObsSpaceParameters & params() const
Definition: src/ObsSpace.h:151
std::string obs_sort_order() const
return YAML configuration parameter: obsdatain.obsgrouping.sort order
Definition: ObsSpace.cc:220
void save()
save the obs space data into a file (if obsdataout specified)
Definition: ObsSpace.cc:171
Variable openCreateVar(const std::string &varName, const std::vector< std::string > &varDimList)
open an obs_group_ variable, create the variable if necessary
Definition: src/ObsSpace.h:519
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
Definition: ObsSpace.cc:645
bool readObsSource(ObsFrameRead &obsFrame, const std::string &varName, std::vector< VarType > &varValues)
read in values for variable from obs source
Definition: ObsSpace.cc:493
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable variable along dimension nchansDimIndex corresponding to ...
Definition: ObsSpace.cc:761
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
Definition: src/ObsSpace.h:392
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Definition: src/ObsSpace.h:171
const eckit::mpi::Comm & commMPI_
MPI communicator.
Definition: src/ObsSpace.h:349
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
Definition: src/ObsSpace.h:168
std::size_t gnlocs_
total number of locations
Definition: src/ObsSpace.h:352
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
Definition: src/ObsSpace.h:401
const oops::Variables & obsvariables() const
return oops variables object (simulated variables)
Definition: src/ObsSpace.h:332
std::map< std::size_t, std::vector< std::size_t > > RecIdxMap
Definition: src/ObsSpace.h:119
ObsSpace(const eckit::Configuration &config, const eckit::mpi::Comm &comm, const util::DateTime &bgn, const util::DateTime &end, const eckit::mpi::Comm &timeComm)
Config based constructor for an ObsSpace object.
Definition: ObsSpace.cc:112
const std::string & obsname() const
return the name of the obs type being stored
Definition: src/ObsSpace.h:216
const util::DateTime & windowEnd() const
Definition: src/ObsSpace.h:145
std::string obs_sort_var() const
return YAML configuration parameter: obsdatain.obsgrouping.sort variable
Definition: ObsSpace.cc:215
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
Definition: src/ObsSpace.h:398
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
Definition: src/ObsSpace.h:386
bool recidx_has(const std::size_t recNum) const
true if given record number exists in the recidx_ data member
Definition: ObsSpace.cc:354
ObsTopLevelParameters top_level_
sub groups of parameters
oops::Parameter< std::string > distName
name of MPI distribution
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Definition: Selection.h:48
Represents the "type" (i.e. integer, string, float) of a piece of data.
Definition: Type.h:123
Variables store data!
Definition: Variable.h:680
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
ObsDimensionId
Definition: src/ObsSpace.h:61
Template handlers for implicit variable conversion.
Definition: src/ObsSpace.h:96
Type to_type
The type that data should be converted to upon write.
Definition: src/ObsSpace.h:98
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57