IODA
src/io/ObsFrameRead.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef IO_OBSFRAMEREAD_H_
9 #define IO_OBSFRAMEREAD_H_
10 
11 #include "eckit/config/LocalConfiguration.h"
12 
13 #include "ioda/core/IodaUtils.h"
14 #include "ioda/distribution/Distribution.h"
15 #include "ioda/io/ObsFrame.h"
16 #include "ioda/ObsSpaceParameters.h"
17 
18 #include "oops/util/Logger.h"
19 #include "oops/util/ObjectCounter.h"
20 #include "oops/util/Printable.h"
21 
22 namespace ioda {
23 
24 /// \brief Implementation of ObsFrameRead class
25 /// \details This class manages one frame of obs data (subset of locations) when
26 /// reading data from an ObsIo object. This includes reading the frame,
27 /// filtering out obs that are outside the DA timing window, generating record
28 /// numbers, applying obs grouping (optional) and applying the MPI distribution.
29 /// \author Stephen Herbener (JCSDA)
30 
31 class ObsFrameRead : public ObsFrame, private util::ObjectCounter<ObsFrameRead> {
32  public:
33  /// \brief classname method for object counter
34  ///
35  /// \details This method is supplied for the ObjectCounter base class.
36  /// It defines a name to identify an object of this class
37  /// for reporting by OOPS.
38  static const std::string classname() {return "ioda::ObsFrameRead";}
39 
40  explicit ObsFrameRead(const ObsSpaceParameters & params);
41 
42  ~ObsFrameRead();
43 
44  /// \brief return list of indices indicating which locations were selected from ObsIo
45  std::vector<std::size_t> index() const override {return indx_;}
46 
47  /// \brief return list of record numbers from ObsIo
48  std::vector<std::size_t> recnums() const override {return recnums_;}
49 
50  /// \brief initialize for walking through the frames
51  void frameInit() override;
52 
53  /// \brief move to the next frame
54  void frameNext() override;
55 
56  /// \brief true if a frame is available (not past end of frames)
57  bool frameAvailable() override;
58 
59  /// \brief return current frame starting index
60  /// \param varName name of variable
61  Dimensions_t frameStart() override;
62 
63  /// \brief return current frame count for variable
64  /// \details Variables can be of different sizes so it's possible that the
65  /// frame has moved past the end of some variables but not so for other
66  /// variables. When the frame is past the end of the given variable, this
67  /// routine returns a zero to indicate that we're done with this variable.
68  /// \param varName variable name
69  Dimensions_t frameCount(const std::string & varName) override;
70 
71  /// \brief return adjusted nlocs frame start
72  Dimensions_t adjNlocsFrameStart() const override {return adjusted_nlocs_frame_start_;}
73 
74  /// \brief return adjusted nlocs frame count
75  Dimensions_t adjNlocsFrameCount() const override {return adjusted_nlocs_frame_count_;}
76 
77  /// \brief read a frame variable
78  /// \details It's possible for some variables to not be included in the
79  /// read because the frame has gone past their ending index.
80  /// Therefore, this function will return true when there exists
81  /// more data available for the variable in the frame.
82  /// This function will allocate the proper amount of memory for the
83  /// output vector varData.
84  /// The following signatures are for different variable data types.
85  /// \param varName variable name
86  /// \param varData varible data
87  /// \param varDataSelect selection information for the selection in memory
88  /// \param frameSelect selection information for the selection in frame
89  bool readFrameVar(const std::string & varName, std::vector<int> & varData);
90  bool readFrameVar(const std::string & varName, std::vector<float> & varData);
91  bool readFrameVar(const std::string & varName, std::vector<std::string> & varData);
92 
93  /// \brief return the MPI distribution
94  std::shared_ptr<const Distribution> distribution() {return dist_;}
95 
96  private:
97  //------------------ private data members ------------------------------
98 
99  /// \brief MPI distribution object
100  std::shared_ptr<Distribution> dist_;
101 
102  /// \brief true if obs_io_ produces a different series of observations on each process,
103  /// false if they are all the same
105 
106  /// \brief current frame start for variable dimensioned along nlocs
107  /// \details This data member is keeping track of the frame start for
108  /// the contiguous storage where the obs source data will be moved to.
109  /// Note that the start_ data member is keeping track of the frame start
110  /// for the obs source itself.
112 
113  /// \brief current frame count for variable dimensioned along nlocs
115 
116  /// \brief map for obs grouping via string keys
117  std::map<std::string, std::size_t> obs_grouping_;
118 
119  /// \brief indexes of locations to extract from the input obs file
120  std::vector<std::size_t> indx_;
121 
122  /// \brief record numbers associated with the location indexes
123  std::vector<std::size_t> recnums_;
124 
125  /// \brief next available record number
126  std::size_t next_rec_num_;
127 
128  /// \brief spacing between record numbers assigned on this process.
129  ///
130  /// Normally 1, but if each process reads observations from a different file, then set to
131  /// the size of the MPI communicator to ensure record numbers assigned by different processes
132  /// are distinct.
133  std::size_t rec_num_increment_;
134 
135  /// \brief unique record numbers
136  std::set<std::size_t> unique_rec_nums_;
137 
138  /// \brief location indices for current frame
139  std::vector<Dimensions_t> frame_loc_index_;
140 
141  /// \brief map showing association of dim names with each variable name
143 
144  /// \brief cache for frame selection
145  std::map<std::vector<std::string>, Selection> known_frame_selections_;
146 
147  /// \brief cache for memory buffer selection
148  std::map<std::vector<std::string>, Selection> known_mem_selections_;
149 
150  //--------------------- private functions ------------------------------
151  /// \brief print routine for oops::Printable base class
152  /// \param ostream output stream
153  void print(std::ostream & os) const override;
154 
155  /// \brief return current frame count for variable
156  /// \details Variables can be of different sizes so it's possible that the
157  /// frame has moved past the end of some variables but not so for other
158  /// variables. When the frame is past the end of the given variable, this
159  /// routine returns a zero to indicate that we're done with this variable.
160  /// \param var variable
161  Dimensions_t basicFrameCount(const Variable & var);
162 
163  /// \brief set up frontend and backend selection objects for the given variable
164  /// \param varShape dimension sizes for variable being transferred
165  Selection createIndexedFrameSelection(const std::vector<Dimensions_t> & varShape);
166 
167  /// \brief generate frame indices and corresponding record numbers
168  /// \details This method generates a list of indices with their corresponding
169  /// record numbers, where the indices denote which locations are to be
170  /// read into this process element.
171  void genFrameIndexRecNums(std::shared_ptr<Distribution> & dist);
172 
173  /// \brief generate indices for all locations in current frame
174  /// \param locIndex vector of location indices relative to entire obs source
175  /// \param frameIndex vector of location indices relative to current frame
176  void genFrameLocationsAll(std::vector<Dimensions_t> & locIndex,
177  std::vector<Dimensions_t> & frameIndex);
178 
179  /// \brief generate indices for locations in current frame after filtering out
180  /// obs outside DA timing window
181  /// \param locIndex vector of location indices relative to entire obs source
182  /// \param frameIndex vector of location indices relative to current frame
183  void genFrameLocationsTimeWindow(std::vector<Dimensions_t> & locIndex,
184  std::vector<Dimensions_t> & frameIndex);
185 
186  /// \brief generate record numbers where each location is a unique record (no grouping)
187  /// \param locIndex vector containing location indices
188  /// \param records vector indexed by location containing the record numbers
189  void genRecordNumbersAll(const std::vector<Dimensions_t> & locIndex,
190  std::vector<Dimensions_t> & records);
191 
192  /// \brief generate record numbers considering obs grouping
193  /// \param obsGroupVarList list of variables controlling the grouping function
194  /// \param frameIndex vector containing frame location indices
195  /// \param records vector indexed by location containing the record numbers
196  void genRecordNumbersGrouping(const std::vector<std::string> & obsGroupVarList,
197  const std::vector<Dimensions_t> & frameIndex,
198  std::vector<Dimensions_t> & records);
199 
200  /// \brief generate string keys for record number assignment
201  /// \param obsGroupVarList list of variables controlling the grouping function
202  /// \param frameIndex vector containing frame location indices
203  /// \param groupingKeys vector of keys for the obs grouping map
204  void buildObsGroupingKeys(const std::vector<std::string> & obsGroupVarList,
205  const std::vector<Dimensions_t> & frameIndex,
206  std::vector<std::string> & groupingKeys);
207 
208  /// \brief apply MPI distribution
209  /// \param dist ioda::Distribution object
210  /// \param records vector indexed by location containing the record numbers
211  void applyMpiDistribution(const std::shared_ptr<Distribution> & dist,
212  const std::vector<Dimensions_t> & locIndex,
213  const std::vector<Dimensions_t> & records);
214 
215  /// \details return true if observation is inside the DA timing window.
216  /// \param obsDt Observation date time object
217  bool insideTimingWindow(const util::DateTime & ObsDt);
218 
219  /// \brief read variable data from frame helper function
220  /// \param varName variable name
221  /// \param varData varible data
222  template<typename DataType>
223  bool readFrameVarHelper(const std::string & varName, std::vector<DataType> & varData) {
224  bool frameVarAvailable;
225  Dimensions_t frameCount = this->frameCount(varName);
226  if (frameCount > 0) {
227  Variable frameVar = obs_frame_.vars.open(varName);
228  std::vector<Dimensions_t> varShape = frameVar.getDimensions().dimsCur;
229 
230  // Form the selection objects for this variable
231 
232  // Check the cache for the selection
233  std::vector<std::string> &dims = dims_attached_to_vars_.at(varName);
234  if (!known_mem_selections_.count(dims)) {
236  if (obs_io_->isVarDimByNlocs(varName)) {
238  createIndexedFrameSelection(varShape);
239  } else {
242  }
243  }
244  Selection & memSelect = known_mem_selections_[dims];
245  Selection & frameSelect = known_frame_selections_[dims];
246 
247  // Read the data into the output varData
248  frameVar.read<DataType>(varData, memSelect, frameSelect);
249 
250  frameVarAvailable = true;
251  } else {
252  frameVarAvailable = false;
253  }
254  return frameVarAvailable;
255  }
256 };
257 
258 } // namespace ioda
259 
260 #endif // IO_OBSFRAMEREAD_H_
std::shared_ptr< ObsIo > obs_io_
ObsIo object.
Definition: ObsFrame.h:161
Selection createMemSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing a memory buffer
Definition: ObsFrame.cc:26
Selection createEntireFrameSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing the entire frame variable
Definition: ObsFrame.cc:49
ObsGroup obs_frame_
ObsGroup object (temporary storage for a single frame)
Definition: ObsFrame.h:164
Implementation of ObsFrameRead class.
bool readFrameVarHelper(const std::string &varName, std::vector< DataType > &varData)
read variable data from frame helper function
std::map< std::string, std::size_t > obs_grouping_
map for obs grouping via string keys
Dimensions_t adjusted_nlocs_frame_start_
current frame start for variable dimensioned along nlocs
Selection createIndexedFrameSelection(const std::vector< Dimensions_t > &varShape)
set up frontend and backend selection objects for the given variable
std::vector< std::size_t > index() const override
return list of indices indicating which locations were selected from ObsIo
void frameNext() override
move to the next frame
Definition: ObsFrameRead.cc:69
void applyMpiDistribution(const std::shared_ptr< Distribution > &dist, const std::vector< Dimensions_t > &locIndex, const std::vector< Dimensions_t > &records)
apply MPI distribution
Dimensions_t adjusted_nlocs_frame_count_
current frame count for variable dimensioned along nlocs
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
Dimensions_t adjNlocsFrameCount() const override
return adjusted nlocs frame count
std::size_t rec_num_increment_
spacing between record numbers assigned on this process.
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
std::size_t next_rec_num_
next available record number
bool frameAvailable() override
true if a frame is available (not past end of frames)
Definition: ObsFrameRead.cc:75
void frameInit() override
initialize for walking through the frames
Definition: ObsFrameRead.cc:44
bool each_process_reads_separate_obs_
true if obs_io_ produces a different series of observations on each process, false if they are all the same
Dimensions_t basicFrameCount(const Variable &var)
return current frame count for variable
std::set< std::size_t > unique_rec_nums_
unique record numbers
static const std::string classname()
classname method for object counter
bool readFrameVar(const std::string &varName, std::vector< int > &varData)
read a frame variable
void print(std::ostream &os) const override
print routine for oops::Printable base class
std::shared_ptr< Distribution > dist_
MPI distribution object.
void buildObsGroupingKeys(const std::vector< std::string > &obsGroupVarList, const std::vector< Dimensions_t > &frameIndex, std::vector< std::string > &groupingKeys)
generate string keys for record number assignment
void genRecordNumbersGrouping(const std::vector< std::string > &obsGroupVarList, const std::vector< Dimensions_t > &frameIndex, std::vector< Dimensions_t > &records)
generate record numbers considering obs grouping
std::map< std::vector< std::string >, Selection > known_frame_selections_
cache for frame selection
void genRecordNumbersAll(const std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &records)
generate record numbers where each location is a unique record (no grouping)
std::map< std::vector< std::string >, Selection > known_mem_selections_
cache for memory buffer selection
ObsFrameRead(const ObsSpaceParameters &params)
Definition: ObsFrameRead.cc:20
Dimensions_t frameStart() override
return current frame starting index
void genFrameLocationsAll(std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &frameIndex)
generate indices for all locations in current frame
bool insideTimingWindow(const util::DateTime &ObsDt)
Dimensions_t adjNlocsFrameStart() const override
return adjusted nlocs frame start
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
void genFrameLocationsTimeWindow(std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &frameIndex)
generate indices for locations in current frame after filtering out obs outside DA timing window
std::shared_ptr< const Distribution > distribution()
return the MPI distribution
std::vector< Dimensions_t > frame_loc_index_
location indices for current frame
std::vector< std::size_t > recnums() const override
return list of record numbers from ObsIo
void genFrameIndexRecNums(std::shared_ptr< Distribution > &dist)
generate frame indices and corresponding record numbers
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writing.
Definition: Selection.h:48
Variables store data!
Definition: Variable.h:680
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual Dimensions getDimensions() const
Definition: Variable.cpp:160
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
Definition: Variable.cpp:330
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Definition: Dimensions.h:23