IODA
src/io/ObsFrameRead.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef IO_OBSFRAMEREAD_H_
9 #define IO_OBSFRAMEREAD_H_
10 
11 #include "eckit/config/LocalConfiguration.h"
12 
13 #include "ioda/core/IodaUtils.h"
14 #include "ioda/distribution/Distribution.h"
15 #include "ioda/io/ObsFrame.h"
16 #include "ioda/ObsSpaceParameters.h"
17 
18 #include "oops/util/Logger.h"
19 #include "oops/util/ObjectCounter.h"
20 #include "oops/util/Printable.h"
21 
22 namespace ioda {
23 
24 /// \brief Implementation of ObsFrameRead class
25 /// \details This class manages one frame of obs data (subset of locations) when
26 /// reading data from an ObsIo object. This includes reading the frame,
27 /// filtering out obs that are outside the DA timing window, generating record
28 /// numbers, applying obs grouping (optional) and applying the MPI distribution.
29 /// \author Stephen Herbener (JCSDA)
30 
31 class ObsFrameRead : public ObsFrame, private util::ObjectCounter<ObsFrameRead> {
32  public:
33  /// \brief classname method for object counter
34  ///
35  /// \details This method is supplied for the ObjectCounter base class.
36  /// It defines a name to identify an object of this class
37  /// for reporting by OOPS.
38  static const std::string classname() {return "ioda::ObsFrameRead";}
39 
40  explicit ObsFrameRead(const ObsSpaceParameters & params);
41 
42  ~ObsFrameRead();
43 
44  /// \brief return list of indices indicating which locations were selected from ObsIo
45  std::vector<std::size_t> index() const override {return indx_;}
46 
47  /// \brief return list of record numbers from ObsIo
48  std::vector<std::size_t> recnums() const override {return recnums_;}
49 
50  /// \brief initialize for walking through the frames
51  void frameInit() override;
52 
53  /// \brief move to the next frame
54  void frameNext() override;
55 
56  /// \brief true if a frame is available (not past end of frames)
57  bool frameAvailable() override;
58 
59  /// \brief return current frame starting index
60  /// \note unlike frameCount, this function takes no variable name argument
61  Dimensions_t frameStart() override;
62 
63  /// \brief return current frame count for variable
64  /// \details Variables can be of different sizes so it's possible that the
65  /// frame has moved past the end of some variables but not so for other
66  /// variables. When the frame is past the end of the given variable, this
67  /// routine returns a zero to indicate that we're done with this variable.
68  /// \param varName variable name
69  Dimensions_t frameCount(const std::string & varName) override;
70 
71  /// \brief return adjusted nlocs frame start
72  Dimensions_t adjNlocsFrameStart() const override {return adjusted_nlocs_frame_start_;}
73 
74  /// \brief return adjusted nlocs frame count
75  Dimensions_t adjNlocsFrameCount() const override {return adjusted_nlocs_frame_count_;}
76 
77  /// \brief read a frame variable
78  /// \details It's possible for some variables to not be included in the
79  /// read because the frame has gone past their ending index.
80  /// Therefore, this function will return true when there exists
81  /// more data available for the variable in the frame.
82  /// This function will allocate the proper amount of memory for the
83  /// output vector varData.
84  /// The following signatures are for different variable data types.
85  /// \param varName variable name
86  /// \param varData variable data
87  /// \param varDataSelect selection information for the selection in memory
88  /// \param frameSelect selection information for the selection in frame
89  bool readFrameVar(const std::string & varName, std::vector<int> & varData);
90  bool readFrameVar(const std::string & varName, std::vector<float> & varData);
91  bool readFrameVar(const std::string & varName, std::vector<std::string> & varData);
92 
93  /// \brief return the MPI distribution
94  std::shared_ptr<const Distribution> distribution() {return dist_;}
95 
96  private:
97  //------------------ private data members ------------------------------
98 
99  /// \brief MPI distribution object
100  std::shared_ptr<Distribution> dist_;
101 
102  /// \brief true if obs_io_ produces a different series of observations on each process,
103  /// false if they are all the same
105 
106  /// \brief distribution name
107  std::string distname_;
108 
109  /// \brief current frame start for variable dimensioned along nlocs
110  /// \details This data member is keeping track of the frame start for
111  /// the contiguous storage where the obs source data will be moved to.
112  /// Note that the start_ data member is keeping track of the frame start
113  /// for the obs source itself.
115 
116  /// \brief current frame count for variable dimensioned along nlocs
118 
119  /// \brief map for obs grouping via string keys
120  std::map<std::string, std::size_t> obs_grouping_;
121 
122  /// \brief indexes of locations to extract from the input obs file
123  std::vector<std::size_t> indx_;
124 
125  /// \brief record numbers associated with the location indexes
126  std::vector<std::size_t> recnums_;
127 
128  /// \brief next available record number
129  std::size_t next_rec_num_;
130 
131  /// \brief spacing between record numbers assigned on this process.
132  ///
133  /// Normally 1, but if each process reads observations from a different file, then set to
134  /// the size of the MPI communicator to ensure record numbers assigned by different processes
135  /// are distinct.
136  std::size_t rec_num_increment_;
137 
138  /// \brief unique record numbers
139  std::set<std::size_t> unique_rec_nums_;
140 
141  /// \brief location indices for current frame
142  std::vector<Dimensions_t> frame_loc_index_;
143 
144  /// \brief map showing association of dim names with each variable name
146 
147  /// \brief cache for frame selection
148  std::map<std::vector<std::string>, Selection> known_frame_selections_;
149 
150  /// \brief cache for memory buffer selection
151  std::map<std::vector<std::string>, Selection> known_mem_selections_;
152 
153  //--------------------- private functions ------------------------------
154  /// \brief print routine for oops::Printable base class
155  /// \param os output stream
156  void print(std::ostream & os) const override;
157 
158  /// \brief return current frame count for variable
159  /// \details Variables can be of different sizes so it's possible that the
160  /// frame has moved past the end of some variables but not so for other
161  /// variables. When the frame is past the end of the given variable, this
162  /// routine returns a zero to indicate that we're done with this variable.
163  /// \param var variable
164  Dimensions_t basicFrameCount(const Variable & var);
165 
166  /// \brief set up frontend and backend selection objects for the given variable
167  /// \param varShape dimension sizes for variable being transferred
168  Selection createIndexedFrameSelection(const std::vector<Dimensions_t> & varShape);
169 
170  /// \brief generate frame indices and corresponding record numbers
171  /// \details This method generates a list of indices with their corresponding
172  /// record numbers, where the indices denote which locations are to be
173  /// read into this process element.
174  void genFrameIndexRecNums(std::shared_ptr<Distribution> & dist);
175 
176  /// \brief generate indices for all locations in current frame
177  /// \param locIndex vector of location indices relative to entire obs source
178  /// \param frameIndex vector of location indices relative to current frame
179  void genFrameLocationsAll(std::vector<Dimensions_t> & locIndex,
180  std::vector<Dimensions_t> & frameIndex);
181 
182  /// \brief generate indices for locations in current frame after filtering out
183  /// obs outside DA timing window
184  /// \param locIndex vector of location indices relative to entire obs source
185  /// \param frameIndex vector of location indices relative to current frame
186  void genFrameLocationsTimeWindow(std::vector<Dimensions_t> & locIndex,
187  std::vector<Dimensions_t> & frameIndex);
188 
189  /// \brief generate record numbers where each location is a unique record (no grouping)
190  /// \param locIndex vector containing location indices
191  /// \param records vector indexed by location containing the record numbers
192  void genRecordNumbersAll(const std::vector<Dimensions_t> & locIndex,
193  std::vector<Dimensions_t> & records);
194 
195  /// \brief generate record numbers considering obs grouping
196  /// \param obsGroupVarList list of variables controlling the grouping function
197  /// \param frameIndex vector containing frame location indices
198  /// \param records vector indexed by location containing the record numbers
199  void genRecordNumbersGrouping(const std::vector<std::string> & obsGroupVarList,
200  const std::vector<Dimensions_t> & frameIndex,
201  std::vector<Dimensions_t> & records);
202 
203  /// \brief generate string keys for record number assignment
204  /// \param obsGroupVarList list of variables controlling the grouping function
205  /// \param frameIndex vector containing frame location indices
206  /// \param groupingKeys vector of keys for the obs grouping map
207  void buildObsGroupingKeys(const std::vector<std::string> & obsGroupVarList,
208  const std::vector<Dimensions_t> & frameIndex,
209  std::vector<std::string> & groupingKeys);
210 
211  /// \brief apply MPI distribution
212  /// \param dist ioda::Distribution object
213  /// \param records vector indexed by location containing the record numbers
214  void applyMpiDistribution(const std::shared_ptr<Distribution> & dist,
215  const std::vector<Dimensions_t> & locIndex,
216  const std::vector<Dimensions_t> & records);
217 
218  /// \brief return true if observation is inside the DA timing window.
219  /// \param ObsDt Observation date time object
220  bool insideTimingWindow(const util::DateTime & ObsDt);
221 
222  /// \brief read variable data from frame helper function
223  /// \param varName variable name
224  /// \param varData variable data
225  template<typename DataType>
226  bool readFrameVarHelper(const std::string & varName, std::vector<DataType> & varData) {
227  bool frameVarAvailable;
228  Dimensions_t frameCount = this->frameCount(varName);
229  if (frameCount > 0) {
230  Variable frameVar = obs_frame_.vars.open(varName);
231  std::vector<Dimensions_t> varShape = frameVar.getDimensions().dimsCur;
232 
233  // Form the selection objects for this variable
234 
235  // Check the cache for the selection
236  std::vector<std::string> &dims = dims_attached_to_vars_.at(varName);
237  if (!known_mem_selections_.count(dims)) {
239  if (obs_io_->isVarDimByNlocs(varName)) {
241  createIndexedFrameSelection(varShape);
242  } else {
245  }
246  }
247  Selection & memSelect = known_mem_selections_[dims];
248  Selection & frameSelect = known_frame_selections_[dims];
249 
250  // Read the data into the output varData
251  frameVar.read<DataType>(varData, memSelect, frameSelect);
252 
253  frameVarAvailable = true;
254  } else {
255  frameVarAvailable = false;
256  }
257  return frameVarAvailable;
258  }
259 };
260 
261 } // namespace ioda
262 
263 #endif // IO_OBSFRAMEREAD_H_
std::shared_ptr< ObsIo > obs_io_
ObsIo object.
Definition: ObsFrame.h:161
Selection createMemSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing a memory buffer
Definition: ObsFrame.cc:26
Selection createEntireFrameSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing the entire frame variable
Definition: ObsFrame.cc:49
ObsGroup obs_frame_
ObsGroup object (temporary storage for a single frame)
Definition: ObsFrame.h:164
Implementation of ObsFrameRead class.
bool readFrameVarHelper(const std::string &varName, std::vector< DataType > &varData)
read variable data from frame helper function
std::map< std::string, std::size_t > obs_grouping_
map for obs grouping via string keys
Dimensions_t adjusted_nlocs_frame_start_
current frame start for variable dimensioned along nlocs
Selection createIndexedFrameSelection(const std::vector< Dimensions_t > &varShape)
set up frontend and backend selection objects for the given variable
std::vector< std::size_t > index() const override
return list of indices indicating which locations were selected from ObsIo
void frameNext() override
move to the next frame
Definition: ObsFrameRead.cc:75
void applyMpiDistribution(const std::shared_ptr< Distribution > &dist, const std::vector< Dimensions_t > &locIndex, const std::vector< Dimensions_t > &records)
apply MPI distribution
Dimensions_t adjusted_nlocs_frame_count_
current frame count for variable dimensioned along nlocs
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
Dimensions_t adjNlocsFrameCount() const override
return adjusted nlocs frame count
std::size_t rec_num_increment_
spacing between record numbers assigned on this process.
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
std::size_t next_rec_num_
next available record number
std::string distname_
Distribution name
bool frameAvailable() override
true if a frame is available (not past end of frames)
Definition: ObsFrameRead.cc:81
void frameInit() override
initialize for walking through the frames
Definition: ObsFrameRead.cc:50
bool each_process_reads_separate_obs_
true if obs_io_ produces a different series of observations on each process, false if they are all the same
Dimensions_t basicFrameCount(const Variable &var)
return current frame count for variable
std::set< std::size_t > unique_rec_nums_
unique record numbers
static const std::string classname()
classname method for object counter
bool readFrameVar(const std::string &varName, std::vector< int > &varData)
read a frame variable
void print(std::ostream &os) const override
print routine for oops::Printable base class
std::shared_ptr< Distribution > dist_
MPI distribution object.
void buildObsGroupingKeys(const std::vector< std::string > &obsGroupVarList, const std::vector< Dimensions_t > &frameIndex, std::vector< std::string > &groupingKeys)
generate string keys for record number assignment
void genRecordNumbersGrouping(const std::vector< std::string > &obsGroupVarList, const std::vector< Dimensions_t > &frameIndex, std::vector< Dimensions_t > &records)
generate record numbers considering obs grouping
std::map< std::vector< std::string >, Selection > known_frame_selections_
cache for frame selection
void genRecordNumbersAll(const std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &records)
generate record numbers where each location is a unique record (no grouping)
std::map< std::vector< std::string >, Selection > known_mem_selections_
cache for memory buffer selection
ObsFrameRead(const ObsSpaceParameters &params)
Definition: ObsFrameRead.cc:20
Dimensions_t frameStart() override
return current frame starting index
void genFrameLocationsAll(std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &frameIndex)
generate indices for all locations in current frame
bool insideTimingWindow(const util::DateTime &ObsDt)
Dimensions_t adjNlocsFrameStart() const override
return adjusted nlocs frame start
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
void genFrameLocationsTimeWindow(std::vector< Dimensions_t > &locIndex, std::vector< Dimensions_t > &frameIndex)
generate indices for locations in current frame after filtering out obs outside DA timing window
std::shared_ptr< const Distribution > distribution()
return the MPI distribution
std::vector< Dimensions_t > frame_loc_index_
location indices for current frame
std::vector< std::size_t > recnums() const override
return list of record numbers from ObsIo
void genFrameIndexRecNums(std::shared_ptr< Distribution > &dist)
generate frame indices and corresponding record numbers
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Definition: Selection.h:48
Variables store data!
Definition: Variable.h:680
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual Dimensions getDimensions() const
Definition: Variable.cpp:160
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
Definition: Variable.cpp:330
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Definition: Dimensions.h:23