IODA
ObsData.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #include "ioda/core/ObsData.h"
9 
10 #include <algorithm>
11 #include <fstream>
12 #include <functional>
13 #include <iomanip>
14 #include <map>
15 #include <memory>
16 #include <numeric>
17 #include <set>
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 #include "eckit/config/Configuration.h"
23 #include "eckit/exception/Exceptions.h"
24 
25 #include "oops/mpi/mpi.h"
26 #include "oops/util/abor1_cpp.h"
27 #include "oops/util/DateTime.h"
28 #include "oops/util/Duration.h"
29 #include "oops/util/Logger.h"
30 #include "oops/util/Random.h"
31 #include "oops/util/stringFunctions.h"
32 
33 #include "ioda/distribution/DistributionFactory.h"
34 #include "ioda/io/IodaIOfactory.h"
35 
36 #include "atlas/util/Earth.h"
37 
38 namespace ioda {
39 
40 // -----------------------------------------------------------------------------
41 /*!
42  * \details Config based constructor for an ObsData object. This constructor will read
43  * in from the obs file and transfer the variables into the obs container. Obs
44  * falling outside the DA timing window, specified by bgn and end, will be
45  * discarded before storing them in the obs container.
46  *
47  * \param[in] config ECKIT configuration segment holding obs types specs
48  * \param[in] bgn DateTime object holding the start of the DA timing window
49  * \param[in] end DateTime object holding the end of the DA timing window
50  */
51 ObsData::ObsData(const eckit::Configuration & config, const eckit::mpi::Comm & comm,
52  const util::DateTime & bgn, const util::DateTime & end,
53  const eckit::mpi::Comm & timeComm)
54  : config_(config), winbgn_(bgn), winend_(end), commMPI_(comm),
55  gnlocs_(0), nlocs_(0), nvars_(0), nrecs_(0), file_unexpected_dtypes_(false),
56  file_excess_dims_(false), in_max_frame_size_(0), out_max_frame_size_(0),
57  int_database_(), float_database_(), string_database_(), datetime_database_(),
58  obsvars_(), next_rec_num_(0)
59 {
60  oops::Log::trace() << "ObsData::ObsData config = " << config << std::endl;
61 
62  obsname_ = config.getString("name");
63  distname_ = config.getString("distribution", "RoundRobin");
64 
65  obsvars_ = oops::Variables(config, "simulated variables");
66  oops::Log::info() << obsname_ << " vars: " << obsvars_ << std::endl;
67 
68  // Create the MPI distribution object
69  std::unique_ptr<DistributionFactory> distFactory;
70  dist_.reset(distFactory->createDistribution(this->comm(), distname_));
71 
72  // Initialize the obs space container
73  if (config.has("obsdatain")) {
74  // Initialize the container from an input obs file
75  obs_group_variable_ = config.getString("obsdatain.obsgrouping.group variable", "");
76  obs_sort_variable_ = config.getString("obsdatain.obsgrouping.sort variable", "");
77  obs_sort_order_ = config.getString("obsdatain.obsgrouping.sort order", "ascending");
78  if ((obs_sort_order_ != "ascending") && (obs_sort_order_ != "descending")) {
79  std::string ErrMsg =
80  std::string("ObsData::ObsData: Must use one of 'ascending' or 'descending' ") +
81  std::string("for the 'sort order:' YAML configuration keyword.");
82  ABORT(ErrMsg);
83  }
84  filein_ = config.getString("obsdatain.obsfile");
85  in_max_frame_size_ = config.getUnsigned("obsdatain.max frame size",
87  oops::Log::trace() << obsname_ << " file in = " << filein_ << std::endl;
90  if (commMPI_.rank() == 0) {
91  oops::Log::warning()
92  << "ObsData::ObsData:: WARNING: Input file contains variables "
93  << "with unexpected data types" << std::endl
94  << " Input file: " << filein_ << std::endl;
95  }
96  }
97 
98  if (file_excess_dims_) {
99  if (commMPI_.rank() == 0) {
100  oops::Log::warning()
101  << "ObsData::ObsData:: WARNING: Input file contains variables "
102  << "with excess number of dimensions (these variables were skipped)" << std::endl
103  << " Input file: " << filein_ << std::endl;
104  }
105  }
106 
107  if (obs_sort_variable_ != "") {
109  }
110  } else if (config.has("generate")) {
111  // Initialize the container from the generateDistribution method
112  eckit::LocalConfiguration genconfig(config, "generate");
113  generateDistribution(genconfig);
114  } else {
115  // Error - must have one of obsdatain or Generate
116  std::string ErrorMsg =
117  "ObsData::ObsData: Must use one of 'obsdatain' or 'generate' in the YAML configuration.";
118  ABORT(ErrorMsg);
119  }
120  nrecs_ = unique_rec_nums_.size();
121 
122  // Check to see if an output file has been requested.
123  if (config.has("obsdataout.obsfile")) {
124  std::string filename = config.getString("obsdataout.obsfile");
125  out_max_frame_size_ = config.getUnsigned("obsdataout.max frame size",
127 
128  // If present, change '%{member}%' to 'iii'
129  util::stringfunctions::swapNameMember(config, filename);
130 
131  // Find the left-most dot in the file name, and use that to pick off the file name
132  // and file extension.
133  std::size_t found = filename.find_last_of(".");
134  if (found == std::string::npos)
135  found = filename.length();
136 
137  // Get the process rank number and format it
138  std::ostringstream ss;
139  ss << "_" << std::setw(4) << std::setfill('0') << this->comm().rank();
140  if (timeComm.size() > 1) ss << "_" << timeComm.rank();
141 
142  // Construct the output file name
143  fileout_ = filename.insert(found, ss.str());
144 
145  // Check to see if user is trying to overwrite an existing file. For now always allow
146  // the overwrite, but issue a warning if we are about to clobber an existing file.
147  std::ifstream infile(fileout_);
148  if (infile.good() && (commMPI_.rank() == 0)) {
149  oops::Log::warning() << "ObsData::ObsData WARNING: Overwriting output file "
150  << fileout_ << std::endl;
151  }
152  } else {
153  oops::Log::debug() << "ObsData::ObsData output file is not required " << std::endl;
154  }
155 
156  oops::Log::trace() << "ObsData::ObsData contructed name = " << obsname() << std::endl;
157 }
158 
159 // -----------------------------------------------------------------------------
160 /*!
161  * \details Destructor for an ObsData object. This destructor will clean up the ObsData
162  * object and optionally write out the contents of the obs container into
163  * the output file. The save-to-file operation is invoked when an output obs
164  * file is specified in the ECKIT configuration segment associated with the
165  * ObsData object.
166  */
168  oops::Log::trace() << "ObsData::ObsData destructor begin" << std::endl;
169  if (fileout_.size() != 0) {
170  oops::Log::info() << obsname() << ": save database to " << fileout_ << std::endl;
172  } else {
173  oops::Log::info() << obsname() << " : no output" << std::endl;
174  }
175  oops::Log::trace() << "ObsData::ObsData destructor end" << std::endl;
176 }
177 
178 // -----------------------------------------------------------------------------
179 /*!
180  * \brief transfer data from the obs container to vdata
181  *
182  * \details The following four get_db methods are the same except for the data type
183  * of the data being transferred (integer, float, double, DateTime). The
184  * caller needs to allocate the memory that the vdata parameter points to.
185  *
186  * \param[in] group Name of container group (ObsValue, ObsError, MetaData, etc.)
187  * \param[in] name Name of container variable
188  * \param[out] vdata Vector where container data is being transferred to
189  */
190 
191 void ObsData::get_db(const std::string & group, const std::string & name,
192  std::vector<int> & vdata) const {
193  std::vector<std::size_t> vshape(1, vdata.size());
194  int_database_.LoadFromDb(group, name, vshape, vdata);
195 }
196 
197 void ObsData::get_db(const std::string & group, const std::string & name,
198  std::vector<float> & vdata) const {
199  std::vector<std::size_t> vshape(1, vdata.size());
200  float_database_.LoadFromDb(group, name, vshape, vdata);
201 }
202 
203 void ObsData::get_db(const std::string & group, const std::string & name,
204  std::vector<double> & vdata) const {
205  std::vector<std::size_t> vshape(1, vdata.size());
206  // load the float values from the database and convert to double
207  std::vector<float> FloatData(vdata.size(), 0.0);
208  float_database_.LoadFromDb(group, name, vshape, FloatData);
209  ConvertVarType<float, double>(FloatData, vdata);
210 }
211 
212 void ObsData::get_db(const std::string & group, const std::string & name,
213  std::vector<std::string> & vdata) const {
214  std::vector<std::size_t> vshape(1, vdata.size());
215  string_database_.LoadFromDb(group, name, vshape, vdata);
216 }
217 
218 void ObsData::get_db(const std::string & group, const std::string & name,
219  std::vector<util::DateTime> & vdata) const {
220  std::vector<std::size_t> vshape(1, vdata.size());
221  datetime_database_.LoadFromDb(group, name, vshape, vdata);
222 }
223 
224 // -----------------------------------------------------------------------------
225 /*!
226  * \brief transfer data from vdata to the obs container
227  *
228  * \details The following four put_db methods are the same except for the data type
229  * of the data being transferred (integer, float, double, DateTime). The
230  * caller needs to allocate and assign the memory that the vdata parameter
231  * points to.
232  *
233  * \param[in] group Name of container group (ObsValue, ObsError, MetaData, etc.)
234  * \param[in] name Name of container variable
235  * \param[out] vdata Vector where container data is being transferred from
236  */
237 
238 void ObsData::put_db(const std::string & group, const std::string & name,
239  const std::vector<int> & vdata) {
240  std::vector<std::size_t> vshape(1, vdata.size());
241  int_database_.StoreToDb(group, name, vshape, vdata);
242 }
243 
244 void ObsData::put_db(const std::string & group, const std::string & name,
245  const std::vector<float> & vdata) {
246  std::vector<std::size_t> vshape(1, vdata.size());
247  float_database_.StoreToDb(group, name, vshape, vdata);
248 }
249 
250 void ObsData::put_db(const std::string & group, const std::string & name,
251  const std::vector<double> & vdata) {
252  std::vector<std::size_t> vshape(1, vdata.size());
253  // convert to float, then load into the database
254  std::vector<float> FloatData(vdata.size());
255  ConvertVarType<double, float>(vdata, FloatData);
256  float_database_.StoreToDb(group, name, vshape, FloatData);
257 }
258 
259 void ObsData::put_db(const std::string & group, const std::string & name,
260  const std::vector<std::string> & vdata) {
261  std::vector<std::size_t> vshape(1, vdata.size());
262  string_database_.StoreToDb(group, name, vshape, vdata);
263 }
264 
265 void ObsData::put_db(const std::string & group, const std::string & name,
266  const std::vector<util::DateTime> & vdata) {
267  std::vector<std::size_t> vshape(1, vdata.size());
268  datetime_database_.StoreToDb(group, name, vshape, vdata);
269 }
270 
271 // -----------------------------------------------------------------------------
272 /*!
273  * \details This method checks for the existence of the group, name combination
274  * in the obs container. If the combination exists, "true" is returned,
275  * otherwise "false" is returned.
276  */
277 
278 bool ObsData::has(const std::string & group, const std::string & name) const {
279  return (int_database_.has(group, name) || float_database_.has(group, name) ||
280  string_database_.has(group, name) || datetime_database_.has(group, name));
281 }
282 
283 // -----------------------------------------------------------------------------
284 /*!
285  * \details This method returns the data type of the variable stored in the obs
286  * container.
287  */
288 
289 ObsDtype ObsData::dtype(const std::string & group, const std::string & name) const {
290  ObsDtype VarType = ObsDtype::None;
291  if (int_database_.has(group, name)) {
292  VarType = ObsDtype::Integer;
293  } else if (float_database_.has(group, name)) {
294  VarType = ObsDtype::Float;
295  } else if (string_database_.has(group, name)) {
296  VarType = ObsDtype::String;
297  } else if (datetime_database_.has(group, name)) {
298  VarType = ObsDtype::DateTime;
299  }
300 
301  return VarType;
302 }
303 
304 // -----------------------------------------------------------------------------
305 /*!
306  * \details This method returns the setting of the YAML configuration
307  * parameter: obsdatain.obsgrouping.group variable
308  */
309 std::string ObsData::obs_group_var() const {
310  return obs_group_variable_;
311 }
312 
313 // -----------------------------------------------------------------------------
314 /*!
315  * \details This method returns the setting of the YAML configuration
316  * parameter: obsdatain.obsgrouping.sort variable
317  */
318 std::string ObsData::obs_sort_var() const {
319  return obs_sort_variable_;
320 }
321 
322 // -----------------------------------------------------------------------------
323 /*!
324  * \details This method returns the setting of the YAML configuration
325  * parameter: obsdatain.obsgrouping.sort order
326  */
327 std::string ObsData::obs_sort_order() const {
328  return obs_sort_order_;
329 }
330 
331 // -----------------------------------------------------------------------------
332 /*!
333  * \details This method returns the number of unique locations in the input
334  * obs file. Note that nlocs from the obs container may be smaller
335  * than nlocs from the input obs file due to the removal of obs outside
336  * the DA timing window and/or due to distribution of obs across
337  * multiple process elements.
338  */
339 std::size_t ObsData::gnlocs() const {
340  return gnlocs_;
341 }
342 
343 // -----------------------------------------------------------------------------
344 /*!
345  * \details This method returns the number of unique locations in the obs
346  * container. Note that nlocs from the obs container may be smaller
347  * than nlocs from the input obs file due to the removal of obs outside
348  * the DA timing window and/or due to distribution of obs across
349  * multiple process elements.
350  */
351 std::size_t ObsData::nlocs() const {
352  return nlocs_;
353 }
354 
355 // -----------------------------------------------------------------------------
356 /*!
357  * \details This method returns the number of unique records in the obs
358  * container. A record is an atomic unit of locations that belong
359  * together such as a single radiosonde sounding.
360  */
361 std::size_t ObsData::nrecs() const {
362  return nrecs_;
363 }
364 
365 // -----------------------------------------------------------------------------
366 /*!
367  * \details This method returns the number of unique variables in the obs
368  * container. "Variables" refers to the quantities that can be
369  * assimilated as opposed to meta data.
370  */
371 std::size_t ObsData::nvars() const {
372  return nvars_;
373 }
374 
375 // -----------------------------------------------------------------------------
376 /*!
377  * \details This method returns a reference to the record number vector
378  * data member. This is for read only access.
379  */
380 const std::vector<std::size_t> & ObsData::recnum() const {
381  return recnums_;
382 }
383 
384 // -----------------------------------------------------------------------------
385 /*!
386  * \details This method returns a reference to the index vector
387  * data member. This is for read only access.
388  */
389 const std::vector<std::size_t> & ObsData::index() const {
390  return indx_;
391 }
392 
393 // -----------------------------------------------------------------------------
394 /*!
395  * \details This method returns the begin iterator associated with the
396  * recidx_ data member.
397  */
399  return recidx_.begin();
400 }
401 
402 // -----------------------------------------------------------------------------
403 /*!
404  * \details This method returns the end iterator associated with the
405  * recidx_ data member.
406  */
408  return recidx_.end();
409 }
410 
411 // -----------------------------------------------------------------------------
412 /*!
413  * \details This method returns a boolean value indicating whether the
414  * given record number exists in the recidx_ data member.
415  */
416 bool ObsData::recidx_has(const std::size_t RecNum) const {
417  RecIdxIter irec = recidx_.find(RecNum);
418  return (irec != recidx_.end());
419 }
420 
421 // -----------------------------------------------------------------------------
422 /*!
423  * \details This method returns the current record number, pointed to by the
424  * given iterator, from the recidx_ data member.
425  */
426 std::size_t ObsData::recidx_recnum(const RecIdxIter & Irec) const {
427  return Irec->first;
428 }
429 
430 // -----------------------------------------------------------------------------
431 /*!
432  * \details This method returns the current vector, pointed to by the
433  * given iterator, from the recidx_ data member.
434  */
435 const std::vector<std::size_t> & ObsData::recidx_vector(const RecIdxIter & Irec) const {
436  return Irec->second;
437 }
438 
439 // -----------------------------------------------------------------------------
440 /*!
441  * \details This method returns the current vector, pointed to by the
442  * given iterator, from the recidx_ data member.
443  */
444 const std::vector<std::size_t> & ObsData::recidx_vector(const std::size_t RecNum) const {
445  RecIdxIter Irec = recidx_.find(RecNum);
446  if (Irec == recidx_.end()) {
447  std::string ErrMsg =
448  "ObsData::recidx_vector: Record number, " + std::to_string(RecNum) +
449  ", does not exist in record index map.";
450  ABORT(ErrMsg);
451  }
452  return Irec->second;
453 }
454 
455 // -----------------------------------------------------------------------------
456 /*!
457  * \details This method returns the all of the record numbers from the
458  * recidx_ data member (ie, all the key values) in a vector.
459  */
460 std::vector<std::size_t> ObsData::recidx_all_recnums() const {
461  std::vector<std::size_t> RecNums(nrecs_);
462  std::size_t recnum = 0;
463  for (RecIdxIter Irec = recidx_.begin(); Irec != recidx_.end(); ++Irec) {
464  RecNums[recnum] = Irec->first;
465  recnum++;
466  }
467  return RecNums;
468 }
469 
470 // -----------------------------------------------------------------------------
471 /*!
472  * \details This method will generate a set of latitudes, longitudes and datetimes
473  * which can be used for testing without reading in an obs file. Two methods
474  * are supported, the first generating random values between specified latitudes,
475  * longitudes and a timing window, and the second copying lists specified by
476  * the user. This method is triggered using the "Generate" keyword in the
477  * configuration file and either of the two methods above are specified using
478  * the sub keywords "Random" or "List".
479  *
480  * \param[in] conf ECKIT configuration segment built from an input configuration file.
481  */
482 void ObsData::generateDistribution(const eckit::Configuration & conf) {
483  // Generate lat, lon, time values according to method specified in the config
484  std::vector<float> latitude;
485  std::vector<float> longitude;
486  std::vector<util::DateTime> obs_datetimes;
487  if (conf.has("random")) {
488  genDistRandom(conf, latitude, longitude, obs_datetimes);
489  } else if (conf.has("list")) {
490  genDistList(conf, latitude, longitude, obs_datetimes);
491  } else {
492  std::string ErrorMsg =
493  std::string("ObsData::generateDistribution: Must specify either ") +
494  std::string("'random' or 'list' with 'generate' configuration keyword");
495  ABORT(ErrorMsg);
496  }
497 
498  // number of variables specified in simulate section
499  nvars_ = obsvars_.size();
500 
501  // Read obs errors (one for each variable)
502  const std::vector<float> err = conf.getFloatVector("obs errors");
503  ASSERT(nvars_ == err.size());
504 
505  put_db("MetaData", "datetime", obs_datetimes);
506  put_db("MetaData", "latitude", latitude);
507  put_db("MetaData", "longitude", longitude);
508  for (std::size_t ivar = 0; ivar < nvars_; ivar++) {
509  std::vector<float> obserr(nlocs_, err[ivar]);
510  put_db("ObsError", obsvars_[ivar], obserr);
511  }
512 }
513 
514 // -----------------------------------------------------------------------------
515 /*!
516  * \details This method will generate a set of latitudes and longitudes of which
517  * can be used for testing without reading in an obs file. Two latitude
518  * values, two longitude values, the number of locations (nobs keyword)
519  * and an optional random seed are specified in the configuration given
520  * by the conf parameter. Random locations between the two latitudes and
521  * two longitudes are generated and stored in the obs container as meta data.
522  * Random time stamps that fall inside the given timing window (which is
523  * specified in the configuration file) are alos generated and stored
524  * in the obs container as meta data.
525  *
526  * \param[in] conf ECKIT configuration segment built from an input configuration file.
527  * \param[out] Lats Generated latitude values
528  * \param[out] Lons Generated longitude values
529  * \param[out] Dtims Generated datetime values
530  */
531 void ObsData::genDistRandom(const eckit::Configuration & conf, std::vector<float> & Lats,
532  std::vector<float> & Lons, std::vector<util::DateTime> & Dtimes) {
533  gnlocs_ = conf.getInt("random.nobs");
534  float lat1 = conf.getFloat("random.lat1");
535  float lat2 = conf.getFloat("random.lat2");
536  float lon1 = conf.getFloat("random.lon1");
537  float lon2 = conf.getFloat("random.lon2");
538 
539  // Make the random_seed keyword optional. Can spec it for testing to get repeatable
540  // values, and the user doesn't have to spec it if they want subsequent runs to
541  // use different random sequences.
542  unsigned int ran_seed;
543  if (conf.has("random.random seed")) {
544  ran_seed = conf.getInt("random.random seed");
545  } else {
546  ran_seed = std::time(0); // based on the current date/time.
547  }
548 
549  // Generate the indexing for MPI distribution
550  std::unique_ptr<IodaIO> NoIO(nullptr);
551  std::vector<std::size_t> DummyIndex = GenFrameIndexRecNums(NoIO, 0, gnlocs_);
552 
553  // Use the following formula to generate random lat, lon and time values.
554  //
555  // val = val1 + (random_number_between_0_and_1 * (val2-val1))
556  //
557  // where val2 > val1.
558  //
559  // Create a list of random values between 0 and 1 to be used for genearting
560  // random lat, lon and time vaules.
561  //
562  // Use different seeds for lat and lon so that in the case where lat and lon ranges
563  // are the same, you get a different sequences for lat compared to lon.
564  //
565  // Have rank 0 generate the full length random sequences, and then
566  // broadcast these to the other ranks. This ensures that every rank
567  // contains the same random sequences. If all ranks generated their
568  // own sequences, which they could do, the sequences between ranks
569  // would be different in the case where random_seed is not specified.
570  std::vector<float> RanVals(gnlocs_, 0.0);
571  std::vector<float> RanVals2(gnlocs_, 0.0);
572  if (this->comm().rank() == 0) {
573  util::UniformDistribution<float> RanUD(gnlocs_, 0.0, 1.0, ran_seed);
574  util::UniformDistribution<float> RanUD2(gnlocs_, 0.0, 1.0, ran_seed+1);
575 
576  RanVals = RanUD.data();
577  RanVals2 = RanUD2.data();
578  }
579  this->comm().broadcast(RanVals, 0);
580  this->comm().broadcast(RanVals2, 0);
581 
582  // Form the ranges val2-val for lat, lon, time
583  float LatRange = lat2 - lat1;
584  float LonRange = lon2 - lon1;
585  util::Duration WindowDuration(this->windowEnd() - this->windowStart());
586  float TimeRange = static_cast<float>(WindowDuration.toSeconds());
587 
588  // Create vectors for lat, lon, time, fill them with random values
589  // inside their respective ranges, and put results into the obs container.
590  Lats.assign(nlocs_, 0.0);
591  Lons.assign(nlocs_, 0.0);
592  Dtimes.assign(nlocs_, this->windowStart());
593 
594  util::Duration DurZero(0);
595  util::Duration DurOneSec(1);
596  for (std::size_t ii = 0; ii < nlocs_; ii++) {
597  std::size_t index = indx_[ii];
598  Lats[ii] = lat1 + (RanVals[index] * LatRange);
599  Lons[ii] = lon1 + (RanVals2[index] * LonRange);
600 
601  // Currently the filter for time stamps on obs values is:
602  //
603  // windowStart < ObsTime <= windowEnd
604  //
605  // If we get a zero OffsetDt, then change it to 1 second so that the observation
606  // will remain inside the timing window.
607  util::Duration OffsetDt(static_cast<int64_t>(RanVals[index] * TimeRange));
608  if (OffsetDt == DurZero) {
609  OffsetDt = DurOneSec;
610  }
611  // Dtimes elements were initialized to the window start
612  Dtimes[ii] += OffsetDt;
613  }
614 }
615 
616 // -----------------------------------------------------------------------------
617 /*!
618  * \details This method will generate a set of latitudes and longitudes of which
619  * can be used for testing without reading in an obs file. The values
620  * are simply read from lists in the configuration file. The purpose of
621  * this method is to allow the user to exactly specify obs locations.
622  *
623  * \param[in] conf ECKIT configuration segment built from an input configuration file.
624  * \param[out] Lats Generated latitude values
625  * \param[out] Lons Generated longitude values
626  * \param[out] Dtims Generated datetime values
627  */
628 void ObsData::genDistList(const eckit::Configuration & conf, std::vector<float> & Lats,
629  std::vector<float> & Lons, std::vector<util::DateTime> & Dtimes) {
630  std::vector<float> latitudes = conf.getFloatVector("list.lats");
631  std::vector<float> longitudes = conf.getFloatVector("list.lons");
632  std::vector<std::string> DtStrings = conf.getStringVector("list.datetimes");
633  std::vector<util::DateTime> datetimes;
634  for (std::size_t i = 0; i < DtStrings.size(); ++i) {
635  util::DateTime TempDt(DtStrings[i]);
636  datetimes.push_back(TempDt);
637  }
638 
639  // Generate the indexing for MPI distribution
640  gnlocs_ = latitudes.size();
641  std::unique_ptr<IodaIO> NoIO(nullptr);
642  std::vector<std::size_t> DummyIndex = GenFrameIndexRecNums(NoIO, 0, gnlocs_);
643 
644  // Create vectors for lat, lon, time, fill them with the values from the
645  // lists in the configuration.
646  Lats.assign(nlocs_, 0.0);
647  Lons.assign(nlocs_, 0.0);
648  Dtimes.assign(nlocs_, this->windowStart());
649 
650  for (std::size_t ii = 0; ii < nlocs_; ii++) {
651  std::size_t index = indx_[ii];
652  Lats[ii] = latitudes[index];
653  Lons[ii] = longitudes[index];
654  Dtimes[ii] = datetimes[index];
655  }
656 }
657 
658 // -----------------------------------------------------------------------------
659 /*!
660  * \details This method provides a way to print an ObsData object in an output
661  * stream. It simply prints a dummy message for now.
662  */
663 void ObsData::print(std::ostream & os) const {
664  os << "ObsData::print not implemented";
665 }
666 
667 // -----------------------------------------------------------------------------
668 /*!
669  * \details This method will initialize the obs container from the input obs file.
670  * All the variables from the input file will be read in and loaded into
671  * the obs container. Obs that fall outside the DA timing window will be
672  * filtered out before loading into the container. This method will also
673  * apply obs distribution across multiple process elements. For these reasons,
674  * the number of locations in the obs container may be smaller than the
675  * number of locations in the input obs file.
676  *
677  * \param[in] filename Path to input obs file
678  */
679 void ObsData::InitFromFile(const std::string & filename, const std::size_t MaxFrameSize) {
680  oops::Log::trace() << "ObsData::InitFromFile opening file: " << filename << std::endl;
681 
682  // Open the file for reading and record nlocs and nvars from the file.
683  std::unique_ptr<IodaIO> fileio {ioda::IodaIOfactory::Create(filename, "r", MaxFrameSize)};
684  gnlocs_ = fileio->nlocs();
685 
686  // Walk through the frames and select the records according to the MPI distribution
687  // and if the records fall inside the DA timing window. nvars_ for ObsData is the
688  // number of variables with the GroupName ObsValue. Since we can be reading in
689  // multiple frames, only check for the ObsValue group on the first frame.
690  nvars_ = 0;
691  bool FirstFrame = true;
692  fileio->frame_initialize();
693  for (IodaIO::FrameIter iframe = fileio->frame_begin();
694  iframe != fileio->frame_end(); ++iframe) {
695  std::size_t FrameStart = fileio->frame_start(iframe);
696  std::size_t FrameSize = fileio->frame_size(iframe);
697 
698  // Fill in the current frame from the file
699  fileio->frame_read(iframe);
700 
701  // Calculate the corresponding segments of indx_ and recnums_ vectors for this
702  // frame. Use these segments to select the rows from the frame before storing in
703  // the obs space container.
704  std::vector<std::size_t> FrameIndex = GenFrameIndexRecNums(fileio, FrameStart, FrameSize);
705 
706  // Integer variables
707  for (IodaIO::FrameIntIter idata = fileio->frame_int_begin();
708  idata != fileio->frame_int_end(); ++idata) {
709  std::string GroupName = fileio->frame_int_get_gname(idata);
710  if (FirstFrame && (GroupName == "ObsValue")) { nvars_++; }
711  std::string VarName = fileio->frame_int_get_vname(idata);
712  std::vector<std::size_t> VarShape = fileio->var_shape(GroupName, VarName);
713  std::vector<int> FrameData;
714  fileio->frame_int_get_data(GroupName, VarName, FrameData);
715  std::vector<std::size_t> FrameShape = VarShape;
716  FrameShape[0] = FrameData.size();
717  if (VarShape[0] == gnlocs_) {
718  std::vector<std::size_t> IndexedShape;
719  std::vector<int> SelectedData =
720  ApplyIndex(FrameData, VarShape, FrameIndex, FrameShape);
721  int_database_.StoreToDb(GroupName, VarName, FrameShape, SelectedData, true);
722  } else {
723  int_database_.StoreToDb(GroupName, VarName, FrameShape, FrameData, true);
724  }
725  }
726 
727  // Float variables
728  for (IodaIO::FrameFloatIter idata = fileio->frame_float_begin();
729  idata != fileio->frame_float_end(); ++idata) {
730  std::string GroupName = fileio->frame_float_get_gname(idata);
731  if (FirstFrame && (GroupName == "ObsValue")) { nvars_++; }
732  std::string VarName = fileio->frame_float_get_vname(idata);
733  std::vector<std::size_t> VarShape = fileio->var_shape(GroupName, VarName);
734  std::vector<float> FrameData;
735  fileio->frame_float_get_data(GroupName, VarName, FrameData);
736  std::vector<std::size_t> FrameShape = VarShape;
737  FrameShape[0] = FrameData.size();
738  if (VarShape[0] == gnlocs_) {
739  std::vector<std::size_t> IndexedShape;
740  std::vector<float> SelectedData =
741  ApplyIndex(FrameData, VarShape, FrameIndex, FrameShape);
742  float_database_.StoreToDb(GroupName, VarName, FrameShape, SelectedData, true);
743  } else {
744  float_database_.StoreToDb(GroupName, VarName, FrameShape, FrameData, true);
745  }
746  }
747 
748  // String variables
749  for (IodaIO::FrameStringIter idata = fileio->frame_string_begin();
750  idata != fileio->frame_string_end(); ++idata) {
751  std::string GroupName = fileio->frame_string_get_gname(idata);
752  if (FirstFrame && (GroupName == "ObsValue")) { nvars_++; }
753  std::string VarName = fileio->frame_string_get_vname(idata);
754  std::vector<std::size_t> VarShape = fileio->var_shape(GroupName, VarName);
755  std::vector<std::string> FrameData;
756  fileio->frame_string_get_data(GroupName, VarName, FrameData);
757  std::vector<std::size_t> FrameShape = VarShape;
758  FrameShape[0] = FrameData.size();
759  if (VarShape[0] == gnlocs_) {
760  std::vector<std::size_t> IndexedShape;
761  std::vector<std::string> SelectedData =
762  ApplyIndex(FrameData, VarShape, FrameIndex, FrameShape);
763  if (VarName == "datetime") {
764  // Convert to DateTime objects and store in datetime database
765  std::vector<util::DateTime> DtData;
766  for (std::size_t i = 0; i < SelectedData.size(); ++i) {
767  util::DateTime ObsDt(SelectedData[i]);
768  DtData.push_back(ObsDt);
769  }
770  datetime_database_.StoreToDb(GroupName, VarName, FrameShape, DtData, true);
771  } else {
772  string_database_.StoreToDb(GroupName, VarName, FrameShape, SelectedData, true);
773  }
774  } else {
775  string_database_.StoreToDb(GroupName, VarName, FrameShape, FrameData, true);
776  }
777  }
778  FirstFrame = false;
779  }
780  fileio->frame_finalize();
781 
782  // Record whether any problems occurred when reading the file.
783  file_unexpected_dtypes_ = fileio->unexpected_data_types();
784  file_excess_dims_ = fileio->excess_dims();
785  oops::Log::trace() << "ObsData::InitFromFile opening file ends " << std::endl;
786 }
787 
788 // -----------------------------------------------------------------------------
789 /*!
790  * \details This method generates an list of indices with their corresponding
791  * record numbers, where the indices denote which locations are to be
792  * read into this process element.
793  *
794  * \param[in] FileIO File id (pointer to IodaIO object)
795  * \param[in] FrameStart Row number at beginning of frame.
796  * \param[out] FrameIndex Vector of indices indicating rows belonging to this process element
797  * \param[out] FrameRecNums Vector containing record numbers corresponding to FrameIndex
798  */
799 std::vector<std::size_t> ObsData::GenFrameIndexRecNums(const std::unique_ptr<IodaIO> & FileIO,
800  const std::size_t FrameStart, const std::size_t FrameSize) {
801  // It's possible that the total number of locations (gnlocs_) is smaller than
802  // another dimension (eg, nchans or nvars for a hyperspectral instrument). If that
803  // is the case, we don't want to read past the end of the datetime or obs group
804  // variable which are dimensioned by nlocs.
805  std::size_t LocSize = FrameSize;
806  if ((FrameStart + FrameSize) > gnlocs_) { LocSize = gnlocs_ - FrameStart; }
807 
808  // Apply the timing window if we are reading from a file. Need to filter out locations
809  // that are outside the timing window before generating record numbers. This is because
810  // we are generating record numbers on the fly since we want to get to the point where
811  // we can do the MPI distribution without knowing how many obs (and records) we are going
812  // to encounter.
813  //
814  // Create two vectors as the timing windows are checked, one for location indices the
815  // other for frame indices. Location indices are relative to FrameStart, and frame
816  // indices are relative to this frame (start at zero).
817  //
818  // If we are not reading from a file, then load up the locations and frame indices
819  // with all locations in the frame.
820  std::vector<std::size_t> LocIndex;
821  std::vector<std::size_t> FrameIndex;
822  if (FileIO != nullptr) {
823  // Grab the datetime strings for checking the timing window
824  std::string DtGroupName = "MetaData";
825  std::string DtVarName = "datetime";
826  std::vector<std::string> DtStrings;
827  FileIO->frame_string_get_data(DtGroupName, DtVarName, DtStrings);
828 
829  // Convert the datetime strings to DateTime objects
830  std::vector<util::DateTime> ObsDtimes;
831  for (std::size_t i = 0; i < DtStrings.size(); ++i) {
832  util::DateTime ObsDt(DtStrings[i]);
833  ObsDtimes.push_back(ObsDt);
834  }
835 
836  // Keep all locations that fall inside the timing window
837  for (std::size_t i = 0; i < LocSize; ++i) {
838  if (InsideTimingWindow(ObsDtimes[i])) {
839  LocIndex.push_back(FrameStart + i);
840  FrameIndex.push_back(i);
841  }
842  }
843  LocSize = LocIndex.size(); // in case any locations were rejected
844  } else {
845  // Not reading from file, keep all locations.
846  LocIndex.assign(LocSize, 0);
847  std::iota(LocIndex.begin(), LocIndex.end(), FrameStart);
848 
849  FrameIndex.assign(LocSize, 0);
850  std::iota(FrameIndex.begin(), FrameIndex.end(), 0);
851  }
852 
853  // Generate record numbers for this frame
854  std::vector<std::size_t> Records(LocSize);
855  if ((obs_group_variable_.empty()) || (FileIO == nullptr)) {
856  // Grouping is not specified, so use the location indices as the record indicators.
857  // Using the obs grouping object will make the record numbering go sequentially
858  // from 0 to nrecs_ - 1.
859  for (std::size_t i = 0; i < LocSize; ++i) {
860  int RecValue = LocIndex[i];
861  if (!int_obs_grouping_.has(RecValue)) {
863  next_rec_num_++;
865  }
866  Records[i] = int_obs_grouping_.at(RecValue);
867  }
868  } else {
869  // Group according to the data in obs_group_variable_
870  std::string GroupName = "MetaData";
871  std::string VarName = obs_group_variable_;
872  std::string VarType = FileIO->var_dtype(GroupName, VarName);
873 
874  if (VarType == "int") {
875  std::vector<int> GroupVar;
876  FileIO->frame_int_get_data(GroupName, VarName, GroupVar);
877  for (std::size_t i = 0; i < LocSize; ++i) {
878  int RecValue = GroupVar[FrameIndex[i]];
879  if (!int_obs_grouping_.has(RecValue)) {
881  next_rec_num_++;
883  }
884  Records[i] = int_obs_grouping_.at(RecValue);
885  }
886  } else if (VarType == "float") {
887  std::vector<float> GroupVar;
888  FileIO->frame_float_get_data(GroupName, VarName, GroupVar);
889  for (std::size_t i = 0; i < LocSize; ++i) {
890  float RecValue = GroupVar[FrameIndex[i]];
891  if (!float_obs_grouping_.has(RecValue)) {
893  next_rec_num_++;
894  }
895  Records[i] = float_obs_grouping_.at(RecValue);
896  }
897  } else if (VarType == "string") {
898  std::vector<std::string> GroupVar;
899  FileIO->frame_string_get_data(GroupName, VarName, GroupVar);
900  for (std::size_t i = 0; i < LocSize; ++i) {
901  std::string RecValue = GroupVar[FrameIndex[i]];
902  if (!string_obs_grouping_.has(RecValue)) {
904  next_rec_num_++;
906  }
907  Records[i] = string_obs_grouping_.at(RecValue);
908  }
909  }
910  }
911 
912  // Generate the index and recnums for this frame. We are done with FrameIndex
913  // so it can be reused here.
914  FrameIndex.clear();
915  std::set<std::size_t> UniqueRecNums;
916  for (std::size_t i = 0; i < LocSize; ++i) {
917  std::size_t RowNum = LocIndex[i];
918  std::size_t RecNum = Records[i];
919  if (dist_->isMyRecord(RecNum)) {
920  indx_.push_back(RowNum);
921  recnums_.push_back(RecNum);
922  unique_rec_nums_.insert(RecNum);
923  FrameIndex.push_back(RowNum - FrameStart);
924  }
925  }
926 
927  nlocs_ += FrameIndex.size();
928  return FrameIndex;
929 }
930 
931 // -----------------------------------------------------------------------------
932 /*!
933  * \details This method will return true/false according to whether the
934  * observation datetime (ObsDt) is inside the DA timing window.
935  *
936  * \param[in] ObsDt Observation date time object
937  */
938 bool ObsData::InsideTimingWindow(const util::DateTime & ObsDt) {
939  return ((ObsDt > winbgn_) && (ObsDt <= winend_));
940 }
941 
942 // -----------------------------------------------------------------------------
943 /*!
944  * \details This method will construct a data structure that holds the
945  * location order within each group sorted by the values of
946  * the specified sort variable.
947  */
949  typedef std::map<std::size_t, std::vector<std::pair<float, std::size_t>>> TmpRecIdxMap;
950  typedef TmpRecIdxMap::iterator TmpRecIdxIter;
951 
952  // Get the sort variable from the data store, and convert to a vector of floats.
953  std::vector<float> SortValues(nlocs_);
954  if (obs_sort_variable_ == "datetime") {
955  std::vector<util::DateTime> Dates(nlocs_);
956  get_db("MetaData", obs_sort_variable_, Dates);
957  for (std::size_t iloc = 0; iloc < nlocs_; iloc++) {
958  SortValues[iloc] = (Dates[iloc] - Dates[0]).toSeconds();
959  }
960  } else {
961  get_db("MetaData", obs_sort_variable_, SortValues);
962  }
963 
964  // Construct a temporary structure to do the sorting, then transfer the results
965  // to the data member recidx_.
966  TmpRecIdxMap TmpRecIdx;
967  for (size_t iloc = 0; iloc < nlocs_; iloc++) {
968  TmpRecIdx[recnums_[iloc]].push_back(std::make_pair(SortValues[iloc], iloc));
969  }
970 
971  for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
972  if (obs_sort_order_ == "ascending") {
973  sort(irec->second.begin(), irec->second.end());
974  } else {
975  // Use a lambda function to access the std::pair greater-than operator to
976  // implement a descending order sort, ensuring the associated indices remain
977  // in ascending order.
978  sort(irec->second.begin(), irec->second.end(),
979  [](const std::pair<float, std::size_t> & p1,
980  const std::pair<float, std::size_t> & p2){
981  return (p2.first < p1.first ||
982  (!(p1.first < p2.first) && p2.second > p1.second));});
983  }
984  }
985 
986  // Copy indexing to the recidx_ data member.
987  for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
988  recidx_[irec->first].resize(irec->second.size());
989  for (std::size_t iloc = 0; iloc < irec->second.size(); iloc++) {
990  recidx_[irec->first][iloc] = irec->second[iloc].second;
991  }
992  }
993 }
994 
995 // -----------------------------------------------------------------------------
996 /*!
997  * \details This method will save the contents of the obs container into the
998  * given file. Currently, all variables in the obs container are written
999  * into the file. This may change in the future where we can select which
1000  * variables we want saved.
1001  *
1002  * \param[in] file_name Path to output obs file.
1003  */
1004 void ObsData::SaveToFile(const std::string & file_name, const std::size_t MaxFrameSize) {
1005  // Open the file for output
1006  std::unique_ptr<IodaIO> fileio
1007  {ioda::IodaIOfactory::Create(file_name, "W", MaxFrameSize)};
1008 
1009  // Add dimensions for nlocs and nvars
1010  fileio->dim_insert("nlocs", nlocs_);
1011  fileio->dim_insert("nvars", nvars_);
1012 
1013  // Build the group, variable info container. This defines the variables
1014  // that will be written into the output file.
1015  std::size_t MaxVarSize = 0;
1017  ivar != int_database_.var_iter_end(); ++ivar) {
1018  std::string GroupName = int_database_.var_iter_gname(ivar);
1019  std::string VarName = int_database_.var_iter_vname(ivar);
1020  std::string GrpVarName = VarName + "@" + GroupName;
1021  std::vector<std::size_t> VarShape = int_database_.var_iter_shape(ivar);
1022  if (VarShape[0] > MaxVarSize) { MaxVarSize = VarShape[0]; }
1023  fileio->grp_var_insert(GroupName, VarName, "int", VarShape, GrpVarName, "int");
1024  }
1026  ivar != float_database_.var_iter_end(); ++ivar) {
1027  std::string GroupName = float_database_.var_iter_gname(ivar);
1028  std::string VarName = float_database_.var_iter_vname(ivar);
1029  std::string GrpVarName = VarName + "@" + GroupName;
1030  std::vector<std::size_t> VarShape = float_database_.var_iter_shape(ivar);
1031  if (VarShape[0] > MaxVarSize) { MaxVarSize = VarShape[0]; }
1032  fileio->grp_var_insert(GroupName, VarName, "float", VarShape, GrpVarName, "float");
1033  }
1035  ivar != string_database_.var_iter_end(); ++ivar) {
1036  std::string GroupName = string_database_.var_iter_gname(ivar);
1037  std::string VarName = string_database_.var_iter_vname(ivar);
1038  std::string GrpVarName = VarName + "@" + GroupName;
1039  std::vector<std::size_t> VarShape = string_database_.var_iter_shape(ivar);
1040  if (VarShape[0] > MaxVarSize) { MaxVarSize = VarShape[0]; }
1041  std::vector<std::string> DbData(VarShape[0], "");
1042  string_database_.LoadFromDb(GroupName, VarName, VarShape, DbData);
1043  std::size_t MaxStringSize = FindMaxStringLength(DbData);
1044  fileio->grp_var_insert(GroupName, VarName, "string", VarShape, GrpVarName, "string",
1045  MaxStringSize);
1046  }
1048  ivar != datetime_database_.var_iter_end(); ++ivar) {
1049  std::string GroupName = datetime_database_.var_iter_gname(ivar);
1050  std::string VarName = datetime_database_.var_iter_vname(ivar);
1051  std::string GrpVarName = VarName + "@" + GroupName;
1052  std::vector<std::size_t> VarShape = datetime_database_.var_iter_shape(ivar);
1053  if (VarShape[0] > MaxVarSize) { MaxVarSize = VarShape[0]; }
1054  fileio->grp_var_insert(GroupName, VarName, "string", VarShape, GrpVarName, "string", 20);
1055  }
1056 
1057  // Build the frame info container
1058  fileio->frame_info_init(MaxVarSize);
1059 
1060  // For every frame, dump out the int, float, string variables.
1061  for (IodaIO::FrameIter iframe = fileio->frame_begin();
1062  iframe != fileio->frame_end(); ++iframe) {
1063  fileio->frame_data_init();
1064  std::size_t FrameStart = fileio->frame_start(iframe);
1065  std::size_t FrameSize = fileio->frame_size(iframe);
1066 
1067  // Integer data
1069  ivar != int_database_.var_iter_end(); ++ivar) {
1070  std::string GroupName = int_database_.var_iter_gname(ivar);
1071  std::string VarName = int_database_.var_iter_vname(ivar);
1072  std::vector<std::size_t> VarShape = int_database_.var_iter_shape(ivar);
1073 
1074  if (VarShape[0] > FrameStart) {
1075  std::size_t Count = FrameSize;
1076  if ((FrameStart + FrameSize) > VarShape[0]) { Count = VarShape[0] - FrameStart; }
1077  std::vector<int> FrameData(Count, 0);
1078  int_database_.LoadFromDb(GroupName, VarName, VarShape, FrameData, FrameStart, Count);
1079  fileio->frame_int_put_data(GroupName, VarName, FrameData);
1080  }
1081  }
1082 
1083  // Float data
1085  ivar != float_database_.var_iter_end(); ++ivar) {
1086  std::string GroupName = float_database_.var_iter_gname(ivar);
1087  std::string VarName = float_database_.var_iter_vname(ivar);
1088  std::vector<std::size_t> VarShape = float_database_.var_iter_shape(ivar);
1089 
1090  if (VarShape[0] > FrameStart) {
1091  std::size_t Count = FrameSize;
1092  if ((FrameStart + FrameSize) > VarShape[0]) { Count = VarShape[0] - FrameStart; }
1093  std::vector<float> FrameData(Count, 0.0);
1094  float_database_.LoadFromDb(GroupName, VarName, VarShape, FrameData, FrameStart, Count);
1095  fileio->frame_float_put_data(GroupName, VarName, FrameData);
1096  }
1097  }
1098 
1099  // String data
1101  ivar != string_database_.var_iter_end(); ++ivar) {
1102  std::string GroupName = string_database_.var_iter_gname(ivar);
1103  std::string VarName = string_database_.var_iter_vname(ivar);
1104  std::vector<std::size_t> VarShape = string_database_.var_iter_shape(ivar);
1105 
1106  if (VarShape[0] > FrameStart) {
1107  std::size_t Count = FrameSize;
1108  if ((FrameStart + FrameSize) > VarShape[0]) { Count = VarShape[0] - FrameStart; }
1109  std::vector<std::string> FrameData(Count, "");
1110  string_database_.LoadFromDb(GroupName, VarName, VarShape, FrameData,
1111  FrameStart, Count);
1112  fileio->frame_string_put_data(GroupName, VarName, FrameData);
1113  }
1114  }
1115 
1117  ivar != datetime_database_.var_iter_end(); ++ivar) {
1118  std::string GroupName = datetime_database_.var_iter_gname(ivar);
1119  std::string VarName = datetime_database_.var_iter_vname(ivar);
1120  std::vector<std::size_t> VarShape = datetime_database_.var_iter_shape(ivar);
1121 
1122  if (VarShape[0] > FrameStart) {
1123  std::size_t Count = FrameSize;
1124  if ((FrameStart + FrameSize) > VarShape[0]) { Count = VarShape[0] - FrameStart; }
1125  util::DateTime TempDt("0000-01-01T00:00:00Z");
1126  std::vector<util::DateTime> FrameData(Count, TempDt);
1127  datetime_database_.LoadFromDb(GroupName, VarName, VarShape, FrameData,
1128  FrameStart, Count);
1129 
1130  // Convert the DateTime vector to a string vector, then save into the file.
1131  std::vector<std::string> StringVector(FrameData.size(), "");
1132  for (std::size_t i = 0; i < FrameData.size(); i++) {
1133  StringVector[i] = FrameData[i].toString();
1134  }
1135  fileio->frame_string_put_data(GroupName, VarName, StringVector);
1136  }
1137  }
1138 
1139  fileio->frame_write(iframe);
1140  }
1141 }
1142 
1143 // -----------------------------------------------------------------------------
1144 /*!
1145  * \details This method applys the distribution index on data read from the input obs file.
1146  * It is expected that when this method is called that the distribution index will
1147  * have the process element and DA timing window effects accounted for.
1148  *
1149  * \param[in] FullData Vector holding data to be indexed
1150  * \param[in] FullShape Shape (dimension sizes) of FullData
1151  * \param[in] Index Index to be applied to FullData
1152  * \param[out] IndexedShape Shape (dimension sizes) of data after indexing
1153  */
1154 template<typename VarType>
1155 std::vector<VarType> ObsData::ApplyIndex(const std::vector<VarType> & FullData,
1156  const std::vector<std::size_t> & FullShape,
1157  const std::vector<std::size_t> & Index,
1158  std::vector<std::size_t> & IndexedShape) const {
1159  std::vector<VarType> SelectedData;
1160  for (std::size_t i = 0; i < Index.size(); ++i) {
1161  std::size_t isrc = Index[i];
1162  SelectedData.push_back(FullData[isrc]);
1163  }
1164  IndexedShape = FullShape;
1165  IndexedShape[0] = SelectedData.size();
1166  return SelectedData;
1167 }
1168 
1169 // -----------------------------------------------------------------------------
1170 /*!
1171  * \details This method will return the desired numeric data type for variables
1172  * read from the input obs file. The rule for now is any variable
1173  * in the group "PreQC" is to be an integer, and any variable that is
1174  * a double is to be a float (single precision). For cases outside of this
1175  * rule, the data type from the file is used.
1176  *
1177  * \param[in] GroupName Name of obs container group
1178  * \param[in] FileVarType Name of the data type of the variable from the input obs file
1179  */
1180 std::string ObsData::DesiredVarType(std::string & GroupName, std::string & FileVarType) {
1181  // By default, make the DbVarType equal to the FileVarType
1182  // Exceptions are:
1183  // Force the group "PreQC" to an integer type.
1184  // Force double to float.
1185  std::string DbVarType = FileVarType;
1186 
1187  if (GroupName == "PreQC") {
1188  DbVarType = "int";
1189  } else if (FileVarType == "double") {
1190  DbVarType = "float";
1191  }
1192 
1193  return DbVarType;
1194 }
1195 
1196 // -----------------------------------------------------------------------------
1197 /*!
1198  * \details This method provides a means for printing Jo in
1199  * an output stream. For now a dummy message is printed.
1200  */
1201 void ObsData::printJo(const ObsVector & dy, const ObsVector & grad) {
1202  oops::Log::info() << "ObsData::printJo not implemented" << std::endl;
1203 }
1204 // -----------------------------------------------------------------------------
1205 /*!
1206  * \details This method creates a private KDTree class member that can be used
1207  * for searching for local obs to create an ObsSpace.
1208  */
1210  // Initialize KDTree class member
1211  kd_ = std::shared_ptr<ObsData::KDTree> ( new KDTree() );
1212  // Define lats,lons
1213  std::vector<float> lats(nlocs_);
1214  std::vector<float> lons(nlocs_);
1215 
1216  // Get latitudes and longitudes of all observations.
1217  this -> get_db("MetaData", "longitude", lons);
1218  this -> get_db("MetaData", "latitude", lats);
1219 
1220  // Define points list from lat/lon values
1221  typedef KDTree::PointType Point;
1222  std::vector<KDTree::Value> points;
1223  for (unsigned int i = 0; i < nlocs_; i++) {
1224  eckit::geometry::Point2 lonlat(lons[i], lats[i]);
1225  Point xyz = Point();
1226  // FIXME: get geometry from yaml, for now assume spherical Earth radius.
1227  atlas::util::Earth::convertSphericalToCartesian(lonlat, xyz);
1228  double index = static_cast<double>(i);
1229  KDTree::Value v(xyz, index);
1230  points.push_back(v);
1231  }
1232 
1233  // Create KDTree class member from points list.
1234  kd_->build(points.begin(), points.end());
1235 }
1236 // -----------------------------------------------------------------------------
1237 /*!
1238  * \details This method returns the KDTree class member that can be used
1239  * for searching for local obs when creating an ObsSpace.
1240  */
1242  // Create the KDTree if it doesn't yet exist
1243  if (kd_ == NULL)
1244  createKDTree();
1245 
1246  return * kd_;
1247 }
1248 // -----------------------------------------------------------------------------
1249 
1250 } // namespace ioda
ioda::ObsData::InitFromFile
void InitFromFile(const std::string &filename, const std::size_t MaxFrameSize)
Definition: ObsData.cc:679
ioda::ObsData::float_obs_grouping_
ObsGroupingMap< float > float_obs_grouping_
Definition: ObsData.h:281
ioda::ObsData::dtype
ObsDtype dtype(const std::string &, const std::string &) const
Definition: ObsData.cc:289
ioda::ObsData::winbgn_
const util::DateTime winbgn_
Beginning of DA timing window.
Definition: ObsData.h:205
ioda::ObsData::dist_
std::shared_ptr< Distribution > dist_
MPI distribution object.
Definition: ObsData.h:277
ioda::ObsData::put_db
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata)
transfer data from vdata to the obs container
Definition: ObsData.cc:238
ioda::ObsData::winend_
const util::DateTime winend_
End of DA timing window.
Definition: ObsData.h:208
ioda::IodaIO::dim_insert
void dim_insert(const std::string &, const std::size_t)
Definition: IodaIO.cc:551
ioda::ObsData::BuildSortedObsGroups
void BuildSortedObsGroups()
Definition: ObsData.cc:948
ioda::IodaIO::FrameIntIter
FrameDataMap< int >::FrameStoreIter FrameIntIter
Definition: src/io/IodaIO.h:291
ioda::ObsGroupingMap::at
std::size_t at(const KeyType Key)
Definition: ObsData.h:54
ioda::ObsData::obs_group_variable_
std::string obs_group_variable_
Variable that location grouping is based upon.
Definition: ObsData.h:268
ioda::IodaIOfactory::Create
static ioda::IodaIO * Create(const std::string &FileName, const std::string &FileMode, const std::size_t MaxFrameSize)
Instantiate a IodaIO object.
Definition: IodaIOfactory.cc:29
ioda::ObsData::~ObsData
~ObsData()
Definition: ObsData.cc:167
ioda::ObsData::datetime_database_
ObsSpaceContainer< util::DateTime > datetime_database_
Definition: ObsData.h:259
ioda::ObsGroupingMap::has
bool has(const KeyType Key)
Definition: ObsData.h:46
ioda::ObsData::comm
const eckit::mpi::Comm & comm() const
Definition: ObsData.h:160
ioda::ObsData::recnum
const std::vector< std::size_t > & recnum() const
Definition: ObsData.cc:380
ioda::IODAIO_DEFAULT_FRAME_SIZE
const std::size_t IODAIO_DEFAULT_FRAME_SIZE
Constant to be used for default maximum frame size.
Definition: IodaIOfactory.h:20
ioda::ObsData::indx_
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
Definition: ObsData.h:247
ioda::ObsData::recidx_end
const RecIdxIter recidx_end() const
Definition: ObsData.cc:407
ioda::ObsData::recidx_recnum
std::size_t recidx_recnum(const RecIdxIter &Irec) const
Definition: ObsData.cc:426
ioda::IodaIO::FrameIter
FrameInfo::const_iterator FrameIter
Definition: src/io/IodaIO.h:275
ioda::ObsData::generateDistribution
void generateDistribution(const eckit::Configuration &)
Definition: ObsData.cc:482
ioda::ObsData::has
bool has(const std::string &, const std::string &) const
Definition: ObsData.cc:278
ioda::ObsData::nvars
std::size_t nvars() const
Definition: ObsData.cc:371
ioda::ObsDtype::Integer
@ Integer
ioda::ObsData::nrecs
std::size_t nrecs() const
Definition: ObsData.cc:361
ioda::ObsData::recnums_
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
Definition: ObsData.h:250
ioda::ObsData::string_obs_grouping_
ObsGroupingMap< std::string > string_obs_grouping_
Definition: ObsData.h:282
ioda::ObsData::windowEnd
const util::DateTime & windowEnd() const
Definition: ObsData.h:158
ioda::ObsData::gnlocs
std::size_t gnlocs() const
Definition: ObsData.cc:339
ioda::ObsData::recidx_begin
const RecIdxIter recidx_begin() const
Definition: ObsData.cc:398
ioda::ObsData::obsname
const std::string & obsname() const
Definition: ObsData.h:152
ioda
Definition: IodaUtils.cc:13
ioda::ObsData::distname_
std::string distname_
Distribution type.
Definition: ObsData.h:265
ioda::ObsSpaceContainer::LoadFromDb
void LoadFromDb(const std::string &GroupName, const std::string &VarName, const std::vector< std::size_t > &VarShape, std::vector< ContType > &VarData, const std::size_t Start=0, const std::size_t Count=0) const
load data from the obs container
Definition: src/core/ObsSpaceContainer.h:317
ioda::ObsData::printJo
void printJo(const ObsVector &, const ObsVector &)
Definition: ObsData.cc:1201
ioda::ObsData::string_database_
ObsSpaceContainer< std::string > string_database_
Definition: ObsData.h:258
ioda::ObsData::kd_
std::shared_ptr< KDTree > kd_
KD Tree.
Definition: ObsData.h:214
ioda::ObsData::genDistList
void genDistList(const eckit::Configuration &conf, std::vector< float > &Lats, std::vector< float > &Lons, std::vector< util::DateTime > &Dtimes)
Definition: ObsData.cc:628
ioda::ObsDtype::String
@ String
ioda::ObsData::obs_sort_variable_
std::string obs_sort_variable_
Variable that location group sorting is based upon.
Definition: ObsData.h:271
ioda::ObsSpaceContainer
Obs container class for IODA.
Definition: src/core/ObsSpaceContainer.h:64
ioda::ObsVector
ObsVector class to handle vectors in observation space for IODA.
Definition: src/ObsVector.h:34
ioda::ObsData::filein_
std::string filein_
path to input file
Definition: ObsData.h:235
ioda::ObsData::recidx_all_recnums
std::vector< std::size_t > recidx_all_recnums() const
Definition: ObsData.cc:460
ioda::FindMaxStringLength
std::size_t FindMaxStringLength(const std::vector< std::string > &StringVector)
Definition: IodaUtils.cc:106
ioda::ObsSpaceContainer::StoreToDb
void StoreToDb(const std::string &GroupName, const std::string &VarName, const std::vector< std::size_t > &VarShape, const std::vector< ContType > &VarData, const bool Append=false)
store data into the obs container
Definition: src/core/ObsSpaceContainer.h:268
ioda::ObsData::getKDTree
KDTree & getKDTree()
Definition: ObsData.cc:1241
ioda::ObsSpaceContainer::var_iter_shape
std::vector< std::size_t > var_iter_shape(VarIter)
Definition: src/core/ObsSpaceContainer.h:536
ioda::ObsGroupingMap::insert
void insert(const KeyType Key, const std::size_t Val)
Definition: ObsData.h:50
ioda::ObsData::ApplyIndex
std::vector< VarType > ApplyIndex(const std::vector< VarType > &FullData, const std::vector< std::size_t > &FullShape, const std::vector< std::size_t > &Index, std::vector< std::size_t > &IndexedShape) const
Definition: ObsData.cc:1155
ioda::ObsSpaceContainer::var_iter_begin
VarIter var_iter_begin()
Definition: src/core/ObsSpaceContainer.h:377
ioda::ObsData::recidx_vector
const std::vector< std::size_t > & recidx_vector(const RecIdxIter &Irec) const
Definition: ObsData.cc:435
ioda::ObsSpaceContainer::var_iter_end
VarIter var_iter_end()
Definition: src/core/ObsSpaceContainer.h:390
ioda::ObsData::obs_sort_order
std::string obs_sort_order() const
Definition: ObsData.cc:327
ioda::ObsSpaceContainer::var_iter_gname
std::string var_iter_gname(VarIter)
Definition: src/core/ObsSpaceContainer.h:494
ioda::ObsSpaceContainer::var_iter_vname
std::string var_iter_vname(VarIter)
Definition: src/core/ObsSpaceContainer.h:480
ioda::ObsData::get_db
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata) const
transfer data from the obs container to vdata
Definition: ObsData.cc:191
ioda::ObsData::in_max_frame_size_
std::size_t in_max_frame_size_
max frame size for input file
Definition: ObsData.h:241
ioda::IodaIO::FrameStringIter
FrameDataMap< std::string >::FrameStoreIter FrameStringIter
Definition: src/io/IodaIO.h:341
ioda::ObsData::nlocs
std::size_t nlocs() const
Definition: ObsData.cc:351
ioda::ObsData::file_excess_dims_
bool file_excess_dims_
flag, file has variables with excess dimensions
Definition: ObsData.h:232
ioda::ObsData::int_obs_grouping_
ObsGroupingMap< int > int_obs_grouping_
maps for obs grouping via integer, float or string values
Definition: ObsData.h:280
ioda::ObsSpaceContainer::has
bool has(const std::string &group, const std::string &variable) const
Definition: src/core/ObsSpaceContainer.h:553
ioda::ObsData::obs_sort_var
std::string obs_sort_var() const
Definition: ObsData.cc:318
ioda::ObsData::recidx_has
bool recidx_has(const std::size_t RecNum) const
Definition: ObsData.cc:416
ioda::ObsData::nlocs_
std::size_t nlocs_
number of locations on this domain
Definition: ObsData.h:220
ioda::ObsData::RecIdxIter
RecIdxMap::const_iterator RecIdxIter
Definition: ObsData.h:90
ioda::IodaIO::FrameFloatIter
FrameDataMap< float >::FrameStoreIter FrameFloatIter
Definition: src/io/IodaIO.h:316
ioda::ObsData::obs_sort_order_
std::string obs_sort_order_
Sort order for obs grouping.
Definition: ObsData.h:274
ioda::ObsDtype
ObsDtype
Definition: ObsData.h:63
ioda::ObsData::next_rec_num_
std::size_t next_rec_num_
next available record number
Definition: ObsData.h:285
ioda::ObsData::int_database_
ObsSpaceContainer< int > int_database_
Multi-index containers.
Definition: ObsData.h:256
ioda::ObsData::KDTree
eckit::KDTreeMemory< TreeTrait > KDTree
Definition: ObsData.h:95
ioda::ObsData::obs_group_var
std::string obs_group_var() const
Definition: ObsData.cc:309
ioda::ObsData::file_unexpected_dtypes_
bool file_unexpected_dtypes_
flag, file has variables with unexpected data types
Definition: ObsData.h:229
ioda::ObsData::out_max_frame_size_
std::size_t out_max_frame_size_
max frame size for output file
Definition: ObsData.h:244
ioda::ObsData::GenFrameIndexRecNums
std::vector< std::size_t > GenFrameIndexRecNums(const std::unique_ptr< IodaIO > &FileIO, const std::size_t FrameStart, const std::size_t FrameSize)
Definition: ObsData.cc:799
ioda::ObsData::print
void print(std::ostream &) const
Definition: ObsData.cc:663
ioda::ObsData::createKDTree
void createKDTree()
Definition: ObsData.cc:1209
ioda::ObsData::nvars_
std::size_t nvars_
number of variables
Definition: ObsData.h:223
ioda::ObsDtype::None
@ None
ioda::ObsData::InsideTimingWindow
bool InsideTimingWindow(const util::DateTime &ObsDt)
Definition: ObsData.cc:938
ioda::ObsData::recidx_
RecIdxMap recidx_
profile ordering
Definition: ObsData.h:253
ioda::ObsData::windowStart
const util::DateTime & windowStart() const
Definition: ObsData.h:156
ioda::ObsData::unique_rec_nums_
std::set< std::size_t > unique_rec_nums_
unique record numbers
Definition: ObsData.h:288
ioda::ObsData::genDistRandom
void genDistRandom(const eckit::Configuration &conf, std::vector< float > &Lats, std::vector< float > &Lons, std::vector< util::DateTime > &Dtimes)
Definition: ObsData.cc:531
ioda::ObsData::fileout_
std::string fileout_
path to output file
Definition: ObsData.h:238
ioda::ObsData::obsname_
std::string obsname_
name of obs space
Definition: ObsData.h:199
ioda::ObsData::float_database_
ObsSpaceContainer< float > float_database_
Definition: ObsData.h:257
ioda::ObsData::index
const std::vector< std::size_t > & index() const
Definition: ObsData.cc:389
ioda::ObsData::commMPI_
const eckit::mpi::Comm & commMPI_
MPI communicator.
Definition: ObsData.h:211
ioda::ObsData::nrecs_
std::size_t nrecs_
number of records
Definition: ObsData.h:226
ioda::ObsData::DesiredVarType
static std::string DesiredVarType(std::string &GroupName, std::string &FileVarType)
Definition: ObsData.cc:1180
ioda::ObsData::obsvars_
oops::Variables obsvars_
Observation "variables" to be simulated.
Definition: ObsData.h:262
ioda::ObsData::SaveToFile
void SaveToFile(const std::string &file_name, const std::size_t MaxFrameSize)
Definition: ObsData.cc:1004
ioda::ObsData::ObsData
ObsData(const eckit::Configuration &, const eckit::mpi::Comm &, const util::DateTime &, const util::DateTime &, const eckit::mpi::Comm &)
Definition: ObsData.cc:51
ioda::ObsDtype::Float
@ Float
ioda::ObsData::gnlocs_
std::size_t gnlocs_
total number of locations
Definition: ObsData.h:217