IODA
ObsSpace.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2021 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #include "ioda/ObsSpace.h"
9 
10 #include <algorithm>
11 #include <cmath>
12 #include <fstream>
13 #include <iomanip>
14 #include <map>
15 #include <memory>
16 #include <set>
17 #include <string>
18 #include <utility>
19 #include <vector>
20 
21 #include "eckit/config/Configuration.h"
22 #include "eckit/exception/Exceptions.h"
23 
24 #include "oops/mpi/mpi.h"
25 #include "oops/util/abor1_cpp.h"
26 #include "oops/util/DateTime.h"
27 #include "oops/util/Duration.h"
28 #include "oops/util/Logger.h"
29 #include "oops/util/missingValues.h"
30 #include "oops/util/Random.h"
31 #include "oops/util/stringFunctions.h"
32 
33 #include "ioda/distribution/Accumulator.h"
34 #include "ioda/distribution/DistributionFactory.h"
35 #include "ioda/distribution/DistributionUtils.h"
36 #include "ioda/distribution/PairOfDistributions.h"
37 #include "ioda/Engines/HH.h"
38 #include "ioda/io/ObsFrameRead.h"
39 #include "ioda/io/ObsFrameWrite.h"
41 
42 namespace ioda {
43 
44 namespace {
45 
46 // If the variable name \p name ends with an underscore followed by a number (potentially a channel
47 // number), split it at that underscore, store the two parts in \p nameWithoutChannelSuffix and
48 // \p channel, and return true. Otherwise return false.
49 bool extractChannelSuffixIfPresent(const std::string &name,
50  std::string &nameWithoutChannelSuffix, int &channel) {
51  const std::string::size_type lastUnderscore = name.find_last_of('_');
52  if (lastUnderscore != std::string::npos &&
53  name.find_first_not_of("0123456789", lastUnderscore + 1) == std::string::npos) {
54  // The variable name has a numeric suffix.
55  channel = std::stoi(name.substr(lastUnderscore + 1));
56  nameWithoutChannelSuffix = name.substr(0, lastUnderscore);
57  return true;
58  }
59  return false;
60 }
61 
62 } // namespace
63 
64 // ----------------------------- public functions ------------------------------
65 // -----------------------------------------------------------------------------
67  // The following code needs to stay in sync with the ObsDimensionId enum object.
68  // The entries are the standard dimension names according to the unified naming convention.
69  std::string dimName = "nlocs";
73 
74  dimName = "nchans";
78 }
79 
80 ObsDimensionId ObsDimInfo::get_dim_id(const std::string & dimName) const {
81  return dim_name_id_.at(dimName);
82 }
83 
84 std::string ObsDimInfo::get_dim_name(const ObsDimensionId dimId) const {
85  return dim_id_name_.at(dimId);
86 }
87 
88 std::size_t ObsDimInfo::get_dim_size(const ObsDimensionId dimId) const {
89  return dim_id_size_.at(dimId);
90 }
91 
92 void ObsDimInfo::set_dim_size(const ObsDimensionId dimId, std::size_t dimSize) {
93  dim_id_size_.at(dimId) = dimSize;
94 }
95 
96 // -----------------------------------------------------------------------------
97 /*!
98  * \details Config based constructor for an ObsSpace object. This constructor will read
99  * in from the obs file and transfer the variables into the obs container. Obs
100  * falling outside the DA timing window, specified by bgn and end, will be
101  * discarded before storing them in the obs container.
102  *
103  * \param[in] config ECKIT configuration segment holding obs types specs
104  * \param[in] comm MPI communicator containing all processes that hold the observations for a
105  * given time slot or sub-window.
106  * \param[in] bgn DateTime object holding the start of the DA timing window
107  * \param[in] end DateTime object holding the end of the DA timing window
108  * \param[in] time MPI communicator across time so that the 2D array of processes represented by
109  * the product of the comm and time communicators hold all observations in the
110  * ObsSpace.
111  */
112 ObsSpace::ObsSpace(const eckit::Configuration & config, const eckit::mpi::Comm & comm,
113  const util::DateTime & bgn, const util::DateTime & end,
114  const eckit::mpi::Comm & timeComm)
115  : oops::ObsSpaceBase(config, comm, bgn, end),
116  config_(config), winbgn_(bgn), winend_(end), commMPI_(comm),
117  gnlocs_(0), nrecs_(0), obsvars_(),
118  obs_group_(), obs_params_(bgn, end, comm, timeComm)
119 {
120  oops::Log::trace() << "ObsSpace::ObsSpace config = " << config << std::endl;
121 
122  // transfer the input config to the ObsSpaceParameters object data member
123  obs_params_.deserialize(config);
124 
127  oops::Log::info() << this->obsname() << " vars: " << obsvars_ << std::endl;
128 
129  // Open the source (ObsFrame) of the data for initializing the obs_group_ (ObsGroup)
130  ObsFrameRead obsFrame(obs_params_);
131 
132  // Retrieve the MPI distribution object
133  dist_ = obsFrame.distribution();
134 
135  createObsGroupFromObsFrame(obsFrame);
136  initFromObsSource(obsFrame);
137 
138  // After walking through all the frames, gnlocs_ and gnlocs_outside_timewindow_
139  // are set representing the entire file. This is because they are calculated
140  // before doing the MPI distribution.
141  gnlocs_ = obsFrame.globalNumLocs();
143 
144  if (this->obs_sort_var() != "") {
146  recidx_is_sorted_ = true;
147  } else {
148  // Fill the recidx_ map with indices that represent each group, but are not
149  // sorted. This is done so the recidx_ structure can be used to walk
150  // through the individual groups. For example, this can be used to calculate
151  // RMS values for each group.
153  recidx_is_sorted_ = false;
154  }
155 
157 
158  if (obs_params_.top_level_.obsExtend.value() != boost::none) {
160  }
161 
162  oops::Log::debug() << obsname() << ": " << globalNumLocsOutsideTimeWindow()
163  << " observations are outside of time window out of "
165  << std::endl;
166 
167  oops::Log::trace() << "ObsSpace::ObsSpace constructed name = " << obsname() << std::endl;
168 }
169 
170 // -----------------------------------------------------------------------------
172  if (obs_params_.top_level_.obsOutFile.value() != boost::none) {
173  std::string fileName = obs_params_.top_level_.obsOutFile.value()->fileName;
174  oops::Log::info() << obsname() << ": save database to " << fileName << std::endl;
175  saveToFile();
176  // Call the mpi barrier command here to force all processes to wait until
177  // all processes have finished writing their files. This is done to prevent
178  // the early processes continuing and potentially executing their obs space
179  // destructor before others finish writing. This situation is known to have
180  // issues with hdf file handles getting deallocated before some of the MPI
181  // processes are finished with them.
182  this->comm().barrier();
183  } else {
184  oops::Log::info() << obsname() << " : no output" << std::endl;
185  }
186 }
187 
188 // -----------------------------------------------------------------------------
189 std::size_t ObsSpace::nvars() const {
190  // Nvars is the number of variables in the ObsValue group. By querying
191  // the ObsValue group, nvars will keep track of new variables that are added
192  // during a run.
193  // Some of the generators, upon construction, do not create variables in ObsValue
194  // since the MakeObs function will do that. In this case, they instead create
195  // error estimates in ObsError with the expectation that ObsValue will be filled
196  // in later. So upon construction, nvars will be the number of variables in ObsError
197  // instead of ObsValue.
198  // Because of the generator case above, query ObsValue first and if ObsValue doesn't
199  // exist query ObsError.
200  std::size_t numVars = 0;
201  if (obs_group_.exists("ObsValue")) {
202  numVars = obs_group_.open("ObsValue").vars.list().size();
203  } else if (obs_group_.exists("ObsError")) {
204  numVars = obs_group_.open("ObsError").vars.list().size();
205  }
206  return numVars;
207 }
208 
209 // -----------------------------------------------------------------------------
210 const std::vector<std::string> & ObsSpace::obs_group_vars() const {
211  return obs_params_.top_level_.obsIoInParameters().obsGrouping.value().obsGroupVars;
212 }
213 
214 // -----------------------------------------------------------------------------
215 std::string ObsSpace::obs_sort_var() const {
216  return obs_params_.top_level_.obsIoInParameters().obsGrouping.value().obsSortVar;
217 }
218 
219 // -----------------------------------------------------------------------------
220 std::string ObsSpace::obs_sort_order() const {
221  return obs_params_.top_level_.obsIoInParameters().obsGrouping.value().obsSortOrder;
222 }
223 
224 // -----------------------------------------------------------------------------
225 /*!
226  * \details This method checks for the existence of the group, name combination
227  * in the obs container. If the combination exists, "true" is returned,
228  * otherwise "false" is returned.
229  */
230 bool ObsSpace::has(const std::string & group, const std::string & name) const {
231  // For backward compatibility, recognize and handle appropriately variable names with
232  // channel suffixes.
233  std::string nameToUse;
234  std::vector<int> chanSelectToUse;
235  splitChanSuffix(group, name, { }, nameToUse, chanSelectToUse);
236  return obs_group_.vars.exists(fullVarName(group, nameToUse));
237 }
238 
239 // -----------------------------------------------------------------------------
240 ObsDtype ObsSpace::dtype(const std::string & group, const std::string & name) const {
241  // For backward compatibility, recognize and handle appropriately variable names with
242  // channel suffixes.
243  std::string nameToUse;
244  std::vector<int> chanSelectToUse;
245  splitChanSuffix(group, name, { }, nameToUse, chanSelectToUse);
246 
247  // Set the type to None if there is no type from the backend
248  ObsDtype VarType = ObsDtype::None;
249  if (has(group, nameToUse)) {
250  Variable var = obs_group_.vars.open(fullVarName(group, nameToUse));
251  if (var.isA<int>()) {
252  VarType = ObsDtype::Integer;
253  } else if (var.isA<float>()) {
254  VarType = ObsDtype::Float;
255  } else if (var.isA<std::string>()) {
256  if ((group == "MetaData") && (nameToUse == "datetime")) {
257  // TODO(srh) Workaround to cover when datetime was stored
258  // as a util::DateTime object. For now, ioda will store datetimes
259  // as strings and convert to DateTime objects for client access.
260  VarType = ObsDtype::DateTime;
261  } else {
262  VarType = ObsDtype::String;
263  }
264  }
265  }
266  return VarType;
267 }
268 
269 // -----------------------------------------------------------------------------
270 void ObsSpace::get_db(const std::string & group, const std::string & name,
271  std::vector<int> & vdata,
272  const std::vector<int> & chanSelect) const {
273  loadVar<int>(group, name, chanSelect, vdata);
274 }
275 
276 void ObsSpace::get_db(const std::string & group, const std::string & name,
277  std::vector<float> & vdata,
278  const std::vector<int> & chanSelect) const {
279  loadVar<float>(group, name, chanSelect, vdata);
280 }
281 
282 void ObsSpace::get_db(const std::string & group, const std::string & name,
283  std::vector<double> & vdata,
284  const std::vector<int> & chanSelect) const {
285  // load the float values from the database and convert to double
286  std::vector<float> floatData;
287  loadVar<float>(group, name, chanSelect, floatData);
288  ConvertVarType<float, double>(floatData, vdata);
289 }
290 
291 void ObsSpace::get_db(const std::string & group, const std::string & name,
292  std::vector<std::string> & vdata,
293  const std::vector<int> & chanSelect) const {
294  loadVar<std::string>(group, name, chanSelect, vdata);
295 }
296 
297 void ObsSpace::get_db(const std::string & group, const std::string & name,
298  std::vector<util::DateTime> & vdata,
299  const std::vector<int> & chanSelect) const {
300  std::vector<std::string> dtStrings;
301  loadVar<std::string>(group, name, chanSelect, dtStrings);
302  vdata = convertDtStringsToDtime(dtStrings);
303 }
304 
305 // -----------------------------------------------------------------------------
306 void ObsSpace::put_db(const std::string & group, const std::string & name,
307  const std::vector<int> & vdata,
308  const std::vector<std::string> & dimList) {
309  saveVar(group, name, vdata, dimList);
310 }
311 
312 void ObsSpace::put_db(const std::string & group, const std::string & name,
313  const std::vector<float> & vdata,
314  const std::vector<std::string> & dimList) {
315  saveVar(group, name, vdata, dimList);
316 }
317 
318 void ObsSpace::put_db(const std::string & group, const std::string & name,
319  const std::vector<double> & vdata,
320  const std::vector<std::string> & dimList) {
321  // convert to float, then save to the database
322  std::vector<float> floatData;
323  ConvertVarType<double, float>(vdata, floatData);
324  saveVar(group, name, floatData, dimList);
325 }
326 
327 void ObsSpace::put_db(const std::string & group, const std::string & name,
328  const std::vector<std::string> & vdata,
329  const std::vector<std::string> & dimList) {
330  saveVar(group, name, vdata, dimList);
331 }
332 
333 void ObsSpace::put_db(const std::string & group, const std::string & name,
334  const std::vector<util::DateTime> & vdata,
335  const std::vector<std::string> & dimList) {
336  std::vector<std::string> dtStrings(vdata.size(), "");
337  for (std::size_t i = 0; i < vdata.size(); ++i) {
338  dtStrings[i] = vdata[i].toString();
339  }
340  saveVar(group, name, dtStrings, dimList);
341 }
342 
343 // -----------------------------------------------------------------------------
345  return recidx_.begin();
346 }
347 
348 // -----------------------------------------------------------------------------
350  return recidx_.end();
351 }
352 
353 // -----------------------------------------------------------------------------
354 bool ObsSpace::recidx_has(const std::size_t recNum) const {
355  RecIdxIter irec = recidx_.find(recNum);
356  return (irec != recidx_.end());
357 }
358 
359 // -----------------------------------------------------------------------------
360 std::size_t ObsSpace::recidx_recnum(const RecIdxIter & irec) const {
361  return irec->first;
362 }
363 
364 // -----------------------------------------------------------------------------
365 const std::vector<std::size_t> & ObsSpace::recidx_vector(const RecIdxIter & irec) const {
366  return irec->second;
367 }
368 
369 // -----------------------------------------------------------------------------
370 const std::vector<std::size_t> & ObsSpace::recidx_vector(const std::size_t recNum) const {
371  RecIdxIter Irec = recidx_.find(recNum);
372  if (Irec == recidx_.end()) {
373  std::string ErrMsg =
374  "ObsSpace::recidx_vector: Record number, " + std::to_string(recNum) +
375  ", does not exist in record index map.";
376  ABORT(ErrMsg);
377  }
378  return Irec->second;
379 }
380 
381 // -----------------------------------------------------------------------------
382 std::vector<std::size_t> ObsSpace::recidx_all_recnums() const {
383  std::vector<std::size_t> RecNums(nrecs_);
384  std::size_t recnum = 0;
385  for (RecIdxIter Irec = recidx_.begin(); Irec != recidx_.end(); ++Irec) {
386  RecNums[recnum] = Irec->first;
387  recnum++;
388  }
389  return RecNums;
390 }
391 
392 // ----------------------------- private functions -----------------------------
393 /*!
394  * \details This method provides a way to print an ObsSpace object in an output
395  * stream.
396  */
397 void ObsSpace::print(std::ostream & os) const {
398  std::size_t totalNlocs = this->globalNumLocs();
399  std::size_t nvars = this->obsvariables().size();
400  std::size_t nobs = totalNlocs * nvars;
401 
402  os << obsname() << ": nlocs: " << totalNlocs
403  << ", nvars: " << nvars << ", nobs: " << nobs;
404 }
405 
406 // -----------------------------------------------------------------------------
408  // Determine the maximum frame size
409  Dimensions_t maxFrameSize = obs_params_.top_level_.obsIoInParameters().maxFrameSize;
410 
411  // Create the dimension specs for obs_group_
413  for (auto & dimNameObject : obsFrame.ioDimVarList()) {
414  std::string dimName = dimNameObject.first;
415  Variable srcDimVar = dimNameObject.second;
416  Dimensions_t dimSize = srcDimVar.getDimensions().dimsCur[0];
417  Dimensions_t maxDimSize = dimSize;
418  Dimensions_t chunkSize = dimSize;
419 
420  // If the dimension is nlocs, we want to avoid allocating the file's entire
421  // size because we could be taking a subset of the locations (MPI distribution,
422  // removal of obs outside the DA window).
423  //
424  // Make nlocs unlimited size, and start with its size limited by the
425  // max_frame_size parameter.
426  if (dimName == dim_info_.get_dim_name(ObsDimensionId::Nlocs)) {
427  if (dimSize > maxFrameSize) {
428  dimSize = maxFrameSize;
429  }
430  maxDimSize = Unlimited;
431  chunkSize = dimSize;
432  }
433 
434  if (srcDimVar.isA<int>()) {
435  newDims.push_back(ioda::NewDimensionScale<int>(
436  dimName, dimSize, maxDimSize, chunkSize));
437  } else if (srcDimVar.isA<float>()) {
438  newDims.push_back(ioda::NewDimensionScale<float>(
439  dimName, dimSize, maxDimSize, chunkSize));
440  }
441  }
442 
443  // Create the backend for obs_group_
444  Engines::BackendNames backendName = Engines::BackendNames::ObsStore; // Hdf5Mem; ObsStore;
446  // These parameters only matter if Hdf5Mem is the engine selected. ObsStore ignores.
449  backendParams.fileName = ioda::Engines::HH::genUniqueName();
450  backendParams.allocBytes = 1024*1024*50;
451  backendParams.flush = false;
452  Group backend = constructBackend(backendName, backendParams);
453 
454  // Create the ObsGroup and attach the backend.
456 
457  // fill in dimension coordinate values
458  for (auto & dimNameObject : obsFrame.ioDimVarList()) {
459  std::string dimName = dimNameObject.first;
460  Variable srcDimVar = dimNameObject.second;
461  Variable destDimVar = obs_group_.vars.open(dimName);
462 
463  // Set up the dimension selection objects. The prior loop declared the
464  // sizes of all the dimensions in the frame so use that as a guide, and
465  // transfer the first frame's worth of coordinate values accordingly.
466  std::vector<Dimensions_t> srcDimShape = srcDimVar.getDimensions().dimsCur;
467  std::vector<Dimensions_t> destDimShape = destDimVar.getDimensions().dimsCur;
468 
469  std::vector<Dimensions_t> counts = destDimShape;
470  std::vector<Dimensions_t> starts(counts.size(), 0);
471 
472  Selection srcSelect;
473  srcSelect.extent(srcDimShape).select({SelectionOperator::SET, starts, counts });
474  Selection memSelect;
475  memSelect.extent(destDimShape).select({SelectionOperator::SET, starts, counts });
476  Selection destSelect;
477  destSelect.extent(destDimShape).select({SelectionOperator::SET, starts, counts });
478 
479  if (srcDimVar.isA<int>()) {
480  std::vector<int> dimCoords;
481  srcDimVar.read<int>(dimCoords, memSelect, srcSelect);
482  destDimVar.write<int>(dimCoords, memSelect, destSelect);
483  } else if (srcDimVar.isA<float>()) {
484  std::vector<float> dimCoords;
485  srcDimVar.read<float>(dimCoords, memSelect, srcSelect);
486  destDimVar.write<float>(dimCoords, memSelect, destSelect);
487  }
488  }
489 }
490 
491 // -----------------------------------------------------------------------------
492 template<typename VarType>
494  const std::string & varName, std::vector<VarType> & varValues) {
495  Variable sourceVar = obsFrame.vars().open(varName);
496 
497  // Read the variable
498  bool gotVarData = obsFrame.readFrameVar(varName, varValues);
499 
500  // Replace source fill values with corresponding missing marks
501  if ((gotVarData) && (sourceVar.hasFillValue())) {
502  VarType sourceFillValue;
503  detail::FillValueData_t sourceFvData = sourceVar.getFillValue();
504  sourceFillValue = detail::getFillValue<VarType>(sourceFvData);
505  VarType varFillValue = this->getFillValue<VarType>();
506  for (std::size_t i = 0; i < varValues.size(); ++i) {
507  if ((varValues[i] == sourceFillValue) || std::isinf(varValues[i])
508  || std::isnan(varValues[i])) {
509  varValues[i] = varFillValue;
510  }
511  }
512  }
513  return gotVarData;
514 }
515 
516 template<>
518  const std::string & varName, std::vector<std::string> & varValues) {
519  Variable sourceVar = obsFrame.vars().open(varName);
520 
521  // Read the variable
522  bool gotVarData = obsFrame.readFrameVar(varName, varValues);
523 
524  // Replace source fill values with corresponding missing marks
525  if ((gotVarData) && (sourceVar.hasFillValue())) {
526  std::string sourceFillValue;
527  detail::FillValueData_t sourceFvData = sourceVar.getFillValue();
528  sourceFillValue = detail::getFillValue<std::string>(sourceFvData);
529  std::string varFillValue = this->getFillValue<std::string>();
530  for (std::size_t i = 0; i < varValues.size(); ++i) {
531  if (varValues[i] == sourceFillValue) {
532  varValues[i] = varFillValue;
533  }
534  }
535  }
536  return gotVarData;
537 }
538 
539 // -----------------------------------------------------------------------------
541  // Walk through the frames and copy the data to the obs_group_ storage
542  dims_attached_to_vars_ = obsFrame.ioVarDimMap();
543 
544  // Create variables in obs_group_ based on those in the obs source
546 
548  int iframe = 1;
549  for (obsFrame.frameInit(); obsFrame.frameAvailable(); obsFrame.frameNext()) {
550  Dimensions_t frameStart = obsFrame.frameStart();
551 
552  // Resize the nlocs dimesion according to the adjusted frame size produced
553  // genFrameIndexRecNums. The second argument is to tell resizeNlocs whether
554  // to append or reset to the size given by the first arguemnt.
555  resizeNlocs(obsFrame.adjNlocsFrameCount(), (iframe > 1));
556 
557  // Clear out the selection caches
558  known_fe_selections_.clear();
559  known_be_selections_.clear();
560 
561  for (auto & varNameObject : obsFrame.ioVarList()) {
562  std::string varName = varNameObject.first;
563  Variable var = varNameObject.second;
564  Dimensions_t beFrameStart;
565  if (obsFrame.ioIsVarDimByNlocs(varName)) {
566  beFrameStart = obsFrame.adjNlocsFrameStart();
567  } else {
568  beFrameStart = frameStart;
569  }
570  Dimensions_t frameCount = obsFrame.frameCount(varName);
571 
572  // Transfer the variable to the in-memory storage
573  if (var.isA<int>()) {
574  std::vector<int> varValues;
575  if (readObsSource<int>(obsFrame, varName, varValues)) {
576  storeVar<int>(varName, varValues, beFrameStart, frameCount);
577  }
578  } else if (var.isA<float>()) {
579  std::vector<float> varValues;
580  if (readObsSource<float>(obsFrame, varName, varValues)) {
581  storeVar<float>(varName, varValues, beFrameStart, frameCount);
582  }
583  } else if (var.isA<std::string>()) {
584  std::vector<std::string> varValues;
585  if (readObsSource<std::string>(obsFrame, varName, varValues)) {
586  storeVar<std::string>(varName, varValues, beFrameStart, frameCount);
587  }
588  }
589  }
590  iframe++;
591  }
592 
593  // Record locations and channels dimension sizes
594  // The HDF library has an issue when a dimension marked UNLIMITED is queried for its
595  // size a zero is returned instead of the proper current size. As a workaround for this
596  // ask the frame how many locations it kept instead of asking the nlocs dimension for
597  // its size.
598  std::string nlocsName = dim_info_.get_dim_name(ObsDimensionId::Nlocs);
599  std::size_t nLocs = obsFrame.frameNumLocs();
601 
602  std::string nchansName = dim_info_.get_dim_name(ObsDimensionId::Nchans);
603  if (obs_group_.vars.exists(nchansName)) {
604  std::size_t nChans = obs_group_.vars.open(nchansName).getDimensions().dimsCur[0];
606  }
607 
608  // Record record information
609  nrecs_ = obsFrame.frameNumRecs();
610  indx_ = obsFrame.index();
611  recnums_ = obsFrame.recnums();
612 
613  // TODO(SRH) Eliminate this temporary fix. Some files do not have ISO 8601 date time
614  // strings. Instead they have a reference date time and time offset. If there is no
615  // datetime variable in the obs_group_ after reading in the file, then create one
616  // from the reference/offset time values.
617  std::string dtVarName = fullVarName("MetaData", "datetime");
618  if (!obs_group_.vars.exists(dtVarName)) {
619  int refDtime;
620  obsFrame.atts().open("date_time").read<int>(refDtime);
621 
622  std::vector<float> timeOffset;
623  Variable timeVar = obs_group_.vars.open(fullVarName("MetaData", "time"));
624  timeVar.read<float>(timeOffset);
625 
626  std::vector<util::DateTime> dtVals = convertRefOffsetToDtime(refDtime, timeOffset);
627  std::vector<std::string> dtStrings(dtVals.size(), "");
628  for (std::size_t i = 0; i < dtVals.size(); ++i) {
629  dtStrings[i] = dtVals[i].toString();
630  }
631 
633  params.chunk = true;
634  params.compressWithGZIP();
635  params.setFillValue<std::string>(this->getFillValue<std::string>());
636  std::vector<Variable>
639  .createWithScales<std::string>(dtVarName, dimVars, params)
640  .write<std::string>(dtStrings);
641  }
642 }
643 
644 // -----------------------------------------------------------------------------
645 void ObsSpace::resizeNlocs(const Dimensions_t nlocsSize, const bool append) {
647  Dimensions_t nlocsResize;
648  if (append) {
649  nlocsResize = nlocsVar.getDimensions().dimsCur[0] + nlocsSize;
650  } else {
651  nlocsResize = nlocsSize;
652  }
654  { std::pair<Variable, Dimensions_t>(nlocsVar, nlocsResize) });
655 }
656 
657 // -----------------------------------------------------------------------------
658 
659 template<typename VarType>
660 void ObsSpace::loadVar(const std::string & group, const std::string & name,
661  const std::vector<int> & chanSelect,
662  std::vector<VarType> & varValues) const {
663  // For backward compatibility, recognize and handle appropriately variable names with
664  // channel suffixes.
665  std::string nameToUse;
666  std::vector<int> chanSelectToUse;
667  splitChanSuffix(group, name, chanSelect, nameToUse, chanSelectToUse);
668 
669  Variable var = obs_group_.vars.open(fullVarName(group, nameToUse));
670  std::string nchansVarName = this->get_dim_name(ObsDimensionId::Nchans);
671 
672  // In the following code, assume that if a variable has channels, the
673  // nchans dimension will be the second dimension.
674  if (obs_group_.vars.exists(nchansVarName)) {
675  Variable nchansVar = obs_group_.vars.open(nchansVarName);
676  if (var.getDimensions().dimensionality > 1) {
677  if (var.isDimensionScaleAttached(1, nchansVar) && (chanSelectToUse.size() > 0)) {
678  // This variable has nchans as the second dimension, and channel
679  // selection has been specified. Build selection objects based on the
680  // channel numbers. For now, select all locations (first dimension).
681  const std::size_t nchansDimIndex = 1;
682  Selection memSelect;
683  Selection obsGroupSelect;
684  const std::size_t numElements = createChannelSelections(
685  var, nchansDimIndex, chanSelectToUse, memSelect, obsGroupSelect);
686 
687  var.read<VarType>(varValues, memSelect, obsGroupSelect);
688  varValues.resize(numElements);
689  } else {
690  // Not a radiance variable, just read in the whole variable
691  var.read<VarType>(varValues);
692  }
693  } else {
694  // Not a radiance variable, just read in the whole variable
695  var.read<VarType>(varValues);
696  }
697  } else {
698  // Not a radiance variable, just read in the whole variable
699  var.read<VarType>(varValues);
700  }
701 }
702 
703 // -----------------------------------------------------------------------------
704 
705 template<typename VarType>
706 void ObsSpace::saveVar(const std::string & group, std::string name,
707  const std::vector<VarType> & varValues,
708  const std::vector<std::string> & dimList) {
709  // For backward compatibility, recognize and handle appropriately variable names with
710  // channel suffixes.
711 
712  std::vector<int> channels;
713 
714  const std::string nchansVarName = this->get_dim_name(ObsDimensionId::Nchans);
715  if (group != "MetaData" && obs_group_.vars.exists(nchansVarName)) {
716  // If the variable does not already exist and its name ends with an underscore followed by
717  // a number, interpret the latter as a channel number selecting a slice of the "nchans"
718  // dimension.
719  std::string nameToUse;
720  splitChanSuffix(group, name, {}, nameToUse, channels);
721  name = std::move(nameToUse);
722  }
723 
724  const std::string fullName = fullVarName(group, name);
725 
726  std::vector<std::string> dimListToUse = dimList;
727  if (!obs_group_.vars.exists(fullName) && !channels.empty()) {
728  // Append "channels" to the dimensions list if not already present.
729  const size_t nchansDimIndex =
730  std::find(dimListToUse.begin(), dimListToUse.end(), nchansVarName) -
731  dimListToUse.begin();
732  if (nchansDimIndex == dimListToUse.size())
733  dimListToUse.push_back(nchansVarName);
734  }
735  Variable var = openCreateVar<VarType>(fullName, dimListToUse);
736 
737  if (channels.empty()) {
738  var.write<VarType>(varValues);
739  } else {
740  // Find the index of the nchans dimension
741  Variable nchansVar = obs_group_.vars.open(nchansVarName);
742  std::vector<std::vector<Named_Variable>> dimScales =
743  var.getDimensionScaleMappings({Named_Variable(nchansVarName, nchansVar)});
744  size_t nchansDimIndex = std::find_if(dimScales.begin(), dimScales.end(),
745  [](const std::vector<Named_Variable> &x)
746  { return !x.empty(); }) - dimScales.begin();
747  if (nchansDimIndex == dimScales.size())
748  throw eckit::UserError("Variable " + fullName +
749  " is not indexed by channel numbers", Here());
750 
751  Selection memSelect;
752  Selection obsGroupSelect;
753  createChannelSelections(var, nchansDimIndex, channels,
754  memSelect, obsGroupSelect);
755  var.write<VarType>(varValues, memSelect, obsGroupSelect);
756  }
757 }
758 
759 // -----------------------------------------------------------------------------
760 
761 std::size_t ObsSpace::createChannelSelections(const Variable & variable,
762  std::size_t nchansDimIndex,
763  const std::vector<int> & channels,
764  Selection & memSelect,
765  Selection & obsGroupSelect) const {
766  // Create a vector with the channel indices corresponding to
767  // the channel numbers that have been requested.
768  std::vector<Dimensions_t> chanIndices;
769  chanIndices.reserve(channels.size());
770  for (std::size_t i = 0; i < channels.size(); ++i) {
771  auto ichan = chan_num_to_index_.find(channels[i]);
772  if (ichan != chan_num_to_index_.end()) {
773  chanIndices.push_back(ichan->second);
774  } else {
775  throw eckit::BadParameter("Selected channel number " +
776  std::to_string(channels[i]) + " does not exist.", Here());
777  }
778  }
779 
780  // Form index style selection for selecting channels
781  std::vector<Dimensions_t> varDims = variable.getDimensions().dimsCur;
782  std::vector<std::vector<Dimensions_t>> dimSelects(varDims.size());
783  Dimensions_t numElements = 1;
784  for (std::size_t i = 0; i < varDims.size(); ++i) {
785  if (i == nchansDimIndex) {
786  // channels are the second dimension
787  numElements *= chanIndices.size();
788  dimSelects[i] = chanIndices;
789  } else {
790  numElements *= varDims[i];
791  std::vector<Dimensions_t> allIndices(varDims[i]);
792  std::iota(allIndices.begin(), allIndices.end(), 0);
793  dimSelects[i] = allIndices;
794  }
795  }
796 
797  std::vector<Dimensions_t> memStarts(1, 0);
798  std::vector<Dimensions_t> memCounts(1, numElements);
799  memSelect.extent(memCounts)
800  .select({SelectionOperator::SET, memStarts, memCounts});
801 
802  obsGroupSelect.extent(varDims)
803  .select({SelectionOperator::SET, 0, dimSelects[0]});
804  for (std::size_t i = 1; i < dimSelects.size(); ++i) {
805  obsGroupSelect.select({SelectionOperator::AND, i, dimSelects[i]});
806  }
807 
808  return numElements;
809 }
810 
811 // -----------------------------------------------------------------------------
812 // This function is for transferring data from a memory buffer into the ObsSpace
813 // container. At this point, the time window filtering, obs grouping and MPI
814 // distribution has been applied to the input memory buffer (varValues). Also,
815 // the variable has been resized according to appending a new frame's worth of
816 // data to the existing variable in the ObsSpace container.
817 //
818 // What this means is that you can always transfer the data as a single contiguous
819 // block which can be accomplished with a single hyperslab selection. There should
820 // be no need to cache these selections because of this.
821 template<typename VarType>
822 void ObsSpace::storeVar(const std::string & varName, std::vector<VarType> & varValues,
823  const Dimensions_t frameStart, const Dimensions_t frameCount) {
824  // get the dimensions of the variable
825  Variable var = obs_group_.vars.open(varName);
826  std::vector<Dimensions_t> varDims = var.getDimensions().dimsCur;
827 
828  // check the caches for the selectors
829  std::vector<std::string> &dims = dims_attached_to_vars_.at(varName);
830  if (!known_fe_selections_.count(dims)) {
831  // backend starts at frameStart, and the count for the first dimension
832  // is the frame count
833  std::vector<Dimensions_t> beCounts = varDims;
834  beCounts[0] = frameCount;
835  std::vector<Dimensions_t> beStarts(beCounts.size(), 0);
836  beStarts[0] = frameStart;
837 
838  // front end always starts at zero, and the number of elements is equal to the
839  // product of the var dimensions (with the first dimension adjusted by
840  // the frame count)
841  std::vector<Dimensions_t> feCounts(1, std::accumulate(
842  beCounts.begin(), beCounts.end(), static_cast<Dimensions_t>(1),
843  std::multiplies<Dimensions_t>()));
844  std::vector<Dimensions_t> feStarts(1, 0);
845 
847  .extent(feCounts).select({ SelectionOperator::SET, feStarts, feCounts });
849  .extent(varDims).select({ SelectionOperator::SET, beStarts, beCounts });
850  }
851  Selection & feSelect = known_fe_selections_[dims];
852  Selection & beSelect = known_be_selections_[dims];
853 
854  var.write<VarType>(varValues, feSelect, beSelect);
855 }
856 
857 // -----------------------------------------------------------------------------
858 // NOTE(RH): Variable creation params rewritten so they are constructed once for each type.
859 // There is no need to keep re-creating the same objects over and over again.
860 // TODO(?): Rewrite slightly so that the scales are opened all at once and are kept
861 // open until the end of the function.
862 // TODO(?): Batch variable creation so that the collective function is used.
863 void ObsSpace::createVariables(const Has_Variables & srcVarContainer,
864  Has_Variables & destVarContainer,
865  const VarDimMap & dimsAttachedToVars) {
866  // Set up reusable creation parameters for the loop below. Use the JEDI missing
867  // values for the fill values.
868  VariableCreationParameters paramsInt = VariableCreationParameters::defaults<int>();
869  VariableCreationParameters paramsFloat = VariableCreationParameters::defaults<float>();
870  VariableCreationParameters paramsStr = VariableCreationParameters::defaults<std::string>();
871 
872  paramsInt.setFillValue<int>(this->getFillValue<int>());
873  paramsFloat.setFillValue<float>(this->getFillValue<float>());
874  paramsStr.setFillValue<std::string>(this->getFillValue<std::string>());
875 
876  // Walk through map to get list of variables to create along with
877  // their dimensions. Use the srcVarContainer to get the var data type.
878  for (auto & ivar : dimsAttachedToVars) {
879  std::string varName = ivar.first;
880  std::vector<std::string> varDimNames = ivar.second;
881 
882  // Create a vector with dimension scale vector from destination container
883  std::vector<Variable> varDims;
884  for (auto & dimVarName : varDimNames) {
885  varDims.push_back(destVarContainer.open(dimVarName));
886  }
887 
888  Variable srcVar = srcVarContainer.open(varName);
889  if (srcVar.isA<int>()) {
890  destVarContainer.createWithScales<int>(varName, varDims, paramsInt);
891  } else if (srcVar.isA<float>()) {
892  destVarContainer.createWithScales<float>(varName, varDims, paramsFloat);
893  } else if (srcVar.isA<std::string>()) {
894  destVarContainer.createWithScales<std::string>(varName, varDims, paramsStr);
895  } else {
896  if (this->comm().rank() == 0) {
897  oops::Log::warning() << "WARNING: ObsSpace::createVariables: "
898  << "Skipping variable due to an unexpected data type for variable: "
899  << varName << std::endl;
900  }
901  }
902  }
903 }
904 
905 // -----------------------------------------------------------------------------
907  // If there is a channels dimension, load up the channel number to index map
908  // for channel selection feature.
909  std::string nchansVarName = this->get_dim_name(ObsDimensionId::Nchans);
910  if (obs_group_.vars.exists(nchansVarName)) {
911  // Get the vector of channel numbers
912  Variable nchansVar = obs_group_.vars.open(nchansVarName);
913  std::vector<int> chanNumbers;
914  if (nchansVar.isA<int>()) {
915  nchansVar.read<int>(chanNumbers);
916  } else if (nchansVar.isA<float>()) {
917  std::vector<float> floatChanNumbers;
918  nchansVar.read<float>(floatChanNumbers);
919  ConvertVarType<float, int>(floatChanNumbers, chanNumbers);
920  }
921 
922  // Walk through the vector and place the number to index mapping into
923  // the map structure.
924  for (int i = 0; i < chanNumbers.size(); ++i) {
925  chan_num_to_index_[chanNumbers[i]] = i;
926  }
927  }
928 }
929 
930 // -----------------------------------------------------------------------------
931 void ObsSpace::splitChanSuffix(const std::string & group, const std::string & name,
932  const std::vector<int> & chanSelect, std::string & nameToUse,
933  std::vector<int> & chanSelectToUse) const {
934  nameToUse = name;
935  chanSelectToUse = chanSelect;
936  // For backward compatibility, recognize and handle appropriately variable names with
937  // channel suffixes.
938  if (chanSelect.empty() && !obs_group_.vars.exists(fullVarName(group, name))) {
939  int channelNumber;
940  if (extractChannelSuffixIfPresent(name, nameToUse, channelNumber))
941  chanSelectToUse = {channelNumber};
942  }
943 }
944 
945 // -----------------------------------------------------------------------------
947  typedef std::map<std::size_t, std::vector<std::pair<float, std::size_t>>> TmpRecIdxMap;
948  typedef TmpRecIdxMap::iterator TmpRecIdxIter;
949 
950  // Get the sort variable from the data store, and convert to a vector of floats.
951  std::size_t nLocs = this->nlocs();
952  std::vector<float> SortValues(nLocs);
953  if (this->obs_sort_var() == "datetime") {
954  std::vector<util::DateTime> Dates(nLocs);
955  get_db("MetaData", this->obs_sort_var(), Dates);
956  for (std::size_t iloc = 0; iloc < nLocs; iloc++) {
957  SortValues[iloc] = (Dates[iloc] - Dates[0]).toSeconds();
958  }
959  } else {
960  get_db("MetaData", this->obs_sort_var(), SortValues);
961  }
962 
963  // Construct a temporary structure to do the sorting, then transfer the results
964  // to the data member recidx_.
965  TmpRecIdxMap TmpRecIdx;
966  for (size_t iloc = 0; iloc < nLocs; iloc++) {
967  TmpRecIdx[recnums_[iloc]].push_back(std::make_pair(SortValues[iloc], iloc));
968  }
969 
970  for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
971  if (this->obs_sort_order() == "ascending") {
972  sort(irec->second.begin(), irec->second.end());
973  } else {
974  // Use a lambda function to access the std::pair greater-than operator to
975  // implement a descending order sort, ensuring the associated indices remain
976  // in ascending order.
977  sort(irec->second.begin(), irec->second.end(),
978  [](const std::pair<float, std::size_t> & p1,
979  const std::pair<float, std::size_t> & p2){
980  return (p2.first < p1.first ||
981  (!(p1.first < p2.first) && p2.second > p1.second));});
982  }
983  }
984 
985  // Copy indexing to the recidx_ data member.
986  for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
987  recidx_[irec->first].resize(irec->second.size());
988  for (std::size_t iloc = 0; iloc < irec->second.size(); iloc++) {
989  recidx_[irec->first][iloc] = irec->second[iloc].second;
990  }
991  }
992 }
993 
994 // -----------------------------------------------------------------------------
996  std::size_t nLocs = this->nlocs();
997  for (size_t iloc = 0; iloc < nLocs; iloc++) {
998  recidx_[recnums_[iloc]].push_back(iloc);
999  }
1000 }
1001 
1002 // -----------------------------------------------------------------------------
1004  // Form lists of regular and dimension scale variables
1005  VarNameObjectList varList;
1006  VarNameObjectList dimVarList;
1007  VarDimMap dimsAttachedToVars;
1008  Dimensions_t maxVarSize;
1009  collectVarDimInfo(obs_group_, varList, dimVarList, dimsAttachedToVars, maxVarSize);
1010 
1011  // Record dimension scale variables for the output file creation.
1012  for (auto & dimNameObject : dimVarList) {
1013  std::string dimName = dimNameObject.first;
1014  Dimensions_t dimSize = dimNameObject.second.getDimensions().dimsCur[0];
1015  Dimensions_t dimMaxSize = dimSize;
1016  Dimensions_t dimChunkSize = dimSize;
1017  if (dimName == dim_info_.get_dim_name(ObsDimensionId::Nlocs)) {
1018  dimSize = this->nlocs();
1019  dimMaxSize = Unlimited;
1020  dimChunkSize = this->globalNumLocs();
1021  }
1022  obs_params_.setDimScale(dimName, dimSize, dimMaxSize, dimChunkSize);
1023  }
1024 
1025  // Record the maximum variable size
1026  obs_params_.setMaxVarSize(maxVarSize);
1027 
1028  // Open the file for output
1029  ObsFrameWrite obsFrame(obs_params_);
1030 
1031  // Iterate through the frames and variables moving data from the database into
1032  // the file.
1033  int iframe = 0;
1034  for (obsFrame.frameInit(varList, dimVarList, dimsAttachedToVars, maxVarSize);
1035  obsFrame.frameAvailable(); obsFrame.frameNext(varList)) {
1036  Dimensions_t frameStart = obsFrame.frameStart();
1037  for (auto & varNameObject : varList) {
1038  // form the destination (ObsFrame) variable name
1039  std::string destVarName = varNameObject.first;
1040 
1041  // open the destination variable and get the associated count
1042  Variable destVar = obsFrame.vars().open(destVarName);
1043  Dimensions_t frameCount = obsFrame.frameCount(destVarName);
1044 
1045  // transfer data if we haven't gone past the end of the variable yet
1046  if (frameCount > 0) {
1047  // Form the hyperslab selection for this frame
1048  Variable srcVar = varNameObject.second;
1049  std::vector<Dimensions_t> varShape = srcVar.getDimensions().dimsCur;
1050  ioda::Selection memSelect = obsFrame.createMemSelection(varShape, frameCount);
1051  ioda::Selection varSelect =
1052  obsFrame.createVarSelection(varShape, frameStart, frameCount);
1053 
1054  // transfer the data
1055  if (srcVar.isA<int>()) {
1056  std::vector<int> varValues;
1057  srcVar.read<int>(varValues, memSelect, varSelect);
1058  obsFrame.writeFrameVar(destVarName, varValues);
1059  } else if (srcVar.isA<float>()) {
1060  std::vector<float> varValues;
1061  srcVar.read<float>(varValues, memSelect, varSelect);
1062  obsFrame.writeFrameVar(destVarName, varValues);
1063  } else if (srcVar.isA<std::string>()) {
1064  std::vector<std::string> varValues;
1065  srcVar.read<std::string>(varValues, memSelect, varSelect);
1066  obsFrame.writeFrameVar(destVarName, varValues);
1067  }
1068  }
1069  }
1070  }
1071 }
1072 
1073 // -----------------------------------------------------------------------------
1074 template <typename DataType>
1075 void ObsSpace::extendVariable(Variable & extendVar, const size_t startFill) {
1076  const DataType missing = util::missingValue(missing);
1077 
1078  // Read in variable data values. At this point the values will contain
1079  // the extended region filled with missing values. The read call will size
1080  // the varVals vector accordingly.
1081  std::vector<DataType> varVals;
1082  extendVar.read<DataType>(varVals);
1083 
1084  // Iterator pointing to the first non-missing value in the input vector.
1085  auto it_nonmissing = std::find_if(varVals.begin(), varVals.end(),
1086  [&missing](DataType x){return x != missing;});
1087  if (it_nonmissing != varVals.end()) {
1088  std::fill(varVals.begin() + startFill, varVals.end(), *it_nonmissing);
1089  extendVar.write<DataType>(varVals);
1090  }
1091 }
1092 
1093 // -----------------------------------------------------------------------------
1095  // In this function we use the following terminology:
1096  // * The word 'original' refers to locations and records present in the ObsSpace before its
1097  // extension.
1098  // * The word 'averaged' refers to locations and records created when extending the ObsSpace
1099  // (they will represent data averaged onto model levels).
1100  // * The word 'extended' refers to the original and averaged locations and records taken
1101  // together.
1102  // * The word 'local` refers to locations and records held on the current process.
1103  // * The word 'global` refers to locations and records held on any process.
1104 
1105  const int nlevs = params.numModelLevels;
1106 
1107  const size_t numOriginalLocs = this->nlocs();
1108  const bool recordsExist = !this->obs_group_vars().empty();
1109  if (nlevs > 0 &&
1110  gnlocs_ > 0 &&
1111  recordsExist) {
1112  // Identify the indices of all local original records.
1113  const std::set<size_t> uniqueOriginalRecs(recnums_.begin(), recnums_.end());
1114 
1115  // Find the largest global indices of locations and records in the original ObsSpace.
1116  // Increment them by one to produce the initial values for the global indices of locations
1117  // and records in the averaged ObsSpace.
1118 
1119  // These are *upper bounds* on the global numbers of original locations and records
1120  // because the sequences of global location indices and records may contain gaps.
1121  size_t upperBoundOnGlobalNumOriginalLocs = 0;
1122  size_t upperBoundOnGlobalNumOriginalRecs = 0;
1123  if (numOriginalLocs > 0) {
1124  upperBoundOnGlobalNumOriginalLocs = indx_.back() + 1;
1125  upperBoundOnGlobalNumOriginalRecs = *uniqueOriginalRecs.rbegin() + 1;
1126  }
1127  dist_->max(upperBoundOnGlobalNumOriginalLocs);
1128  dist_->max(upperBoundOnGlobalNumOriginalRecs);
1129 
1130  // The replica distribution will be used to place each averaged record on the same process
1131  // as the corresponding original record.
1132  std::shared_ptr<Distribution> replicaDist = createReplicaDistribution(
1133  commMPI_, dist_, recnums_);
1134 
1135  // Create averaged locations and records.
1136 
1137  // Local index of an averaged location. Note that these indices, like local indices of
1138  // original locations, start from 0.
1139  size_t averagedLoc = 0;
1140  for (size_t originalRec : uniqueOriginalRecs) {
1141  ASSERT(dist_->isMyRecord(originalRec));
1142  const size_t averagedRec = originalRec;
1143  const size_t extendedRec = upperBoundOnGlobalNumOriginalRecs + averagedRec;
1144  nrecs_++;
1145  // recidx_ stores the locations belonging to each record on the local processor.
1146  std::vector<size_t> &locsInRecord = recidx_[extendedRec];
1147  for (int ilev = 0; ilev < nlevs; ++ilev, ++averagedLoc) {
1148  const size_t extendedLoc = numOriginalLocs + averagedLoc;
1149  const size_t globalAveragedLoc = originalRec * nlevs + ilev;
1150  const size_t globalExtendedLoc = upperBoundOnGlobalNumOriginalLocs + globalAveragedLoc;
1151  // Geographical position shouldn't matter -- the replica distribution is expected
1152  // to assign records to processors solely on the basis of their indices.
1153  replicaDist->assignRecord(averagedRec, globalAveragedLoc, eckit::geometry::Point2());
1154  ASSERT(replicaDist->isMyRecord(averagedRec));
1155  recnums_.push_back(extendedRec);
1156  indx_.push_back(globalExtendedLoc);
1157  locsInRecord.push_back(extendedLoc);
1158  }
1159  }
1160  replicaDist->computePatchLocs();
1161 
1162  const size_t numAveragedLocs = averagedLoc;
1163  const size_t numExtendedLocs = numOriginalLocs + numAveragedLocs;
1164 
1165  // Extend all existing vectors with missing values.
1166  // Only vectors with (at least) one dimension equal to nlocs are modified.
1167  // Second argument (bool) to resizeNlocs tells function:
1168  // true -> append the amount in first argument to the existing size
1169  // false -> reset the existing size to the amount in the first argument
1170  this->resizeNlocs(numExtendedLocs, false);
1171 
1172  // Extend all existing vectors with missing values, excepting those
1173  // that have been selected to be filled with non-missing values.
1174  // By default, some spatial and temporal coordinates are filled in this way.
1175  //
1176  // The resizeNlocs() call above has extended all variables with nlocs as a first
1177  // dimension to the new nlocsext size, and filled all the extended parts with
1178  // missing values. Go through the list of variables that are to be filled with
1179  // non-missing values, check if they exist and if so fill in the extended section
1180  // with non-missing values.
1181  const std::vector <std::string> &nonMissingExtendedVars = params.nonMissingExtendedVars;
1182  for (auto & varName : nonMissingExtendedVars) {
1183  // It is implied that these variables are in the MetaData group
1184  const std::string groupName = "MetaData";
1185  const std::string fullVname = fullVarName(groupName, varName);
1186  if (obs_group_.vars.exists(fullVname)) {
1187  // Note nlocs at this point holds the original size before extending.
1188  // The numOriginalLocs argument passed to extendVariable indicates where to start filling.
1189  Variable extendVar = obs_group_.vars.open(fullVname);
1190  if (extendVar.isA<int>()) {
1191  extendVariable<int>(extendVar, numOriginalLocs);
1192  } else if (extendVar.isA<float>()) {
1193  extendVariable<float>(extendVar, numOriginalLocs);
1194  } else if (extendVar.isA<std::string>()) {
1195  extendVariable<std::string>(extendVar, numOriginalLocs);
1196  }
1197  }
1198  }
1199 
1200  // Fill extended_obs_space with 0, which indicates the standard section of the ObsSpace,
1201  // and 1, which indicates the extended section.
1202  std::vector <int> extended_obs_space(numExtendedLocs, 0);
1203  std::fill(extended_obs_space.begin() + numOriginalLocs, extended_obs_space.end(), 1);
1204  // Save extended_obs_space for use in filters.
1205  put_db("MetaData", "extended_obs_space", extended_obs_space);
1206 
1207  // Calculate the number of newly created locations on all processes (counting those
1208  // held on multiple processes only once).
1209  std::unique_ptr<Accumulator<size_t>> accumulator = replicaDist->createAccumulator<size_t>();
1210  for (size_t averagedLoc = 0; averagedLoc < numAveragedLocs; ++averagedLoc)
1211  accumulator->addTerm(averagedLoc, 1);
1212  size_t globalNumAveragedLocs = accumulator->computeResult();
1213 
1214  // Replace the original distribution with a PairOfDistributions, covering
1215  // both the original and averaged locations.
1216  dist_ = std::make_shared<PairOfDistributions>(commMPI_, dist_, replicaDist,
1217  numOriginalLocs,
1218  upperBoundOnGlobalNumOriginalRecs);
1219 
1220  // Increment nlocs on this processor.
1221  dim_info_.set_dim_size(ObsDimensionId::Nlocs, numExtendedLocs);
1222  // Increment gnlocs_.
1223  gnlocs_ += globalNumAveragedLocs;
1224  }
1225 }
1226 // -----------------------------------------------------------------------------
1227 
1228 } // namespace ioda
HDF5 engine.
Interfaces for ioda::Variable and related classes.
Groups are a new implementation of ObsSpaces.
Definition: Group.h:159
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
Definition: ObsSpace.cc:92
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
Definition: ObsSpace.cc:84
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
Definition: ObsSpace.cc:80
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
Definition: src/ObsSpace.h:87
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
Definition: ObsSpace.cc:88
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
Definition: src/ObsSpace.h:90
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
Definition: src/ObsSpace.h:84
bool ioIsVarDimByNlocs(const std::string &varName) const
return true if variable is dimensioned by nlocs
Definition: ObsFrame.h:76
Selection createMemSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing a memory buffer
Definition: ObsFrame.cc:26
virtual std::size_t frameNumLocs() const
return number of locations
Definition: ObsFrame.h:81
const VarNameObjectList & ioVarList() const
return list of regular variables from ObsIo
Definition: ObsFrame.h:64
VarDimMap ioVarDimMap() const
return map from variables to their attached dimension scales
Definition: ObsFrame.h:70
Dimensions_t globalNumLocs() const
return number of locations that were selected from ObsIo
Definition: ObsFrame.h:87
const VarNameObjectList & ioDimVarList() const
return list of dimension scale variables from ObsIo
Definition: ObsFrame.h:67
Selection createVarSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameStart, const Dimensions_t frameCount)
create selection object for accessing a frame from a whole variable
Definition: ObsFrame.cc:72
Has_Attributes & atts() const
return attributes container from ObsIo
Definition: ObsFrame.h:61
Dimensions_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Definition: ObsFrame.h:90
Has_Variables & vars() const
return variables container from ObsIo
Definition: ObsFrame.h:58
virtual std::size_t frameNumRecs() const
return number of records
Definition: ObsFrame.h:84
Implementation of ObsFrameRead class.
std::vector< std::size_t > index() const override
return list of indices indicating which locations were selected from ObsIo
void frameNext() override
move to the next frame
Definition: ObsFrameRead.cc:69
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
Dimensions_t adjNlocsFrameCount() const override
return adjusted nlocs frame count
bool frameAvailable() override
true if a frame is available (not past end of frames)
Definition: ObsFrameRead.cc:75
void frameInit() override
initialize for walking through the frames
Definition: ObsFrameRead.cc:44
bool readFrameVar(const std::string &varName, std::vector< int > &varData)
read a frame variable
Dimensions_t frameStart() override
return current frame starting index
Dimensions_t adjNlocsFrameStart() const override
return adjusted nlocs frame start
std::shared_ptr< const Distribution > distribution()
return the MPI distribution
std::vector< std::size_t > recnums() const override
return list of record numbers from ObsIo
Implementation of ObsFrameWrite class.
void writeFrameVar(const std::string &varName, const std::vector< int > &varData)
write a frame variable
void frameNext(const VarNameObjectList &varList) override
move to the next frame
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
void frameInit(const VarNameObjectList &varList, const VarNameObjectList &dimVarList, const VarDimMap &varDimMap, const Dimensions_t maxVarSize) override
initialize for walking through the frames
bool frameAvailable() override
true if a frame is available (not past end of frames)
Dimensions_t frameStart() override
return current frame starting index
static ObsGroup generate(Group &emptyGroup, const NewDimensionScales_t &fundamentalDims, std::shared_ptr< const detail::DataLayoutPolicy > layout=nullptr)
Create an empty ObsGroup and populate it with the fundamental dimensions.
Definition: ObsGroup.cpp:72
void resize(const std::vector< std::pair< Variable, ioda::Dimensions_t >> &newDims)
Resize a Dimension and every Variable that depends on it.
Definition: ObsGroup.cpp:85
oops::Parameter< ObsGroupingParameters > obsGrouping
options controlling obs record grouping
oops::Parameter< int > maxFrameSize
maximum frame size
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
Definition: ObsSpace.cc:1094
void storeVar(const std::string &varName, std::vector< VarType > &varValues, const Dimensions_t frameStart, const Dimensions_t frameCount)
store a variable in the obs_group_ object
Definition: ObsSpace.cc:822
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
Definition: ObsSpace.cc:863
bool has(const std::string &group, const std::string &name) const
return true if group/variable exists
Definition: ObsSpace.cc:230
void saveToFile()
Dump the database into the output file.
Definition: ObsSpace.cc:1003
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
Definition: src/ObsSpace.h:222
ObsDtype dtype(const std::string &group, const std::string &name) const
return data type for group/variable
Definition: ObsSpace.cc:240
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
Definition: ObsSpace.cc:306
std::shared_ptr< const Distribution > dist_
MPI distribution object.
Definition: src/ObsSpace.h:380
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
Definition: src/ObsSpace.h:355
std::vector< std::size_t > recidx_all_recnums() const
return all record numbers from the recidx_ data member
Definition: ObsSpace.cc:382
const RecIdxIter recidx_begin() const
Return the begin iterator associated with the recidx_ data member.
Definition: ObsSpace.cc:344
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
Definition: ObsSpace.cc:995
ObsDimInfo dim_info_
dimension information for variables in this obs space
Definition: src/ObsSpace.h:361
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
Definition: src/ObsSpace.h:395
std::size_t recidx_recnum(const RecIdxIter &irec) const
return record number pointed to by the given iterator
Definition: ObsSpace.cc:360
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
Definition: src/ObsSpace.h:192
void splitChanSuffix(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::string &nameToUse, std::vector< int > &chanSelectToUse) const
split off the channel number suffix from a given variable name
Definition: ObsSpace.cc:931
void print(std::ostream &os) const
print function for oops::Printable class
Definition: ObsSpace.cc:397
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame ojbect)
Definition: ObsSpace.cc:407
std::string obsname_
name of obs space
Definition: src/ObsSpace.h:374
std::size_t nrecs_
number of records
Definition: src/ObsSpace.h:358
const std::vector< std::string > & obs_group_vars() const
return YAML configuration parameter: obsdatain.obsgrouping.group variables
Definition: ObsSpace.cc:210
const RecIdxIter recidx_end() const
Return the end iterator associated with the recidx_ data member.
Definition: ObsSpace.cc:349
oops::Variables obsvars_
Observation "variables" to be simulated.
Definition: src/ObsSpace.h:377
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
Definition: src/ObsSpace.h:365
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
Definition: ObsSpace.cc:706
const eckit::mpi::Comm & comm() const
Definition: src/ObsSpace.h:148
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
Definition: ObsSpace.cc:540
const std::vector< std::size_t > & recidx_vector(const RecIdxIter &irec) const
return record number vector pointed to by the given iterator
Definition: ObsSpace.cc:365
RecIdxMap::const_iterator RecIdxIter
Definition: src/ObsSpace.h:120
RecIdxMap recidx_
profile ordering
Definition: src/ObsSpace.h:389
ObsSpaceParameters obs_params_
obs io parameters
Definition: src/ObsSpace.h:371
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
Definition: ObsSpace.cc:946
void loadVar(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::vector< VarType > &varValues) const
load a variable from the obs_group_ object
Definition: ObsSpace.cc:660
size_t nlocs() const
return the number of locations in the obs space. Note that nlocs may be smaller than global unique nl...
Definition: src/ObsSpace.h:176
ObsGroup obs_group_
observation data store
Definition: src/ObsSpace.h:368
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
Definition: src/ObsSpace.h:383
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }) const
transfer data from the obs container to vdata
Definition: ObsSpace.cc:270
void fillChanNumToIndexMap()
fill in the channel number to channel index map
Definition: ObsSpace.cc:906
std::size_t nvars() const
return the number of variables in the obs space container. "Variables" refers to the quantities that ...
Definition: ObsSpace.cc:189
void extendVariable(Variable &extendVar, const size_t startFill)
Extend the given variable.
Definition: ObsSpace.cc:1075
const ObsSpaceParameters & params() const
Definition: src/ObsSpace.h:151
std::string obs_sort_order() const
return YAML configuration parameter: obsdatain.obsgrouping.sort order
Definition: ObsSpace.cc:220
void save()
save the obs space data into a file (if obsdataout specified)
Definition: ObsSpace.cc:171
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
Definition: ObsSpace.cc:645
bool readObsSource(ObsFrameRead &obsFrame, const std::string &varName, std::vector< VarType > &varValues)
read in values for variable from obs source
Definition: ObsSpace.cc:493
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable variable along dimension nchansDimIndex corresponding to ...
Definition: ObsSpace.cc:761
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
Definition: src/ObsSpace.h:392
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Definition: src/ObsSpace.h:171
const eckit::mpi::Comm & commMPI_
MPI communicator.
Definition: src/ObsSpace.h:349
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
Definition: src/ObsSpace.h:168
std::size_t gnlocs_
total number of locations
Definition: src/ObsSpace.h:352
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
Definition: src/ObsSpace.h:401
const oops::Variables & obsvariables() const
return oops variables object (simulated variables)
Definition: src/ObsSpace.h:332
ObsSpace(const eckit::Configuration &config, const eckit::mpi::Comm &comm, const util::DateTime &bgn, const util::DateTime &end, const eckit::mpi::Comm &timeComm)
Config based constructor for an ObsSpace object.
Definition: ObsSpace.cc:112
const std::string & obsname() const
return the name of the obs type being stored
Definition: src/ObsSpace.h:216
std::string obs_sort_var() const
return YAML configuration parameter: obsdatain.obsgrouping.sort variable
Definition: ObsSpace.cc:215
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
Definition: src/ObsSpace.h:398
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
Definition: src/ObsSpace.h:386
bool recidx_has(const std::size_t recNum) const
true if given record number exists in the recidx_ data member
Definition: ObsSpace.cc:354
void deserialize(const eckit::Configuration &config)
deserialize the parameter sub groups
void setMaxVarSize(const Dimensions_t maxVarSize)
set the maximum variable size
ObsTopLevelParameters top_level_
sub groups of parameters
void setDimScale(const std::string &dimName, const Dimensions_t curSize, const Dimensions_t maxSize, const Dimensions_t chunkSize)
set a new dimension scale
oops::RequiredParameter< oops::Variables > simVars
simulated variables
oops::OptionalParameter< ObsExtendParameters > obsExtend
extend the ObsSpace with extra fixed-size records
oops::RequiredParameter< std::string > obsSpaceName
name of obs space
oops::OptionalParameter< ObsFileOutParameters > obsOutFile
output specification by writing to a file
const ObsIoParametersBase & obsIoInParameters() const
parameters indicating where to load data from
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Definition: Selection.h:48
Variables store data!
Definition: Variable.h:680
virtual Attribute_Implementation read(gsl::span< char > data, const Type &in_memory_dataType) const
The fundamental read function. Backends overload this function to implement all read operations.
Definition: Attribute.cpp:75
virtual Group open(const std::string &name) const
Open a group.
Definition: Group.cpp:87
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
virtual bool exists(const std::string &name) const
Definition: Group.cpp:65
virtual Attribute open(const std::string &name) const
Open an Attribute by name.
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual std::vector< std::string > list() const
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
virtual FillValueData_t getFillValue() const
Retrieve the fill value.
Definition: Variable.cpp:111
bool isA() const
Convenience function to check a Variable's storage type.
Definition: Variable.h:99
virtual bool isDimensionScaleAttached(unsigned int DimensionNumber, const Variable &scale) const
Is a dimension scale attached to this Variable in a certain position?
Definition: Variable.cpp:288
virtual bool hasFillValue() const
Check if a variable has a fill value set.
Definition: Variable.cpp:98
virtual Dimensions getDimensions() const
Definition: Variable.cpp:160
virtual std::vector< std::vector< Named_Variable > > getDimensionScaleMappings(const std::list< Named_Variable > &scalesToQueryAgainst, bool firstOnly=true) const
Which dimensions are attached at which positions? This function may offer improved performance on som...
Definition: Variable.cpp:303
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
Definition: Variable.cpp:330
virtual Variable write(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all)
The fundamental write function. Backends overload this function to implement all write operations.
Definition: Variable.cpp:317
virtual Variable resize(const std::vector< Dimensions_t > &newDims)
Resize the variable.
Definition: Variable.cpp:172
IODA_DL std::string genUniqueName()
Convenience function to generate a random file name.
Definition: HH.cpp:50
BackendNames
Backend names.
Definition: Factory.h:27
IODA_DL Group constructBackend(BackendNames name, BackendCreationParameters &params)
This is a simple factory style function that will instantiate a different backend based on a given na...
Definition: Factory.cpp:123
@ ObsStore
ObsStore in-memory.
@ Truncate_If_Exists
If the file already exists, overwrite it.
Selection & extent(const VecDimensions_t &sz)
Provide the dimensions of the object that you are selecting from.
Definition: Selection.h:111
Selection & select(const SingleSelection &s)
Append a new selection.
Definition: Selection.h:103
list newDims
Definition: 05-ObsGroup.py:98
bool extractChannelSuffixIfPresent(const std::string &name, std::string &nameWithoutChannelSuffix, int &channel)
Definition: ObsSpace.cc:49
std::vector< util::DateTime > convertDtStringsToDtime(const std::vector< std::string > &dtStrings)
convert datetime strings to DateTime object
Definition: IodaUtils.cc:239
constexpr int Unlimited
Specifies that a dimension is resizable to infinity.
std::vector< util::DateTime > convertRefOffsetToDtime(const int refIntDtime, const std::vector< float > &timeOffsets)
convert reference, time to DateTime object
Definition: IodaUtils.cc:251
std::vector< std::pair< std::string, Variable > > VarNameObjectList
typedef for holding list of variable names with associated variable object
Definition: IodaUtils.h:30
std::string fullVarName(const std::string &groupName, const std::string &varName)
form full variable name given individual group and variable names
Definition: IodaUtils.cc:120
std::vector< std::shared_ptr< NewDimensionScale_Base > > NewDimensionScales_t
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
void collectVarDimInfo(const ObsGroup &obsGroup, VarNameObjectList &varObjectList, VarNameObjectList &dimVarObjectList, VarDimMap &dimsAttachedToVars, Dimensions_t &maxVarSize0)
collect variable and dimension information from a ioda ObsGroup
Definition: IodaUtils.cc:125
ObsDimensionId
Definition: src/ObsSpace.h:61
std::shared_ptr< Distribution > createReplicaDistribution(const eckit::mpi::Comm &comm, std::shared_ptr< const Distribution > master, const std::vector< std::size_t > &masterRecordNums)
Create a suitable replica distribution for the distribution master.
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Definition: Dimensions.h:23
Dimensions_t dimensionality
The dimensionality (rank) of the data.
Definition: Dimensions.h:25
Used to specify backend creation-time properties.
Definition: Factory.h:56
A named pair of (variable_name, ioda::Variable).
Definition: Variable.h:752
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57
VariableCreationParameters & setFillValue(DataType fill)
Definition: Has_Variables.h:69
Container used to store and manipulate fill values.
Definition: Fill.h:35