IODA
src/ObsDataVector.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2018-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef OBSDATAVECTOR_H_
9 #define OBSDATAVECTOR_H_
10 
11 #include <cmath>
12 #include <limits>
13 #include <ostream>
14 #include <string>
15 #include <vector>
16 
17 #include <boost/math/special_functions/fpclassify.hpp>
18 
19 #include "eckit/exception/Exceptions.h"
20 #include "eckit/mpi/Comm.h"
21 
22 #include "oops/base/Variables.h"
23 #include "oops/util/Logger.h"
24 #include "oops/util/missingValues.h"
25 #include "oops/util/ObjectCounter.h"
26 #include "oops/util/Printable.h"
27 
28 #include "ioda/distribution/Accumulator.h"
29 #include "ioda/distribution/DistributionUtils.h"
30 #include "ioda/ObsSpace.h"
31 #include "ioda/ObsVector.h"
32 
33 namespace ioda {
34 
35 //-----------------------------------------------------------------------------
36 
37 template <typename DATATYPE> using ObsDataRow = std::vector<DATATYPE>;
38 
39 //-----------------------------------------------------------------------------
40 //! ObsDataVector<DATATYPE> handles vectors of data of type DATATYPE in observation space
41 
42 template<typename DATATYPE>
43 class ObsDataVector: public util::Printable,
44  private util::ObjectCounter<ObsDataVector<DATATYPE> > {
45  public:
46  static const std::string classname() {return "ioda::ObsDataVector";}
47 
48  ObsDataVector(ObsSpace &, const oops::Variables &,
49  const std::string & grp = "", const bool fail = true);
50  ObsDataVector(ObsSpace &, const std::string &,
51  const std::string & grp = "", const bool fail = true);
52  explicit ObsDataVector(ObsVector &);
55 
57 
58  void zero();
59  void mask(const ObsDataVector<int> &);
60 
61  void read(const std::string &, const bool fail = true); // only used in GNSSRO QC
62  void save(const std::string &) const;
63 
64 // Methods below are used by UFO but not by OOPS
65  const ObsSpace & space() const {return obsdb_;}
66  size_t nvars() const {return nvars_;} // Size in (local) memory
67  size_t nlocs() const {return nlocs_;} // Size in (local) memory
68  bool has(const std::string & vargrp) const {return obsvars_.has(vargrp);}
69 
70  const ObsDataRow<DATATYPE> & operator[](const size_t ii) const {return rows_.at(ii);}
71  ObsDataRow<DATATYPE> & operator[](const size_t ii) {return rows_.at(ii);}
72 
73  const ObsDataRow<DATATYPE> & operator[](const std::string var) const
74  {return rows_.at(obsvars_.find(var));}
75  ObsDataRow<DATATYPE> & operator[](const std::string var) {return rows_.at(obsvars_.find(var));}
76 
77  const std::string & obstype() const {return obsdb_.obsname();}
78  const oops::Variables & varnames() const {return obsvars_;}
79 
80  private:
81  void print(std::ostream &) const;
82 
84  oops::Variables obsvars_;
85  size_t nvars_;
86  size_t nlocs_;
87  std::vector<ObsDataRow<DATATYPE> > rows_;
88  const DATATYPE missing_;
89 };
90 
91 // -----------------------------------------------------------------------------
92 template <typename DATATYPE>
93 ObsDataVector<DATATYPE>::ObsDataVector(ObsSpace & obsdb, const oops::Variables & vars,
94  const std::string & grp, const bool fail)
95  : obsdb_(obsdb), obsvars_(vars), nvars_(obsvars_.size()),
96  nlocs_(obsdb_.nlocs()), rows_(nvars_),
97  missing_(util::missingValue(missing_))
98 {
99  oops::Log::trace() << "ObsDataVector::ObsDataVector start" << std::endl;
100  for (size_t jj = 0; jj < nvars_; ++jj) {
101  rows_[jj].resize(nlocs_);
102  }
103  if (!grp.empty()) this->read(grp, fail);
104  oops::Log::trace() << "ObsDataVector::ObsDataVector done" << std::endl;
105 }
106 // -----------------------------------------------------------------------------
107 template <typename DATATYPE>
108 ObsDataVector<DATATYPE>::ObsDataVector(ObsSpace & obsdb, const std::string & var,
109  const std::string & grp, const bool fail)
110  : obsdb_(obsdb), obsvars_(std::vector<std::string>(1, var)), nvars_(1),
111  nlocs_(obsdb_.nlocs()), rows_(1),
112  missing_(util::missingValue(missing_))
113 {
114  oops::Log::trace() << "ObsDataVector::ObsDataVector start" << std::endl;
115  rows_[0].resize(nlocs_);
116  if (!grp.empty()) this->read(grp, fail);
117  oops::Log::trace() << "ObsDataVector::ObsDataVector done" << std::endl;
118 }
119 // -----------------------------------------------------------------------------
120 template <typename DATATYPE>
122  : obsdb_(vect.space()), obsvars_(vect.varnames()), nvars_(vect.nvars()), nlocs_(vect.nlocs()),
123  rows_(nvars_), missing_(util::missingValue(missing_))
124 {
125  oops::Log::trace() << "ObsDataVector::ObsDataVector ObsVector start" << std::endl;
126  const double dmiss = util::missingValue(dmiss);
127  for (size_t jv = 0; jv < nvars_; ++jv) {
128  rows_[jv].resize(nlocs_);
129  }
130  size_t ii = 0;
131  for (size_t jl = 0; jl < nlocs_; ++jl) {
132  for (size_t jv = 0; jv < nvars_; ++jv) {
133  if (vect[ii] == dmiss) {
134  rows_[jv][jl] = missing_;
135  } else {
136  rows_[jv][jl] = static_cast<DATATYPE>(vect[ii]);
137  }
138  ++ii;
139  }
140  }
141  oops::Log::trace() << "ObsDataVector::ObsDataVector ObsVector done" << std::endl;
142 }
143 // -----------------------------------------------------------------------------
144 template <typename DATATYPE>
146  : obsdb_(other.obsdb_), obsvars_(other.obsvars_), nvars_(other.nvars_),
147  nlocs_(other.nlocs_), rows_(other.rows_), missing_(util::missingValue(missing_)) {
148  oops::Log::trace() << "ObsDataVector copied" << std::endl;
149 }
150 // -----------------------------------------------------------------------------
151 template <typename DATATYPE>
153 // -----------------------------------------------------------------------------
154 template <typename DATATYPE>
156  oops::Log::trace() << "ObsDataVector::operator= start" << std::endl;
157  ASSERT(&obsdb_ == &rhs.obsdb_);
158  obsvars_ = rhs.obsvars_;
159  nvars_ = rhs.nvars_;
160  nlocs_ = rhs.nlocs_;
161  rows_ = rhs.rows_;
162  oops::Log::trace() << "ObsDataVector::operator= done" << std::endl;
163  return *this;
164 }
165 // -----------------------------------------------------------------------------
166 template <typename DATATYPE>
168  for (size_t jv = 0; jv < nvars_; ++jv) {
169  for (size_t jj = 0; jj < nlocs_; ++jj) {
170  rows_.at(jv).at(jj) = static_cast<DATATYPE>(0);
171  }
172  }
173 }
174 // -----------------------------------------------------------------------------
175 template <typename DATATYPE>
177  ASSERT(nvars_ == flags.nvars());
178  ASSERT(nlocs_ == flags.nlocs());
179  for (size_t jv = 0; jv < nvars_; ++jv) {
180  for (size_t jj = 0; jj < nlocs_; ++jj) {
181  if (flags[jv][jj] > 0) rows_.at(jv).at(jj) = missing_;
182  }
183  }
184 }
185 // -----------------------------------------------------------------------------
186 template <typename DATATYPE>
187 void ObsDataVector<DATATYPE>::read(const std::string & name, const bool fail) {
188  oops::Log::trace() << "ObsDataVector::read, name = " << name << std::endl;
189  const DATATYPE missing = util::missingValue(missing);
190 
191  // Only need to read data when nlocs_ is greater than 0.
192  // e.g. if there is no obs. on current MPI task, no read needed.
193  if ( nlocs_ > 0 ) {
194  std::vector<DATATYPE> tmp(nlocs_);
195 
196  for (size_t jv = 0; jv < nvars_; ++jv) {
197  if (fail || obsdb_.has(name, obsvars_.variables()[jv])) {
198  obsdb_.get_db(name, obsvars_.variables()[jv], tmp);
199  for (size_t jj = 0; jj < nlocs_; ++jj) {
200  rows_.at(jv).at(jj) = tmp.at(jj);
201  }
202  }
203  }
204  }
205 }
206 // -----------------------------------------------------------------------------
207 template <typename DATATYPE>
208 void ObsDataVector<DATATYPE>::save(const std::string & name) const {
209  oops::Log::trace() << "ObsDataVector::save, name = " << name << std::endl;
210  std::vector<DATATYPE> tmp(nlocs_);
211  for (size_t jv = 0; jv < nvars_; ++jv) {
212  for (std::size_t jj = 0; jj < tmp.size(); ++jj) {
213  tmp.at(jj) = rows_.at(jv).at(jj);
214  }
215  obsdb_.put_db(name, obsvars_.variables()[jv], tmp);
216  }
217 }
218 // -----------------------------------------------------------------------------
219 /// Print statistics describing a vector \p obsdatavector of observations taken from \p obsdb
220 /// to the stream \p os.
221 ///
222 /// This is an implementation suitable for non-numeric data. Users shouldn't need to call it
223 /// directly; they should call ObsDataVector::print() instead.
224 ///
225 /// \see printNumericObsDataVectorStats.
226 template <typename DATATYPE>
228  const ObsSpace &obsdb,
229  std::ostream & os) {
230  for (size_t jv = 0; jv < obsdatavector.nvars(); ++jv) {
231  int nloc = obsdb.globalNumLocs();
232  // collect nobs on all processors
233  int nobs = globalNumNonMissingObs(*obsdb.distribution(),
234  obsdatavector.nvars(), obsdatavector[jv]);
235 
236  os << obsdb.obsname() << " " << obsdatavector.varnames()[jv] << " nlocs = " << nloc
237  << ", nobs = " << nobs << std::endl;
238  }
239 }
240 // -----------------------------------------------------------------------------
241 /// Print statistics describing a vector \p obsdatavector of observations taken from \p obsdb
242 /// to the stream \p os.
243 ///
244 /// This is an implementation suitable for numeric data. Users shouldn't need to call it
245 /// directly; they should call ObsDataVector::print() instead.
246 ///
247 /// \see printNonnumericObsDataVectorStats.
248 template <typename DATATYPE>
250  const ObsSpace &obsdb,
251  std::ostream & os) {
252  const DATATYPE missing = util::missingValue(missing);
253  for (size_t jv = 0; jv < obsdatavector.nvars(); ++jv) {
254  DATATYPE zmin = std::numeric_limits<DATATYPE>::max();
255  DATATYPE zmax = std::numeric_limits<DATATYPE>::lowest();
256  std::unique_ptr<Accumulator<DATATYPE>> accumulator =
257  obsdb.distribution()->createAccumulator<DATATYPE>();
258  int nloc = obsdb.globalNumLocs();
259 
260  const std::vector<DATATYPE> &vector = obsdatavector[jv];
261  for (size_t jj = 0; jj < obsdatavector.nlocs(); ++jj) {
262  DATATYPE zz = vector.at(jj);
263  if (zz != missing) {
264  if (zz < zmin) zmin = zz;
265  if (zz > zmax) zmax = zz;
266  accumulator->addTerm(jj, zz);
267  }
268  }
269  // collect zmin, zmax, zavg, globalNumNonMissingObs on all processors
270  obsdb.distribution()->min(zmin);
271  obsdb.distribution()->max(zmax);
272  DATATYPE zsum = accumulator->computeResult();
273  int nobs = globalNumNonMissingObs(*obsdb.distribution(), 1, vector);
274 
275  os << std::endl << obsdb.obsname() << " " << obsdatavector.varnames()[jv]
276  << " nlocs = " << nloc << ", nobs = " << nobs;
277  if (nobs > 0) {
278  os << ", min = " << zmin << ", max = " << zmax << ", avg = " << zsum/nobs;
279  } else {
280  os << " : No observations.";
281  }
282  }
283 }
284 // -----------------------------------------------------------------------------
285 // Default implementation...
286 template <typename DATATYPE>
287 void ObsDataVector<DATATYPE>::print(std::ostream & os) const {
288  printNonnumericObsDataVectorStats(*this, obsdb_, os);
289 }
290 // -----------------------------------------------------------------------------
291 // and specializations for numeric types that can be held in ioda::ObsSpace variables.
292 template <>
293 inline void ObsDataVector<double>::print(std::ostream & os) const {
294  printNumericObsDataVectorStats(*this, obsdb_, os);
295 }
296 // -----------------------------------------------------------------------------
297 template <>
298 inline void ObsDataVector<float>::print(std::ostream & os) const {
299  printNumericObsDataVectorStats(*this, obsdb_, os);
300 }
301 // -----------------------------------------------------------------------------
302 template <>
303 inline void ObsDataVector<int>::print(std::ostream & os) const {
304  printNumericObsDataVectorStats(*this, obsdb_, os);
305 }
306 // -----------------------------------------------------------------------------
307 } // namespace ioda
308 
309 #endif // OBSDATAVECTOR_H_
ObsDataVector<DATATYPE> handles vectors of data of type DATATYPE in observation space.
void mask(const ObsDataVector< int > &)
ObsDataRow< DATATYPE > & operator[](const std::string var)
const DATATYPE missing_
const oops::Variables & varnames() const
oops::Variables obsvars_
void print(std::ostream &) const
const ObsSpace & space() const
static const std::string classname()
bool has(const std::string &vargrp) const
const ObsDataRow< DATATYPE > & operator[](const std::string var) const
const ObsDataRow< DATATYPE > & operator[](const size_t ii) const
size_t nlocs() const
ObsDataVector & operator=(const ObsDataVector &)
ObsDataRow< DATATYPE > & operator[](const size_t ii)
std::vector< ObsDataRow< DATATYPE > > rows_
void save(const std::string &) const
ObsDataVector(ObsSpace &, const oops::Variables &, const std::string &grp="", const bool fail=true)
size_t nvars() const
const std::string & obstype() const
void read(const std::string &, const bool fail=true)
ObsVector class to handle vectors in observation space for IODA.
Definition: src/ObsVector.h:34
void printNonnumericObsDataVectorStats(const ObsDataVector< DATATYPE > &obsdatavector, const ObsSpace &obsdb, std::ostream &os)
std::vector< DATATYPE > ObsDataRow
std::size_t globalNumNonMissingObs(const Distribution &dist, std::size_t numVariables, const std::vector< double > &v)
void printNumericObsDataVectorStats(const ObsDataVector< DATATYPE > &obsdatavector, const ObsSpace &obsdb, std::ostream &os)