IODA
Has_Variables.h
Go to the documentation of this file.
1 #pragma once
2 /*
3  * (C) Copyright 2020-2021 UCAR
4  *
5  * This software is licensed under the terms of the Apache Licence Version 2.0
6  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7  */
8 /*! \addtogroup ioda_cxx_variable
9  *
10  * @{
11  * \file Has_Variables.h
12  * \brief Interfaces for ioda::Has_Variables and related classes.
13  */
14 
15 #include <cstring>
16 #include <gsl/gsl-lite.hpp>
17 #include <iostream>
18 #include <map>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
25 #include "ioda/Exception.h"
26 #include "ioda/Layout.h"
27 #include "ioda/Misc/Eigen_Compat.h"
28 #include "ioda/Misc/MergeMethods.h"
29 #include "ioda/Types/Type.h"
32 #include "ioda/defs.h"
33 
34 namespace ioda {
35 class Has_Variables;
36 class ObsGroup;
37 struct Named_Variable;
38 namespace detail {
39 class Has_Variables_Backend;
40 class Has_Variables_Base;
41 class DataLayoutPolicy;
42 class Group_Base;
43 
44 } // namespace detail
45 
46 /// \brief A few chunking strategies for Variables
47 namespace chunking {
48 /// Convenience function for setting default chunking parameters.
49 inline bool Chunking_Max(const std::vector<Dimensions_t>& in, std::vector<Dimensions_t>& out) {
50  out = in;
51  return true;
52 }
53 } // namespace chunking
54 
55 /// \brief Used to specify Variable creation-time properties.
56 /// \ingroup ioda_cxx_variable
58 private:
59  std::vector<std::pair<unsigned int, Variable> > dimsToAttach_;
60  std::string dimScaleName_;
61 
62 public:
63  /// @name Fill Value
64  /// @{
65 
67 
68  template <class DataType>
70  detail::assignFillValue<DataType>(fillValue_, fill);
71  return *this;
72  }
74  fillValue_.set_ = false;
75  return *this;
76  }
77 
78  /// @}
79  /// @name Chunking and compression
80  /// @{
81 
82  /// \brief Do we chunk this variable? Required for extendible / compressible Variables.
83  /// \details Requires a chunking strategy.
84  bool chunk = false;
85 
86  /// \brief Manually specify the chunks. Never directly use. Use getChunks(...) instead.
87  std::vector<Dimensions_t> chunks;
88  /// Set variable chunking strategy. Used only if chunk == true and chunks.size() == 0.
89  std::function<bool(const std::vector<Dimensions_t>&, std::vector<Dimensions_t>&)>
90  fChunkingStrategy = chunking::Chunking_Max;
91  /// Figure out the chunking size
92  /// \param cur_dims are the current dimensions
93  std::vector<Dimensions_t> getChunks(const std::vector<Dimensions_t>& cur_dims) const {
94  if (chunks.size()) return chunks;
95  std::vector<Dimensions_t> res;
96  if (fChunkingStrategy(cur_dims, res)) return res;
97  throw Exception("Cannot figure out an appropriate chunking size.", ioda_Here());
98  }
99 
100  bool gzip_ = false;
101  bool szip_ = false;
102  int gzip_level_ = 6; // 1 (fastest) - 9 (most compression)
103  unsigned int szip_PixelsPerBlock_ = 16;
104  unsigned int szip_options_ = 4; // Defined as H5_SZIP_EC_OPTION_MASK in hdf5.h;
105 
106  void noCompress();
107  void compressWithGZIP(int level = 6);
108  void compressWithSZIP(unsigned PixelsPerBlock = 16, unsigned options = 4);
109 
110  /// @}
111  /// @name General Functions
112  /// @{
113 
114  /// Set any initial attributes here
116 
120 
121  template<class DataType>
124  ret.chunk = true;
125  ret.compressWithGZIP();
126  FillValuePolicies::applyFillValuePolicy<DataType>(FillValuePolicy::NETCDF4, ret.fillValue_);
127  return ret;
128  }
129  template <class DataType>
131  return defaulted<DataType>();
132  }
133 
134  /// Finalize routine to make sure struct members are intact (e.g. for fill values)
136 
138 
139 private:
141  /// Apply the properties to a Variable (second pass; after Variable is created).
142  Variable applyImmediatelyAfterVariableCreation(Variable h) const;
143 
144  /// @}
145 };
146 
147 typedef std::vector<Variable> NewVariables_Scales_t;
148 /// \brief Used to specify a new variable with the collective createWithScales function.
149 struct IODA_DL NewVariable_Base : std::enable_shared_from_this<NewVariable_Base> {
150  /// Name of the variable.
151  std::string name_;
152  /// Type of the new dimension. Int, char, etc. Used if a type is not passed directly.
153  std::type_index dataType_;
154  /// Type of the new dimension. Used if a type is passed directly.
156  /// Dimension scales
158  /// Var creation params
160 
161  virtual ~NewVariable_Base() {}
162 
163  NewVariable_Base(const std::string& name, const Type& dataType,
164  const NewVariables_Scales_t& scales,
166  : name_(name), dataType_(typeid(void)), dataTypeKnown_(dataType),
167  scales_(scales),
168  vcp_(params) {}
169 
170  NewVariable_Base(const std::string& name, const std::type_index& dataType,
171  const NewVariables_Scales_t& scales,
173  : name_(name),
174  dataType_(dataType),
175  scales_(scales),
176  vcp_(params) {}
177 };
178 typedef std::vector<std::shared_ptr<NewVariable_Base>> NewVariables_t;
179 
180 template <class DataType>
181 inline std::shared_ptr<NewVariable_Base> NewVariable(
182  const std::string& name, const NewVariables_Scales_t& scales,
183  const VariableCreationParameters& params = VariableCreationParameters::defaulted<DataType>()) {
184  return std::make_shared<NewVariable_Base>(name, typeid(DataType), scales, params);
185 }
186 
187 inline std::shared_ptr<NewVariable_Base> NewVariable(const std::string& name,
188  const Type& DataType,
189  const NewVariables_Scales_t& scales,
192  return std::make_shared<NewVariable_Base>(name, DataType, scales, params);
193 }
194 
195 
196 namespace detail {
197 
198 /// \ingroup ioda_cxx_variable
200  // friend class Group_Base;
201 private:
202  /// Using an opaque object to implement the backend.
203  std::shared_ptr<Has_Variables_Backend> backend_;
204  /// Set by ObsGroup.
205  std::shared_ptr<const detail::DataLayoutPolicy> layout_;
206  std::vector<ComplementaryVariableCreationParameters> complementaryVariables_;
207  /// \brief FillValuePolicy helper
208  /// \details Hides the template function calls, so that the headers are smaller.
209  static void _py_fvp_helper(BasicTypes dataType, FillValuePolicy& fvp,
211 
212  ComplementaryVariableCreationParameters createDerivedVariableParameters(
213  const std::string &inputName, const std::string &outputName, size_t position);
214  std::vector<std::vector<std::string>> loadComponentVariableData(
215  const ComplementaryVariableCreationParameters& derivedVariableParams);
216 
217 protected:
218  Has_Variables_Base(std::shared_ptr<Has_Variables_Backend>,
219  std::shared_ptr<const DataLayoutPolicy> = nullptr);
220 
221 public:
223 
224  /// Set the mapping policy to determine the Layout of Variables stored under this Group.
225  /// Usually only set by ObsGroup when we create / open.
226  virtual void setLayout(std::shared_ptr<const detail::DataLayoutPolicy>);
227 
228  /// Query the backend and get the type provider.
229  virtual Type_Provider* getTypeProvider() const;
230 
231  /// \brief Get the fill value policy used for Variables within this Group
232  /// \details The backend has to be consulted for this operation. Storage of this policy is
233  /// backend-dependent.
234  virtual FillValuePolicy getFillValuePolicy() const;
235 
236  /// @name General Functions
237  /// @{
238  ///
239 
240  /// \brief Does a Variable with the specified name exist?
241  /// \param name is the name of the Variable that we are looking for.
242  /// \returns true if it exists.
243  /// \returns false otherwise.
244  virtual bool exists(const std::string& name) const;
245  /// \brief Delete a Variable with the specified name.
246  /// \param name is the name of the Variable that we are deleting.
247  /// \throws jedi::xError if no such Variable exists.
248  virtual void remove(const std::string& name);
249  /// \brief Open a Variable by name
250  /// \param name is the name of the Variable to be opened.
251  /// \returns An instance of a Variable that can be queried (with getDimensions()) and read.
252  virtual Variable open(const std::string& name) const;
253  /// \brief Open a Variable by name
254  /// \param name is the name of the Variable to be opened.
255  /// \returns An instance of a Variable that can be queried (with getDimensions()) and read.
256  inline Variable operator[](const std::string& name) const { return open(name); }
257 
258  /// List all Variables under this group (one-level search).
259  /// \see Group_Base::listObjects if you need to enumerate both Groups and Variables, or
260  /// if you need recursion.
261  virtual std::vector<std::string> list() const;
262  /// Convenience function to list all Variables under this group (one-level search).
263  /// \see Group_Base::listObjects if you need to enumerate both Groups and Variables, or
264  /// if you need recursion.
265  inline std::vector<std::string> operator()() const { return list(); }
266 
267  /// \brief Combines all complementary variables as specified in the mapping file, opens them,
268  /// and optionally removes the originals from the ObsGroup.
269  ///
270  /// \p removeOriginals determines if the original complementary variables should be removed from
271  /// the ObsGroup. Later functionality will ensure that the original complementary variables can
272  /// be recreated on writing back to the original file.
273  void stitchComplementaryVariables(bool removeOriginals = true);
274 
275  /// \brief Converts unit to SI for all eligible variables. If conversion function not defined,
276  /// stores unit as attribute.
277  ///
278  /// Makes the conversion if the variable's unit is defined in the mapping file and the unit conversion
279  /// is defined in UnitConversions.h.
280  void convertVariableUnits(std::ostream &out = std::cerr);
281 
282  /// \brief Create a Variable without setting its data.
283  /// \param name is the name of the Variable.
284  /// \param dimensions is a vector representing the size of the metadata.
285  /// Each element of the vector is a dimension with a certain size.
286  /// \param in_memory_dataType is the runtime description of the Variable's data type.
287  /// \returns A Variable that can be written to.
288  virtual Variable create(const std::string& name, const Type& in_memory_dataType,
289  const std::vector<Dimensions_t>& dimensions = {1},
290  const std::vector<Dimensions_t>& max_dimensions = {},
291  const VariableCreationParameters& params = VariableCreationParameters());
292 
293  /// Python compatibility function
294  /// \note Multiple ways to specify dimensions to match possible
295  /// Python function signatures.
296  Variable _create_py(const std::string& name, BasicTypes dataType,
297  const std::vector<Dimensions_t>& cur_dimensions = {1},
298  const std::vector<Dimensions_t>& max_dimensions = {},
299  const std::vector<Variable>& dimension_scales = {},
300  const VariableCreationParameters& params
301  = VariableCreationParameters());
302 
303  inline Variable create(const std::string& name, const Type& in_memory_dataType,
304  const ioda::Dimensions& dims,
306  return create(name, in_memory_dataType, dims.dimsCur, dims.dimsMax, params);
307  }
308 
309  /// \brief Create a Variable without setting its data.
310  /// \tparam DataType is the type of the data. I.e. float, int32_t, uint16_t, std::string, etc.
311  /// \param name is the name of the Variable.
312  /// \param dimensions is a vector representing the size of the metadata. Each element of the
313  /// vector is a dimension with a certain size.
314  /// \returns A Variable that can be written to.
315  template <class DataType>
316  Variable create(const std::string& name, const std::vector<Dimensions_t>& dimensions = {1},
317  const std::vector<Dimensions_t>& max_dimensions = {},
318  const VariableCreationParameters& params = VariableCreationParameters::defaulted<DataType>()) {
319  try {
321  FillValuePolicies::applyFillValuePolicy<DataType>(getFillValuePolicy(), params2.fillValue_);
322  Type in_memory_dataType = Types::GetType<DataType>(getTypeProvider());
323  auto var = create(name, in_memory_dataType, dimensions,
324  max_dimensions, params2);
325  return var;
326  } catch (...) {
327  std::throw_with_nested(Exception(ioda_Here()));
328  }
329  }
330 
331  template <class DataType>
332  Variable create(const std::string& name, const ioda::Dimensions& dims,
334  = VariableCreationParameters::defaulted<DataType>()) {
335  try {
337  FillValuePolicies::applyFillValuePolicy<DataType>(getFillValuePolicy(), params2.fillValue_);
338  return create<DataType>(name, dims.dimsCur, dims.dimsMax, params2);
339  } catch (...) {
340  std::throw_with_nested(Exception(ioda_Here()));
341  }
342  }
343 
344  /// \brief Convenience function to create a Variable from certain dimension scales.
345  /// \tparam DataType is the type of the data. I.e. int, int32_t, uint16_t, std::string, etc.
346  /// \param name is the name of the Variable.
347  /// \param dimension_scales is a vector of the dimension scales that are passed along
348  /// when the new Variable is created.
349  /// \returns A Variable that can be written to.
350  template <class DataType>
351  Variable createWithScales(const std::string& name,
352  const std::vector<Variable>& dimension_scales,
354  = VariableCreationParameters::defaulted<DataType>()) {
355  try {
356  Type in_memory_dataType = Types::GetType<DataType>(getTypeProvider());
357 
358  NewVariables_t newvars{NewVariable(name, in_memory_dataType, dimension_scales, params)};
359  createWithScales(newvars);
360  return open(name);
361  } catch (...) {
362  std::throw_with_nested(Exception(ioda_Here()));
363  }
364  }
365 
366  /// @brief Collective function optimized to mass-construct variables and attach scales.
367  /// @param newvars is a vector of the new variables to be created.
368  /// @see NewVariable for the signature of the objects to add.
369  void createWithScales(const NewVariables_t& newvars);
370 
371  /// @}
372  /// @name Collective functions
373  /// @brief These functions apply an operation to a *set* of variables in situations where
374  /// such an operation would produce better performance results than a loop of serial
375  /// function calls.
376  /// @{
377 
378  /// @brief Attach dimension scales to many Dimension Numbers in a set of Variables.
379  /// @param DimensionNumber
380  /// @param mapping is the scale mappings for each variable. The first part of the pair refers
381  /// to the variable that you are attaching scales to. The second part is a sequence of
382  /// scales that are attached along each dimension (indexed by the vector).
383  /// @details
384  /// For some backends, particularly HDF5, attaching a dimension scale to a variable is a slow
385  /// procedure when you have many variables. This function batches low-level calls and avoids
386  /// loops.
387  virtual void attachDimensionScales(
388  const std::vector<std::pair<Variable, std::vector<Variable>>>& mapping);
389 
390  /// @}
391 };
392 
394 protected:
396 
397 public:
399  FillValuePolicy getFillValuePolicy() const override;
400  void attachDimensionScales(
401  const std::vector<std::pair<Variable, std::vector<Variable>>>& mapping) override;
402 };
403 } // namespace detail
404 
405 /// \brief This class exists inside of ioda::Group and provides the interface to manipulating
406 /// Variables.
407 /// \ingroup ioda_cxx_variable
408 ///
409 /// \note It should only be constructed inside of a Group. It has no meaning elsewhere.
410 /// \see ioda::Variable for the class that represents individual variables.
411 /// \throws jedi::xError on all exceptions.
413 public:
414  virtual ~Has_Variables();
415  Has_Variables();
416  Has_Variables(std::shared_ptr<detail::Has_Variables_Backend>,
417  std::shared_ptr<const detail::DataLayoutPolicy> = nullptr);
418 };
419 } // namespace ioda
420 
421 /// @}
Flywheel creation of ioda::Attribute.
Convenience functions to work with Eigen objects.
IODA's error system.
Default fill values for ioda files.
Contains definitions for how data are arranged in ioda internally.
Utility functions and structs for combining multiple variables into one.
Interfaces for ioda::Type and related classes.
Interfaces for ioda::Variable and related classes.
Flywheel creation of ioda::Attribute objects. This is needed because you might want to make the same A...
The ioda exception class.
Definition: Exception.h:54
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
virtual ~Has_Variables()
Represents the "type" (i.e. integer, string, float) of a piece of data.
Definition: Type.h:123
Variables store data!
Definition: Variable.h:680
Variable create(const std::string &name, const ioda::Dimensions &dims, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
std::shared_ptr< const detail::DataLayoutPolicy > layout_
Set by ObsGroup.
std::vector< std::string > operator()() const
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
std::vector< ComplementaryVariableCreationParameters > complementaryVariables_
std::shared_ptr< Has_Variables_Backend > backend_
Using an opaque object to implement the backend.
Variable create(const std::string &name, const std::vector< Dimensions_t > &dimensions={1}, const std::vector< Dimensions_t > &max_dimensions={}, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Create a Variable without setting its data.
Variable operator[](const std::string &name) const
Open a Variable by name.
Variable create(const std::string &name, const Type &in_memory_dataType, const ioda::Dimensions &dims, const VariableCreationParameters &params=VariableCreationParameters())
Backends implement type providers in conjunction with Attributes, Has_Attributes, Variables and Has_V...
Definition: Type_Provider.h:36
Common preprocessor definitions used throughout IODA.
#define IODA_DL
A preprocessor tag that indicates that a symbol is to be exported/imported.
Definition: defs.h:110
FillValuePolicy
This option describes the default fill values that will be used if the user does not manually specify...
Definition: FillPolicy.h:28
@ NETCDF4
Use NetCDF4 default fill values. This is the default option for ioda files.
bool Chunking_Max(const std::vector< Dimensions_t > &in, std::vector< Dimensions_t > &out)
Convenience function for setting default chunking parameters.
Definition: Has_Variables.h:49
std::vector< std::shared_ptr< NewVariable_Base > > NewVariables_t
std::vector< Variable > NewVariables_Scales_t
std::shared_ptr< NewVariable_Base > NewVariable(const std::string &name, const NewVariables_Scales_t &scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
BasicTypes
Definition: Type.h:37
#define ioda_Here()
Describes the dimensions of an Attribute or Variable.
Definition: Dimensions.h:22
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Definition: Dimensions.h:23
std::vector< Dimensions_t > dimsMax
This must always equal dimsCur for Attribute.
Definition: Dimensions.h:24
Used to specify a new variable with the collective createWithScales function.
std::string name_
Name of the variable.
VariableCreationParameters vcp_
Var creation params.
NewVariable_Base(const std::string &name, const Type &dataType, const NewVariables_Scales_t &scales, const VariableCreationParameters &params)
NewVariable_Base(const std::string &name, const std::type_index &dataType, const NewVariables_Scales_t &scales, const VariableCreationParameters &params)
Type dataTypeKnown_
Type of the new dimension. Used if a type is passed directly.
std::type_index dataType_
Type of the new dimension. Int, char, etc. Used if a type is not passed directly.
NewVariables_Scales_t scales_
Dimension scales.
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57
static VariableCreationParameters defaulted()
detail::python_bindings::VariableCreationFillValues< VariableCreationParameters > _py_setFillValue
Attribute_Creator_Store atts
Set any initial attributes here.
detail::FillValueData_t fillValue_
Definition: Has_Variables.h:66
std::vector< Dimensions_t > chunks
Manually specify the chunks. Never directly use. Use getChunks(...) instead.
Definition: Has_Variables.h:87
VariableCreationParameters & setFillValue(DataType fill)
Definition: Has_Variables.h:69
std::vector< std::pair< unsigned int, Variable > > dimsToAttach_
Definition: Has_Variables.h:59
static VariableCreationParameters defaults()
detail::FillValueData_t::FillValueUnion_t finalize() const
Finalize routine to make sure struct members are intact (e.g. for fill values)
VariableCreationParameters & unsetFillValue()
Definition: Has_Variables.h:73
std::vector< Dimensions_t > getChunks(const std::vector< Dimensions_t > &cur_dims) const
Definition: Has_Variables.h:93
bool chunk
Do we chunk this variable? Required for extendible / compressible Variables.
Definition: Has_Variables.h:84
Container used to store and manipulate fill values.
Definition: Fill.h:35
FillValueUnion_t finalize() const
Definition: Fill.cpp:16