IODA
HH-hasvariables.cpp
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2017-2020 Ryan Honeyager (ryan@honeyager.info)
3  * (C) Copyright 2020-2021 UCAR
4  *
5  * This software is licensed under the terms of the Apache Licence Version 2.0
6  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7  */
8 /*! \addtogroup ioda_internals_engines_hh
9  *
10  * @{
11  * \file HH-hasvariables.cpp
12  * \brief HDF5 engine implementation of Has_Variables.
13  */
14 
15 #include "./HH/HH-hasvariables.h"
16 
17 #include <hdf5_hl.h>
18 
19 #include <algorithm>
20 #include <numeric>
21 #include <set>
22 
23 #include "./HH/HH-Filters.h"
24 #include "./HH/HH-attributes.h"
25 #include "./HH/HH-hasattributes.h"
26 #include "./HH/HH-types.h"
27 #include "./HH/HH-util.h"
29 #include "./HH/HH-variables.h"
30 #include "./HH/Handles.h"
31 #include "ioda/Exception.h"
33 #include "ioda/Misc/Dimensions.h"
34 #include "ioda/Misc/StringFuncs.h"
35 
36 namespace ioda {
37 namespace detail {
38 namespace Engines {
39 namespace HH {
40 
41 
43 HH_HasVariables::HH_HasVariables() : base_(Handles::HH_hid_t::dummy()) {}
44 
46  : base_(grp), fileroot_(fileroot) {}
47 
50 }
51 
53  // Cheaply-reconstituted object.
54  Has_Attributes fratts(std::make_shared<HH_HasAttributes>(fileroot_));
55 
56  if (fratts.exists("_NCProperties"))
58  else if (fratts.exists("_ioda_layout"))
60  return FillValuePolicy::HDF5;
61 }
62 
63 bool HH_HasVariables::exists(const std::string& dsetname) const {
64  auto paths = splitPaths(dsetname);
65  for (size_t i = 0; i < paths.size(); ++i) {
66  auto p = condensePaths(paths, 0, i + 1);
67  htri_t linkExists = H5Lexists(base_(), p.c_str(), H5P_DEFAULT);
68  if (linkExists < 0) throw Exception("H5Lexists failed.", ioda_Here())
69  .add("here", getNameFromIdentifier(base_()))
70  .add("dsetname", dsetname);
71  if (linkExists == 0) return false;
72  }
73 #if H5_VERSION_GE(1, 12, 0)
74  H5O_info1_t oinfo;
75  herr_t err = H5Oget_info_by_name1(base_(), dsetname.c_str(), &oinfo,
76  H5P_DEFAULT); // H5P_DEFAULT only, per docs.
77 #else
78  H5O_info_t oinfo;
79  herr_t err = H5Oget_info_by_name(base_(), dsetname.c_str(), &oinfo,
80  H5P_DEFAULT); // H5P_DEFAULT only, per docs.
81 #endif
82  if (err < 0) throw Exception("H5Oget_info_by_name failed.", ioda_Here());
83  return (oinfo.type == H5O_type_t::H5O_TYPE_DATASET);
84 }
85 
86 void HH_HasVariables::remove(const std::string& name) {
87  auto ret = H5Ldelete(base_(), name.c_str(), H5P_DEFAULT);
88  if (ret < 0) throw Exception("Failed to remove link to dataset.", ioda_Here()).add("name", name);
89 }
90 
91 Variable HH_HasVariables::open(const std::string& name) const {
92  hid_t dsetid = H5Dopen(base_(), name.c_str(), H5P_DEFAULT);
93  if (dsetid < 0)
94  throw Exception("Cannot open dataset", ioda_Here()).add("name", name);
95 
96  auto b = std::make_shared<HH_Variable>(
97  HH_hid_t(dsetid, Handles::Closers::CloseHDF5Dataset::CloseP), shared_from_this());
98  Variable var{b};
99  return var;
100 }
101 
102 std::vector<std::string> HH_HasVariables::list() const {
103  std::vector<std::string> res;
104  H5G_info_t info;
105  herr_t e = H5Gget_info(base_(), &info);
106  if (e < 0) throw Exception("H5Gget_info failed.", ioda_Here());
107  res.reserve(gsl::narrow<size_t>(info.nlinks));
108  for (hsize_t i = 0; i < info.nlinks; ++i) {
109  // Get the name
110  ssize_t szName
111  = H5Lget_name_by_idx(base_(), ".", H5_INDEX_NAME, H5_ITER_NATIVE, i, NULL, 0, H5P_DEFAULT);
112  if (szName < 0) throw Exception("H5Lget_name_by_idx failed.", ioda_Here());
113  std::vector<char> vName(szName + 1, '\0');
114  if (H5Lget_name_by_idx(base_(), ".", H5_INDEX_NAME, H5_ITER_NATIVE,
115  i, vName.data(), szName + 1,H5P_DEFAULT) < 0)
116  throw Exception("H5Lget_name_by_idx failed.", ioda_Here());
117 
118  // Get the object and check the type
119 #if H5_VERSION_GE(1, 12, 0)
120  H5O_info1_t oinfo;
121  herr_t err = H5Oget_info_by_idx1(base_(), ".", H5_INDEX_NAME, H5_ITER_NATIVE,
122  i, &oinfo, H5P_DEFAULT);
123 #else
124  H5O_info_t oinfo;
125  herr_t err = H5Oget_info_by_idx(base_(), ".", H5_INDEX_NAME, H5_ITER_NATIVE,
126  i, &oinfo, H5P_DEFAULT);
127 #endif
128 
129 if (err < 0) continue;
130 if (oinfo.type == H5O_type_t::H5O_TYPE_DATASET) res.emplace_back(std::string(vName.data()));
131  }
132  return res;
133 }
134 
135 Variable HH_HasVariables::create(const std::string& name, const Type& in_memory_dataType,
136  const std::vector<Dimensions_t>& dimensions,
137  const std::vector<Dimensions_t>& max_dimensions,
139  try {
140  auto typeBackend = std::dynamic_pointer_cast<HH_Type>(in_memory_dataType.getBackend());
141 
142  // Dataset creation parameters and chunk sizes
143  VariableCreation hParams(params, dimensions, max_dimensions, typeBackend);
144 
145  hid_t dsetid
146  = H5Dcreate(base_(), // The group that holds the variable
147  name.c_str(), // The name of the variable. If forward slashes are found,
148  // intermediate groups get created.
149  typeBackend->handle(), // The data type of the variable (int, char, ...)
150  hParams.dataspace()(), // The data space of the variable (dimensions, max dims)
151  hParams.linkCreationPlist()(), // The link creation property list (create
152  // intermediate groups if necessary)
153  hParams.datasetCreationPlist()(), // The dataset creation property list
154  // (compression, chunking, fill value)
155  hParams.datasetAccessPlist()() // The dataset access property list (H5P_DEFAULT)
156  );
157  if (dsetid < 0) throw Exception("Variable creation failed.", ioda_Here())
158  .add("name", name);
160  // Note: this new variable gets handed back to Has_Variables_Base::create,
161  // which then calls params.applyImmediatelyAfterVariableCreation to link up
162  // dimension scales and set initial attributes.
163 
164  auto b = std::make_shared<HH_Variable>(res, shared_from_this());
165  Variable var{ b };
166 
167  // One last thing: if the fill value is set, then we need to add an attribute
168  // called "_FillValue" for NetCDF readers.
169  if (params.fillValue_.set_) {
170  Attribute fv = var.atts.create("_FillValue", in_memory_dataType);
171  const FillValueData_t::FillValueUnion_t fvdata = params.fillValue_.finalize();
172  fv.write(gsl::make_span<char>(
173  const_cast<char*>(reinterpret_cast<const char*>(&fvdata)),
174  sizeof(FillValueData_t::FillValueUnion_t)),in_memory_dataType);
175  }
176 
177  return var;
178  }
179  catch (std::bad_cast&) {
180  std::throw_with_nested(Exception("in_memory_dataType was constructed using the wrong backend.", ioda_Here()));
181  }
182 }
183 
184 
185 
187  const std::vector<std::pair<Variable, std::vector<Variable>>>& mapping) {
188  using std::map;
189  using std::make_pair;
190  using std::pair;
191  using std::shared_ptr;
192  using std::vector;
193 
194  // Forward mapping.
195  // Unravel mapping into something HDF5-specific. We also do not care about the "named"
196  // part of the Named_Variables.
197  vector<pair<shared_ptr<HH_Variable>, vector<shared_ptr<HH_Variable>>>> hmapping;
198  hmapping.reserve(mapping.size());
199  for (const auto& m : mapping) {
200  auto scaleVarBackend = std::dynamic_pointer_cast<HH_Variable>(m.first.get());
201  vector<shared_ptr<HH_Variable>> varsBackend;
202  varsBackend.reserve(m.second.size());
203  for (const auto& v : m.second) {
204  varsBackend.push_back(std::dynamic_pointer_cast<HH_Variable>(v.get()));
205  }
206  hmapping.emplace_back(make_pair(scaleVarBackend, varsBackend));
207  }
208 
209  // Construct a mapping of hid_t to variable references.
210  // We use this to accelerate lookups of HDF5 references that we are encoding into the
211  // scale attributes.
212 
213 
214  // Mapping of hid_t to variable references.
215  map<hid_t, ref_t> HIDtoVarRef;
216  auto EmplaceVarRef = [&HIDtoVarRef](const shared_ptr<HH_Variable> &v) -> void {
217  hobj_ref_t ref;
218  herr_t err = H5Rcreate(&ref, v->get()(), ".", H5R_OBJECT, -1);
219  if (err < 0) throw Exception("H5Rcreate failed.", ioda_Here());
220  HIDtoVarRef[v->get()()] = ref;
221  };
222  for (const auto& m : hmapping) {
223  EmplaceVarRef(m.first);
224  for (const auto& v : m.second) EmplaceVarRef(v);
225  }
226 
227  // Construct forward (var.atts referencing scales) and reverse (scale att referencing vars)
228  // mappings. We need to write both to the file.
229 
230  // Forward mapping of variables and object references.
231  // vector<vector<ref_t>> is dimension number, vector of scales.
232  vector<pair<shared_ptr<HH_Variable>, vector<vector<ref_t>>>> VarToScaleMap;
233  // Reverse mapping of <scale var address, pair<scale, vector<var variable ref>>>.
234  struct VarMapData {
235  shared_ptr<HH_Variable> scale;
236  vector<ds_list_t> vars;
237  };
238  map<haddr_t, VarMapData> ScaleToVarMap;
239 
240  for (auto& m : hmapping) {
241  shared_ptr<HH_Variable> v = m.first;
242  const vector<shared_ptr<HH_Variable>>& scales = m.second;
243  vector<vector<ref_t>> refScalesForVar(gsl::narrow<size_t>(v->getDimensions().dimensionality));
244  for (unsigned i = 0; i < gsl::narrow<unsigned>(scales.size()); ++i) {
245  if (i >= refScalesForVar.size()) {
246  // Indicates that there are more scales than variable dimensions, so user error.
247  throw Exception("There are more scales than variable dimensions.", ioda_Here());
248  }
249  hid_t scale_ht = scales[i]->get()();
250 #if H5_VERSION_GE(1, 12, 0)
251  H5O_info1_t info;
252 #else
253  H5O_info_t info;
254 #endif
255  if (H5Oget_info1(scale_ht, &info) < 0) throw Exception("H5Oget_info failed.", ioda_Here());
256 
257  // Forward mapping
258  refScalesForVar[i].emplace_back(HIDtoVarRef.at(scale_ht));
259  // Reverse mapping
260  if (!ScaleToVarMap.count(info.addr)) // Make new entry if it does not already exist.
261  {
262  VarMapData newdata;
263  newdata.scale = scales[i];
264  ScaleToVarMap.emplace(make_pair(info.addr, newdata));
265  }
266 
267  ScaleToVarMap[info.addr].vars.push_back(ds_list_t{HIDtoVarRef.at(v->get()()), i});
268  }
269  VarToScaleMap.emplace_back(make_pair(v, refScalesForVar));
270  }
271 
272  // Append VarToScaleMap and ScaleToVarMap to the DIMENSION_LIST and REFERENCE_LIST
273  // attributes of all datasets. If these attributes do not exist, create them.
274 
275  // Variables get DIMENSION_LISTs.
276  for (auto& var_scale : VarToScaleMap) {
277  auto& var = var_scale.first;
278  auto& scales = var_scale.second;
279 
280  attr_update_dimension_list(var.get(), scales);
281  }
282  // Scales get REFERENCE_LISTs.
283  for (auto& scale_var : ScaleToVarMap) {
284  auto& scale_addr = scale_var.first;
285  auto& scale = scale_var.second.scale;
286  auto& vars = scale_var.second.vars;
287 
288  attr_update_reference_list(scale.get(), vars);
289  }
290 }
291 
292 } // namespace HH
293 } // namespace Engines
294 } // namespace detail
295 } // namespace ioda
296 
297 /// @}
Convenience classes for constructing ObsSpaces and setting up new Dimension Scales.
Describe the dimensions of a ioda::Attribute or ioda::Variable.
IODA's error system.
HDF5 filters.
HDF5 engine implementation of Attribute.
HDF5 engine implementation of Has_Attributes.
HDF5 engine implementation of Has_Variables.
HDF5 engine implementation of ioda::detail::Type_Provider.
Utility functions for HDF5.
HDF5 engine variable creation parameters.
HDF5 engine implementation of Variable.
HDF5 resource handles in C++.
This class represents attributes, which may be attached to both Variables and Groups.
Definition: Attribute.h:493
The ioda exception class.
Definition: Exception.h:54
Exception & add(const std::string &key, const T value)
Add a key-value pair to the error message.
Definition: Exception.h:75
This class exists inside of ioda::Group or ioda::Variable and provides the interface to manipulating ...
Represents the "type" (i.e. integer, string, float) of a piece of data.
Definition: Type.h:123
Variables store data!
Definition: Variable.h:680
virtual Attribute_Implementation write(gsl::span< char > data, const Type &type)
The fundamental write function. Backends overload this function to implement all write operations.
Definition: Attribute.cpp:65
Variable create(const std::string &name, const Type &in_memory_dataType, const std::vector< Dimensions_t > &dimensions={1}, const std::vector< Dimensions_t > &max_dimensions={}, const VariableCreationParameters &params=VariableCreationParameters()) final
Create a Variable without setting its data.
bool exists(const std::string &name) const final
Does a Variable with the specified name exist?
std::vector< std::string > list() const final
FillValuePolicy getFillValuePolicy() const final
Get the fill value policy used for Variables within this Group.
detail::Type_Provider * getTypeProvider() const final
Query the backend and get the type provider.
Variable open(const std::string &name) const final
Open a Variable by name.
void attachDimensionScales(const std::vector< std::pair< Variable, std::vector< Variable >>> &mapping) final
HDF5-optimized collective variable version of attachDimensionScales.
void remove(const std::string &name) final
Delete a Variable with the specified name.
static HH_Type_Provider * instance()
Definition: HH-types.cpp:36
A class to wrap HDF5's hid_t resource handles.
Definition: Handles.h:92
This encapsulates dataset creation parameters. Used for generating HDF5 property lists for variable c...
static HH_hid_t datasetAccessPlist()
The default dataset access property list. Currently a nullop.
HH_hid_t datasetCreationPlist() const
Generates a dataset creation property list, which encodes the chunking options, compression,...
static HH_hid_t linkCreationPlist()
The ioda-default link creation property list. @detail This just sets a property to create missing int...
HH_hid_t dataspace() const
Generate a dataspace for the constructor-provided dimensions and max dimensions.
virtual bool exists(const std::string &attname) const
Does an Attribute with the specified name exist?
std::shared_ptr< Type_Backend > getBackend() const
Definition: Type.h:103
Backends implement type providers in conjunction with Attributes, Has_Attributes, Variables and Has_V...
Definition: Type_Provider.h:36
IODA_DL std::string condensePaths(const std::vector< std::string > &p, size_t start=0, size_t end=std::string::npos)
The inverse of splitPaths. Concatenate strings, separating with '/'.
Definition: StringFuncs.cpp:41
IODA_DL std::vector< std::string > splitPaths(const std::string &p)
Split a string based on occurrences of the '/' character.
Definition: StringFuncs.cpp:14
FillValuePolicy
This option describes the default fill values that will be used if the user does not manually specify...
Definition: FillPolicy.h:28
@ HDF5
Set all fill values to zero or null strings.
@ NETCDF4
Use NetCDF4 default fill values. This is the default option for ioda files.
IODA_HIDDEN std::string getNameFromIdentifier(hid_t obj_id)
Gets a variable / group / link name from an id. Useful for debugging.
Definition: HH-util.cpp:263
IODA_HIDDEN void attr_update_reference_list(HH_Variable *scale, const std::vector< ds_list_t > &ref_var_axis)
Attribute REFERENCE_LIST update function.
Definition: HH-util.cpp:230
IODA_HIDDEN void attr_update_dimension_list(HH_Variable *var, const std::vector< std::vector< ref_t >> &new_dim_list)
Attribute DIMENSION_LIST update function.
Definition: HH-util.cpp:126
#define ioda_Here()
Used to specify Variable creation-time properties.
Definition: Has_Variables.h:57
Duplicate the HDF5 dataset list structure for REFERENCE_LISTs.
Definition: HH-util.h:42