IODA
Copying.cpp
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2020 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 /// \file Copying.cpp
8 /// \brief Generic copying facility
9 
10 #include "ioda/Copying.h"
11 
12 namespace ioda {
15 ObjectSelection::ObjectSelection(const Group& g, bool recurse) : g_(g), recurse_(recurse) {}
16 
17 /*
18 void ObjectSelection::insert(const ObjectSelection&);
19 void ObjectSelection::insert(const Variable&);
20 void ObjectSelection::insert(const std::vector<Variable>&);
21 void ObjectSelection::insert(const Has_Variables&);
22 void ObjectSelection::insert(const Group&, bool recurse);
23 void ObjectSelection::insert(const Group&, const std::vector<std::string>&);
24 */
25 /*
26 template <class T> ObjectSelection add(
27  const ObjectSelection &src, const T& obj) {
28  ObjectSelection res = src;
29  res.insert(obj);
30  return res;
31 }
32 */
33 /*
34 ObjectSelection ObjectSelection::operator+(const ObjectSelection& obj) const { return add(*this,
35 obj); } ObjectSelection ObjectSelection::operator+(const Variable& obj) const { return add(*this,
36 obj); } ObjectSelection ObjectSelection::operator+(const std::vector<Variable>& obj) const { return
37 add(*this, obj); } ObjectSelection ObjectSelection::operator+(const Has_Variables& obj) const {
38 return add(*this, obj); }
39 */
40 
41 /*
42 template <class T> ObjectSelection& emplace(ObjectSelection& src, const T& obj) {
43  src.insert(obj);
44  return src;
45 }
46 */
47 
48 /*
49 template<> ObjectSelection& emplace<ObjectSelection>(ObjectSelection& src, const T& obj) {
50  if (&src == &obj) return src;
51  src.insert(obj);
52  return src;
53 }
54 ObjectSelection& ObjectSelection::operator+=(const ObjectSelection& obj) { return emplace(*this,
55 obj); } ObjectSelection& ObjectSelection::operator+=(const Variable& obj) { return emplace(*this,
56 obj); } ObjectSelection& ObjectSelection::operator+=(const std::vector<Variable>& obj) { return
57 emplace(*this, obj); } ObjectSelection& ObjectSelection::operator+=(const Has_Variables& obj) {
58 return emplace(*this, obj); }
59 */
60 
62 
63 void copy(const ObjectSelection& from, ObjectSelection& to, const ScaleMapping&) {
64  using namespace std;
65 
66  // We really want to maximize performance here and avoid excessive variable
67  // re-opens and closures that would kill the HDF5 backend.
68  // We want to:
69  // 1) separate the dimension scales from the regular variables.
70  // 2) determine the maximum size along the 0-th dimension.
71  // 3) determine which dimensions are attached to which variable axes.
72 
73  // Convenience lambda to hint if a variable is a scale.
74  auto isPossiblyScale = [](const std::string& name) -> bool {
75  return (std::string::npos == name.find('@')) && (std::string::npos == name.find('/')) ? true
76  : false;
77  };
78 
79  // We start with the names of all of the variables.
80  const std::vector<std::string> allVars = from.g_.listObjects<ObjectType::Variable>(true);
81 
82  size_t max_var_size_ = 0;
83  std::vector<std::string> var_list_, dim_var_list_;
84  var_list_.reserve(allVars.size());
85  dim_var_list_.reserve(allVars.size());
86  std::map<std::string, std::vector<std::string>> dims_attached_to_vars_;
87 
88  // In our processing loop, we want to ideally open each variable only once.
89  // But, the variables and dimension scales are mixed together. To give a "hint"
90  // for where to start, we can partially sort allVars. Our dimension scales all
91  // lack '@' and '/' in their path listings. "nlocs" should always come first,
92  // since this is the most frequently-occurring dimension.
93  std::list<std::string> sortedAllVars;
94  for (const auto& name : allVars) {
95  if (sortedAllVars.empty())
96  sortedAllVars.push_back(name);
97  else {
98  if (isPossiblyScale(name)) {
99  auto second = sortedAllVars.begin();
100  second++;
101  if (sortedAllVars.front() == "nlocs")
102  sortedAllVars.insert(second, name);
103  else
104  sortedAllVars.push_front(name);
105  } else
106  sortedAllVars.push_back(name);
107  }
108  }
109 
110  // Now for the main processing loop.
111  // We separate dimension scales from non-dimension scale variables.
112  // We record the maximum sizes of variables.
113  // We construct the in-memory mapping of dimension scales and variable axes.
114  // Keep track of these to avoid re-opening the scales repeatedly.
115  std::list<std::pair<std::string, Variable>> dimension_scales;
116 
117  auto group = from.g_;
118 
119  for (const auto& vname : sortedAllVars) {
120  Variable v = group.vars.open(vname);
121  const auto dims = v.getDimensions();
122  if (dims.dimensionality >= 1) {
123 #ifdef max
124 # undef max
125 #endif
126  max_var_size_ = std::max(max_var_size_, (size_t)dims.dimsCur[0]);
127  }
128  // Expensive function call.
129  // Only 1-D variables can be scales. Also pre-filter based on name.
130  if (dims.dimensionality == 1 && isPossiblyScale(vname)) {
131  if (v.isDimensionScale()) {
132  (vname == "nlocs") // true / false ternary
133  ? dimension_scales.push_front(std::make_pair(vname, v))
134  : dimension_scales.push_back(std::make_pair(vname, v));
135  dim_var_list_.push_back(vname);
136  continue; // Move on to next variable in the for loop.
137  }
138  }
139  // See above block. By this point in execution, we know that this variable
140  // is not a dimension scale.
141  var_list_.push_back(vname);
142  // Let's figure out which scales are attached to which dimensions.
143  auto attached_dimensions = v.getDimensionScaleMappings(dimension_scales);
144  std::vector<std::string> dimVarNames;
145  dimVarNames.reserve(dims.dimensionality);
146  for (const auto& dim_scales_along_axis : attached_dimensions) {
147  if (dim_scales_along_axis.empty())
148  throw jedi_throw.add("Reason",
149  "Bad dimension mapping. Not all dimension scales are known.");
150  dimVarNames.push_back(dim_scales_along_axis[0].first);
151  }
152  dims_attached_to_vars_.emplace(vname, dimVarNames);
153  }
154  // record lists of regular variables and dimension scale variables
155  // this->resetVarLists();
156  // this->resetVarDimMap();
157 
158  /*
159  // Find the dimension scales
160  const map<string, Variable> scale_id_var_from
161  = [](const Group& baseGrp)
162  {
163  map<string, Variable> res;
164  auto basevars = baseGrp.vars.list();
165  //std::cerr << "Finding dim scales. There are " << basevars.size() << " candidates." <<
166  std::endl; for (const auto& name : basevars) { auto v = baseGrp.vars.open(name); if
167  (v.isDimensionScale()) { string id = name;
168  //string id_scale = v.getDimensionScaleName();
169  //if (id_scale.size()) id = id_scale;
170  res[id] = v;
171  //std::cerr << "\tAdded scale var " << id << std::endl;
172  }
173  }
174  return res;
175  }(from.g_);
176 
177  //std::cerr << "Creating scales in destination." << std::endl;
178 
179  const map<string, Variable> scale_id_var_to
180  = [](Group& destGrp, const map<string, Variable>& srcScales)
181  {
182  map<string, Variable> res;
183  for (const auto& src : srcScales) {
184  //std::cerr << "\tCreating scale " << src.first << std::endl;
185  VariableCreationParameters params;
186  params.chunk = true;
187  params.chunks = src.second.getChunkSizes();
188  params.compressWithGZIP();
189  params.setIsDimensionScale(src.second.getDimensionScaleName());
190  // TODO: Propagate attributes
191  // TODO: Propagate fill value
192  BasicTypes typ = src.second.getBasicType();
193  if (typ == BasicTypes::undefined_)
194  throw jedi_throw.add("Reason", "Unrecognized basic type");
195  const auto dims = src.second.getDimensions();
196 
197  auto newscale = destGrp.vars._create_py(src.first, typ, dims.dimsCur,
198  dims.dimsMax, {}, params); res[src.first] = newscale;
199  }
200  return res;
201  }(to.g_, scale_id_var_from);
202 
203  // Iterate over all of the variables.
204  // Select those that are not dimension scales, and get a few properties.
205  //std::cerr << "Iterating over variables to copy." << std::endl;
206  auto basevars = from.g_.vars.list(); //std::vector<std::string>{ "datetime@MetaData"}; //
207  from.g_.vars.list(); for (const auto& name : basevars) { Variable v = from.g_.vars.open(name); if
208  (!v.isDimensionScale()) {
209  //std::cerr << "\t" << name << "\n"; //std::endl;
210  for (size_t i = 0; i < (size_t)v.getDimensions().dimensionality; ++i) {
211  for (const auto& scales_from : scale_id_var_from) {
212  if (v.isDimensionScaleAttached((unsigned)i, scales_from.second)) {
213  //std::cerr << "\t\t" << i << "\t" << scales_from.first <<
214  "\n"; // std::endl;
215  }
216  }
217 
218  }
219  }
220  }
221  */
222 }
223 } // namespace ioda
Generic copying facility.
Groups are a new implementation of ObsSpaces.
Definition: Group.h:159
Allows you to select objects for a copy operation.
Definition: Copying.h:40
Variables store data!
Definition: Variable.h:680
virtual std::map< ObjectType, std::vector< std::string > > listObjects(ObjectType filter=ObjectType::Ignored, bool recurse=false) const
List all objects (groups + variables) within this group.
Definition: Group.cpp:53
virtual bool isDimensionScale() const
Is this Variable used as a dimension scale?
Definition: Variable.cpp:251
virtual Dimensions getDimensions() const
Definition: Variable.cpp:160
virtual std::vector< std::vector< Named_Variable > > getDimensionScaleMappings(const std::list< Named_Variable > &scalesToQueryAgainst, bool firstOnly=true) const
Which dimensions are attached at which positions? This function may offer improved performance on some backends compared to serial isDimensionScaleAttached calls.
Definition: Variable.cpp:303
IODA_DL void copy(const ObjectSelection &from, ObjectSelection &to, const ScaleMapping &scale_map)
Generic data copying function.
Definition: Copying.cpp:63
Settings for how to remap dimension scales.
Definition: Copying.h:78
bool isPossiblyScale(const std::string &name)
Convenience lambda to hint if a variable might be a scale.
Definition: upgrade.cpp:51