IODA
IodaUtils.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2018-2019 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
#include <fstream>
#include <iomanip>
#include <stdexcept>

#include "ioda/core/IodaUtils.h"
#include "ioda/ObsSpaceParameters.h"

#include "oops/util/DateTime.h"
15 
16 namespace ioda {
17 
18 // -----------------------------------------------------------------------------
19 
// Compute the shape of the 2D character array needed to hold a vector of strings.
//
// Returns a two element vector: element 0 is the number of strings and
// element 1 is the length of the longest string (0 when the vector is empty).
std::vector<std::size_t> CharShapeFromStringVector(
    const std::vector<std::string> & StringVector) {
  // The second dimension has to accommodate the longest string in the vector.
  std::size_t longest = 0;
  for (const std::string & entry : StringVector) {
    if (entry.size() > longest) {
      longest = entry.size();
    }
  }

  return std::vector<std::size_t>{ StringVector.size(), longest };
}
33 
34 // -----------------------------------------------------------------------------
35 
// Convert a flattened, fixed-width character array into a vector of strings.
//
// CharData  - buffer holding CharShape[0] * CharShape[1] characters, with the
//             characters of each string stored contiguously
// CharShape - CharShape[0] is the number of strings,
//             CharShape[1] is the (fixed) length of each string
//
// Returns one string per row with trailing white space (including null
// characters) removed.
std::vector<std::string> CharArrayToStringVector(const char * CharData,
                                                 const std::vector<std::size_t> & CharShape) {
  const std::size_t numStrings = CharShape[0];
  const std::size_t width = CharShape[1];

  // The null character has to be part of the white space set, so the
  // (char *, size_t) form of the string constructor is required here. The plain
  // (char *) form would stop at the null and silently leave it out of the set.
  const std::string trailingFill(" \t\n\r\f\v\0", 7);

  std::vector<std::string> result;
  result.reserve(numStrings);
  for (std::size_t row = 0; row < numStrings; ++row) {
    // Pick off the characters belonging to this row, padding included.
    const char * rowStart = CharData + (row * width);
    std::string text;
    text.assign(rowStart, rowStart + width);

    // Drop the padding. When the row is nothing but white space,
    // find_last_not_of() yields npos and npos + 1 wraps around to 0,
    // which clears the whole string.
    text.erase(text.find_last_not_of(trailingFill) + 1);
    result.push_back(text);
  }

  return result;
}
67 
68 // -----------------------------------------------------------------------------
69 
// Copy a vector of strings into a flattened, fixed-width character array.
//
// StringVector - strings to copy
// CharShape    - CharShape[0] is the number of strings,
//                CharShape[1] is the width of each row in the output
// CharData     - output buffer; CharShape[0] * CharShape[1] characters are written
//
// Strings shorter than the row width are padded on the right with blanks and
// strings longer than the row width are truncated to fit.
void StringVectorToCharArray(const std::vector<std::string> & StringVector,
                             const std::vector<std::size_t> & CharShape, char * CharData) {
  const std::size_t numStrings = CharShape[0];
  const std::size_t width = CharShape[1];

  // Rows are stored back to back, so one running cursor visits every output cell.
  char * cell = CharData;
  for (std::size_t row = 0; row < numStrings; ++row) {
    for (std::size_t col = 0; col < width; ++col, ++cell) {
      const std::string & text = StringVector[row];
      *cell = (col < text.size()) ? text[col] : ' ';
    }
  }
}
86 
87 // -----------------------------------------------------------------------------
88 
89 std::string TypeIdName(const std::type_info & TypeId) {
90  std::string TypeName;
91  if (TypeId == typeid(int)) {
92  TypeName = "integer";
93  } else if (TypeId == typeid(float)) {
94  TypeName = "float";
95  } else if (TypeId == typeid(double)) {
96  TypeName = "double";
97  } else if (TypeId == typeid(std::string)) {
98  TypeName = "string";
99  } else if (TypeId == typeid(util::DateTime)) {
100  TypeName = "DateTime";
101  } else {
102  TypeName = TypeId.name();
103  }
104 
105  return TypeName;
106 }
107 
108 // -----------------------------------------------------------------------------
// Return the length of the longest string in StringVector (0 for an empty vector).
std::size_t FindMaxStringLength(const std::vector<std::string> & StringVector) {
  std::size_t longest = 0;
  for (const std::string & entry : StringVector) {
    const std::size_t length = entry.size();
    if (length > longest) {
      longest = length;
    }
  }
  return longest;
}
118 
119 // -----------------------------------------------------------------------------
// Form the full variable name, "groupName/varName", from its two parts.
std::string fullVarName(const std::string & groupName, const std::string & varName) {
  std::string fullName(groupName);
  fullName += "/";
  fullName += varName;
  return fullName;
}
123 
124 // -----------------------------------------------------------------------------
125 void collectVarDimInfo(const ObsGroup & obsGroup, VarNameObjectList & varObjectList,
126  VarNameObjectList & dimVarObjectList, VarDimMap & dimsAttachedToVars,
127  Dimensions_t & maxVarSize0) {
128  // We really want to maximize performance here and avoid excessive variable
129  // re-opens and closures that would kill the HDF5 backend.
130  // We want to:
131  // 1) separate the dimension scales from the regular variables.
132  // 2) determine the maximum size along the 0-th dimension.
133  // 3) determine which dimensions are attached to which variable axes.
134 
135  // Convenience lambda to hint if a variable is a scale. This is not definitive,
136  // but has a high likelihood of being correct. The idea is that all variables
137  // will have either a "@" or "/" in their names, whereas dimension scales
138  // will not. This lambda returns true if the name has neither "@" nor "/" in
139  // its value.
140  auto isPossiblyScale = [](const std::string& name) -> bool {
141  return (std::string::npos == name.find('@')) &&
142  (std::string::npos == name.find('/')) ? true : false;
143  };
144 
145  // Retrieve all variable names from the input file. Argument to listObjects is bool
146  // and when true will cause listObjects to recurse through the entire Group hierarchy.
147  std::vector<std::string> allVars = obsGroup.listObjects<ObjectType::Variable>(true);
148 
149  // Create a list that will help optimize the actual processing (below). In this
150  // list place "nlocs" first (since it is by far the most commonly occurring
151  // dimension scale), and try to follow that by the remain dimension scales and finally
152  // the variables using those dimension scales. Use the convenience lambda above for
153  // "identifying" dimension scales.
154  std::list<std::string> sortedAllVars;
155  for (const auto& name : allVars) {
156  if (sortedAllVars.empty()) {
157  sortedAllVars.push_back(name);
158  } else {
159  if (isPossiblyScale(name)) {
160  auto second = sortedAllVars.begin();
161  second++;
162  if (sortedAllVars.front() == "nlocs") {
163  sortedAllVars.insert(second, name);
164  } else {
165  sortedAllVars.push_front(name);
166  }
167  } else {
168  sortedAllVars.push_back(name);
169  }
170  }
171  }
172 
173  // Now for the main processing loop.
174  // We separate dimension scales from non-dimension scale variables.
175  // We record the maximum sizes of variables.
176  // We construct the in-memory mapping of dimension scales and variable axes.
177  // Keep track of these to avoid re-opening the scales repeatedly.
178  std::list<ioda::Named_Variable> dimension_scales;
179 
180  varObjectList.reserve(allVars.size());
181  dimVarObjectList.reserve(allVars.size());
182  maxVarSize0 = 0;
183  for (const auto& vname : sortedAllVars) {
184  Variable v = obsGroup.vars.open(vname);
185  const auto dims = v.getDimensions();
186  if (dims.dimensionality >= 1) {
187  maxVarSize0 = std::max(maxVarSize0, dims.dimsCur[0]);
188  }
189 
190  // Expensive function call.
191  // Only 1-D variables can be scales. Also pre-filter based on name.
192  if (dims.dimensionality == 1 && isPossiblyScale(vname)) {
193  if (v.isDimensionScale()) {
194  (vname == "nlocs") // true / false ternary
195  ? dimension_scales.push_front(ioda::Named_Variable(vname, v))
196  : dimension_scales.push_back(ioda::Named_Variable(vname, v));
197  dimVarObjectList.push_back(std::make_pair(vname, v));
198  continue; // Move on to next variable in the for loop.
199  }
200  }
201 
202  // See above block. By this point in execution, we know that this variable
203  // is not a dimension scale.
204  varObjectList.push_back(std::make_pair(vname, v));
205 
206  // Let's figure out which scales are attached to which dimensions.
207  auto attached_dimensions = v.getDimensionScaleMappings(dimension_scales);
208  std::vector<std::string> dimVarNames;
209  dimVarNames.reserve(dims.dimensionality);
210  for (const auto& dim_scales_along_axis : attached_dimensions) {
211  if (dim_scales_along_axis.empty()) {
212  throw;
213  }
214  dimVarNames.push_back(dim_scales_along_axis[0].name);
215  }
216  dimsAttachedToVars.emplace(vname, dimVarNames);
217  }
218 }
219 
220 //------------------------------------------------------------------------------------
221 std::type_index varDtype(const Group & group, const std::string & varName) {
222  Variable var = group.vars.open(varName);
223  std::type_index varType(typeid(std::string));
224  if (var.isA<int>()) {
225  varType = typeid(int);
226  } else if (var.isA<float>()) {
227  varType = typeid(float);
228  }
229  return varType;
230 }
231 
232 //------------------------------------------------------------------------------------
233 bool varIsDimScale(const Group & group, const std::string & varName) {
234  Variable var = group.vars.open(varName);
235  return var.isDimensionScale();
236 }
237 
238 //------------------------------------------------------------------------------------
239 std::vector<util::DateTime> convertDtStringsToDtime(const std::vector<std::string> & dtStrings) {
240  // Convert ISO 8601 strings directly to DateTime objects
241  std::size_t dtimeSize = dtStrings.size();
242  std::vector<util::DateTime> dateTimeValues(dtimeSize);
243  for (std::size_t i = 0; i < dtimeSize; ++i) {
244  util::DateTime dateTime(dtStrings[i]);
245  dateTimeValues[i] = dateTime;
246  }
247  return dateTimeValues;
248 }
249 
250 //------------------------------------------------------------------------------------
251 std::vector<util::DateTime> convertRefOffsetToDtime(const int refIntDtime,
252  const std::vector<float> & timeOffsets) {
253  // convert refDtime to a DateTime object
254  int Year = refIntDtime / 1000000;
255  int TempInt = refIntDtime % 1000000;
256  int Month = TempInt / 10000;
257  TempInt = TempInt % 10000;
258  int Day = TempInt / 100;
259  int Hour = TempInt % 100;
260  util::DateTime refDtime(Year, Month, Day, Hour, 0, 0);
261 
262  // Convert offset time to a Duration and add to RefDate.
263  std::size_t dtimeSize = timeOffsets.size();
264  std::vector<util::DateTime> dateTimeValues(dtimeSize);
265  for (std::size_t i = 0; i < dtimeSize; ++i) {
266  util::DateTime dateTime =
267  refDtime + util::Duration(round(timeOffsets[i] * 3600));
268  dateTimeValues[i] = dateTime;
269  }
270  return dateTimeValues;
271 }
272 
273 //------------------------------------------------------------------------------------
274 std::vector<std::string> StringArrayToStringVector(
275  const std::vector<std::string> & arrayData,
276  const std::vector<Dimensions_t> & arrayShape) {
277  // arrayShape[0] is the number of strings
278  // arrayShape[1] is the length of each string
279  std::size_t nstrings = arrayShape[0];
280  std::size_t strLength = arrayShape[1];
281 
282  //
283  std::vector<std::string> stringVector(nstrings, "");
284  for (std::size_t i = 0; i < nstrings; i++) {
285  std::string oneString = "";
286  for (std::size_t j = 0; j < strLength; j++) {
287  oneString += arrayData[(i*strLength) + j];
288  }
289 
290  // Strip off trainling whitespace.
291  //
292  // In order to include null characters in the white space list, the (char *, size_t)
293  // form of the string constructor needs to be used. The size_t (2nd) argument says
294  // how many characters to use from the "buffer" (1st argument). If the (char *) form
295  // of the string constructor is use, the null character terminates the string and only
296  // those characters leading up to the null are used.
297  std::string WhiteSpace(" \t\n\r\f\v\0", 7);
298  oneString.erase(oneString.find_last_not_of(WhiteSpace) + 1, std::string::npos);
299  stringVector[i] = oneString;
300  }
301 
302  return stringVector;
303 }
304 
305 // -----------------------------------------------------------------------------
306 void setOfileParamsFromTestConfig(const eckit::LocalConfiguration & obsConfig,
307  ioda::ObsSpaceParameters & obsParams) {
308  // Get dimensions and variables sub configurations
309  std::vector<eckit::LocalConfiguration> writeDimConfigs =
310  obsConfig.getSubConfigurations("write dimensions");
311  std::vector<eckit::LocalConfiguration> writeVarConfigs =
312  obsConfig.getSubConfigurations("write variables");
313 
314  // Add the dimensions scales to the ObsIo parameters
315  std::map<std::string, Dimensions_t> dimSizes;
316  for (std::size_t i = 0; i < writeDimConfigs.size(); ++i) {
317  std::string dimName = writeDimConfigs[i].getString("name");
318  Dimensions_t dimSize = writeDimConfigs[i].getInt("size");
319  bool isUnlimited = writeDimConfigs[i].getBool("unlimited", false);
320 
321  if (isUnlimited) {
322  obsParams.setDimScale(dimName, dimSize, Unlimited, dimSize);
323  } else {
324  obsParams.setDimScale(dimName, dimSize, dimSize, dimSize);
325  }
326  dimSizes.insert(std::pair<std::string, Dimensions_t>(dimName, dimSize));
327  }
328 
329  // Add the maximum variable size to the ObsIo parmeters
330  Dimensions_t maxVarSize = 0;
331  for (std::size_t i = 0; i < writeVarConfigs.size(); ++i) {
332  std::vector<std::string> dimNames = writeVarConfigs[i].getStringVector("dims");
333  Dimensions_t varSize0 = dimSizes.at(dimNames[0]);
334  if (varSize0 > maxVarSize) {
335  maxVarSize = varSize0;
336  }
337  }
338  obsParams.setMaxVarSize(maxVarSize);
339 }
340 
341 
342 // -----------------------------------------------------------------------------
// Attach the rank number to the output file name to avoid collisions when running
// with multiple MPI tasks.
//
// fileName    - original file name, optionally with a leading directory path
// rankNum     - process rank; written zero padded to at least four digits
// timeRankNum - time rank; appended only when it is non-negative
//
// Returns the file name with "_<rankNum>" (and "_<timeRankNum>" when present)
// inserted ahead of the file extension, or appended when the file has no
// extension. Examples: ("obs.nc4", 3, -1) -> "obs_0003.nc4",
// ("obs.nc4", 3, 2) -> "obs_0003_2.nc4", ("../out/obs", 1, -1) -> "../out/obs_0001".
std::string uniquifyFileName(const std::string & fileName, const std::size_t rankNum,
                             const int timeRankNum) {
  // The extension starts at the right-most dot of the file name. A dot that sits
  // ahead of the last path separator belongs to a directory (e.g. "../" or
  // "run.d/"), not to the file, so in that case the file has no extension and
  // the suffix goes on the end.
  const std::size_t lastSlash = fileName.find_last_of('/');
  std::size_t insertPos = fileName.find_last_of('.');
  const bool noDot = (insertPos == std::string::npos);
  const bool dotInDirectory =
    !noDot && (lastSlash != std::string::npos) && (insertPos < lastSlash);
  if (noDot || dotInDirectory) {
    insertPos = fileName.length();
  }

  // Get the process rank number and format it
  std::ostringstream suffix;
  suffix << "_" << std::setw(4) << std::setfill('0') << rankNum;
  if (timeRankNum >= 0) {
    suffix << "_" << timeRankNum;
  }

  // Construct the output file name
  std::string uniqueFileName = fileName;
  uniqueFileName.insert(insertPos, suffix.str());
  return uniqueFileName;
}
363 
364 // -----------------------------------------------------------------------------
// Convert a variable name from the new format to the old format.
//
// New format is "Group/Variable", old format is "Variable@Group". The name is
// split at its first slash. A name without a slash is returned unchanged.
std::string convertNewVnameToOldVname(const std::string & varName) {
  const std::size_t slashPos = varName.find("/");
  if (slashPos == std::string::npos) {
    // no slash, nothing to convert
    return varName;
  }

  // Everything after the first slash is the variable part, everything before
  // it is the group part.
  std::string oldFormat(varName, slashPos + 1);
  oldFormat += "@";
  oldFormat.append(varName, 0, slashPos);
  return oldFormat;
}
379 
380 // -----------------------------------------------------------------------------
381 } // namespace ioda
Groups are a new implementation of ObsSpaces.
Definition: Group.h:159
An ObsGroup is a specialization of a ioda::Group. It provides convenience functions and guarantees th...
Definition: ObsGroup.h:32
void setMaxVarSize(const Dimensions_t maxVarSize)
set the maximum variable size
void setDimScale(const std::string &dimName, const Dimensions_t curSize, const Dimensions_t maxSize, const Dimensions_t chunkSize)
set a new dimension scale
Variables store data!
Definition: Variable.h:680
Has_Variables vars
Use this to access variables.
Definition: Group.h:123
virtual std::map< ObjectType, std::vector< std::string > > listObjects(ObjectType filter=ObjectType::Ignored, bool recurse=false) const
List all objects (groups + variables) within this group.
Definition: Group.cpp:53
virtual Variable open(const std::string &name) const
Open a Variable by name.
bool isA() const
Convenience function to check a Variable's storage type.
Definition: Variable.h:99
virtual bool isDimensionScale() const
Is this Variable used as a dimension scale?
Definition: Variable.cpp:251
virtual Dimensions getDimensions() const
Definition: Variable.cpp:160
virtual std::vector< std::vector< Named_Variable > > getDimensionScaleMappings(const std::list< Named_Variable > &scalesToQueryAgainst, bool firstOnly=true) const
Which dimensions are attached at which positions? This function may offer improved performance on som...
Definition: Variable.cpp:303
void StringVectorToCharArray(const std::vector< std::string > &StringVector, const std::vector< std::size_t > &CharShape, char *CharData)
Definition: IodaUtils.cc:70
constexpr int Unlimited
Specifies that a dimension is resizable to infinity.
std::vector< std::size_t > CharShapeFromStringVector(const std::vector< std::string > &StringVector)
Definition: IodaUtils.cc:20
std::vector< std::pair< std::string, Variable > > VarNameObjectList
typedef for holding list of variable names with associated variable object
Definition: IodaUtils.h:30
std::string fullVarName(const std::string &groupName, const std::string &varName)
form full variable name given individual group and variable names
Definition: IodaUtils.cc:120
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
Definition: IodaUtils.h:36
std::type_index varDtype(const Group &group, const std::string &varName)
get variable data type
Definition: IodaUtils.cc:221
std::vector< std::string > CharArrayToStringVector(const char *CharData, const std::vector< std::size_t > &CharShape)
Definition: IodaUtils.cc:36
std::string uniquifyFileName(const std::string &fileName, const std::size_t rankNum, const int timeRankNum)
uniquify the output file name
Definition: IodaUtils.cc:343
void collectVarDimInfo(const ObsGroup &obsGroup, VarNameObjectList &varObjectList, VarNameObjectList &dimVarObjectList, VarDimMap &dimsAttachedToVars, Dimensions_t &maxVarSize0)
collect variable and dimension information from a ioda ObsGroup
Definition: IodaUtils.cc:125
std::vector< util::DateTime > convertRefOffsetToDtime(const int refIntDtime, const std::vector< float > &timeOffsets)
convert reference time and time offsets to DateTime objects
Definition: IodaUtils.cc:251
bool varIsDimScale(const Group &group, const std::string &varName)
true if variable is a dimension scale
Definition: IodaUtils.cc:233
std::size_t FindMaxStringLength(const std::vector< std::string > &StringVector)
Definition: IodaUtils.cc:109
std::string TypeIdName(const std::type_info &TypeId)
Definition: IodaUtils.cc:89
void setOfileParamsFromTestConfig(const eckit::LocalConfiguration &obsConfig, ioda::ObsSpaceParameters &obsParams)
set params for output file construction from test YAML configuration
Definition: IodaUtils.cc:306
std::vector< util::DateTime > convertDtStringsToDtime(const std::vector< std::string > &dtStrings)
convert datetime strings to DateTime object
Definition: IodaUtils.cc:239
std::string convertNewVnameToOldVname(const std::string &varName)
convert the new format variable name to the old format
Definition: IodaUtils.cc:365
std::vector< std::string > StringArrayToStringVector(const std::vector< std::string > &arrayData, const std::vector< Dimensions_t > &arrayShape)
convert 2D string array to a vector of strings
Definition: IodaUtils.cc:274
A named pair of (variable_name, ioda::Variable).
Definition: Variable.h:752
bool isPossiblyScale(const std::string &name)
Convenience lambda to hint if a variable might be a scale.
Definition: upgrade.cpp:51