53 return (std::string::npos ==
name.find(
'@'))
54 && (std::string::npos ==
name.find(
'/')) ?
true :
false;
62 std::list<std::string> sortedAllVars;
63 for (
const auto&
name : allVars) {
64 if (sortedAllVars.empty()) {
65 sortedAllVars.push_back(
name);
68 auto second = sortedAllVars.begin();
70 if (sortedAllVars.front() ==
"nlocs") {
71 sortedAllVars.insert(second,
name);
73 sortedAllVars.push_front(
name);
76 sortedAllVars.push_back(
name);
86 typedef std::map<ioda::Named_Variable, Vec_Named_Variable>
VarDimMap;
97 ioda::Dimensions_t& maxVarSize0) {
108 std::vector<std::string> allVars = obsGroup.
listObjects<ObjectType::Variable>(
true);
116 if (obsGroup.
list().empty()) {
117 std::list<std::string> fix_known_scales, fix_known_nonscales;
118 for (
const auto& vname : sortedAllVars) {
120 if (v.var.isDimensionScale()) {
122 ? fix_known_scales.push_front(v.name)
123 : fix_known_scales.push_back(v.name);
125 fix_known_nonscales.push_back(v.name);
127 sortedAllVars.clear();
128 for (
const auto& e : fix_known_scales) sortedAllVars.push_back(e);
129 for (
const auto& e : fix_known_nonscales) sortedAllVars.push_back(e);
137 std::list<Named_Variable> dimension_scales;
139 varList.reserve(allVars.size());
140 dimVarList.reserve(allVars.size());
142 for (
const auto& vname : sortedAllVars) {
145 if (dims.dimensionality >= 1) {
146 maxVarSize0 = std::max(maxVarSize0, dims.dimsCur[0]);
152 if (v.var.isDimensionScale()) {
154 ? dimension_scales.push_front(v)
155 : dimension_scales.push_back(v);
156 dimVarList.push_back(v);
165 varList.push_back(v);
168 auto attached_dimensions = v.var.getDimensionScaleMappings(dimension_scales);
169 std::vector<Named_Variable> dimVars;
170 dimVars.reserve(dims.dimensionality);
171 for (
const auto& dim_scales_along_axis : attached_dimensions) {
172 if (dim_scales_along_axis.empty()) {
175 dimVars.push_back(dim_scales_along_axis[0]);
180 dimsAttachedToVars.emplace(v, dimVars);
196 auto pos =
name.find_last_of(
"_");
197 if (pos != std::string::npos) {
199 if ((pos + 1) <
name.length()) {
200 std::string testSuffix =
name.substr(pos + 1);
201 if (testSuffix.find_first_not_of(
"0123456789") != std::string::npos) {
203 pos = std::string::npos;
216 using namespace ioda;
218 dissimilarVariables.reserve(inVarList.size());
224 sort(sortedNames.begin(), sortedNames.end());
228 auto varsAreSimilar = [](
const std::string& lhs,
const std::string& rhs) ->
bool {
231 if ((lhs.find(
"MetaData/") != string::npos) || (rhs.find(
"MetaData/") != string::npos)) {
236 if (lhs.find_first_of(
"0123456789") == string::npos
237 && rhs.find_first_of(
"0123456789") == string::npos)
241 return id_lhs == id_rhs;
248 auto collect = [&dissimilarVariables, &similarVariables](Vec_Named_Variable::const_iterator start, Vec_Named_Variable::const_iterator end) {
251 if (start->name.find(
"MetaData/") != string::npos ||
254 cout <<
" Unique variable: " << start->name <<
".\n";
255 dissimilarVariables.push_back(*start);
260 cout <<
" Grouping 1 variable into: " << rangeName <<
".\n";
270 sort(range.begin(), range.end(),
272 string sidnum_lhs = lhs.name.substr(getChanSuffixPos(lhs.name) + 1);
273 string sidnum_rhs = rhs.name.substr(getChanSuffixPos(rhs.name) + 1);
274 int idnum_lhs = std::atoi(sidnum_lhs.c_str());
275 int idnum_rhs = std::atoi(sidnum_rhs.c_str());
276 return idnum_lhs < idnum_rhs;
282 cout <<
" Grouping " << range.size() <<
" variables into: " << rangeName <<
".\n";
287 auto rangeStart = sortedNames.cbegin();
288 auto rangeEnd = rangeStart;
289 for (
auto it = sortedNames.cbegin() + 1; it != sortedNames.cend(); ++it) {
290 if (varsAreSimilar(rangeStart->name, it->name)) {
293 collect(rangeStart, rangeEnd);
299 if ((it + 1) == sortedNames.cend()) {
300 collect(rangeStart, rangeEnd);
313 VarDimMap translateToNewDims(
314 const Vec_Named_Variable& newDimList,
315 const VarDimMap& oldDimsAttachedToVars) {
316 std::map<std::string, ioda::Named_Variable> newDims;
317 for (const auto& d : newDimList) newDims[d.name] = d;
321 for (const auto& oldVar : oldDimsAttachedToVars) {
322 const auto& oldVec = oldVar.second;
323 Vec_Named_Variable newVec(oldVec);
324 for (auto& s : newVec) s.var = newDims.at(s.name).var;
325 res[ioda::Named_Variable(oldVar.first.name, ioda::Variable())] = newVec;
338 const std::string & newVarName,
const std::map<int, int> & chanNumToIndex) {
339 using namespace ioda;
348 for (
size_t i = 0; i < old.size(); ++i) {
354 if (oldvar.
isA<std::string>()) {
357 vector<string> buf_in;
358 oldvar.
read<
string>(buf_in);
360 newvar.
write<
string>(buf_in);
363 vector<string> buf_out;
364 buf_out.reserve(gsl::narrow<size_t>(newvar_dims.
numElements));
368 vector<char> new_str(group_sz + 1,
'\0');
369 for (
size_t i = 0; i < buf_in.size(); ++i) {
370 size_t idx = i % group_sz;
371 new_str[idx] = buf_in[i][0];
373 string str(new_str.data());
375 str.erase(std::find_if(str.rbegin(), str.rend(),
376 [](
unsigned char ch) { return !std::isspace(ch); }).base(), str.end());
378 if (idx + 1 == group_sz) buf_out.push_back(str);
380 newvar.
write<
string>(buf_out);
384 vector<char> buf(oldvar_dims.
numElements * sz_type_in_bytes);
385 oldvar.
read(gsl::make_span<char>(buf.data(), buf.size()), oldvar.
getType());
386 if (old.size() == 1) {
388 newvar.
write(gsl::make_span<char>(buf.data(), buf.size()), newvar.
getType());
393 if (!chanNumToIndex.empty()) {
394 string oldVarName = old[i].name;
395 if (oldVarName.find(newVarName) == 0) {
397 int pos = newVarName.length() + 1;
398 int chanNum = stoi(oldVarName.substr(pos));
406 *extent_mem.rbegin() = 1;
413 *start_ioda.rbegin() = i;
422 mem_selection.
select(sel_mem);
426 ioda_selection.
select(sel_ioda);
428 newvar.
write(gsl::make_span<char>(buf.data(), buf.size()), newvar.
getType(), mem_selection,
439 using namespace ioda;
441 vector<pair<string, Attribute>> srcAtts = src.
openAll();
443 for (
const auto &s : srcAtts) {
454 const set<string> ignored_names{
459 "_Netcdf4Coordinates",
463 if (ignored_names.count(s.first))
continue;
464 if (dest.
exists(s.first))
continue;
468 size_t sz_type_in_bytes = typ.
getSize();
474 vector<char> buf(1,
'\0');
476 newatt.
write(gsl::make_span<char>(buf.data(), buf.size()), typ);
479 vector<char> buf(dims.
numElements * sz_type_in_bytes);
480 s.second.read(gsl::make_span<char>(buf.data(), buf.size()), typ);
483 newatt.
write(gsl::make_span<char>(buf.data(), buf.size()), typ);
489 bool groupSimilarVariables =
true;
494 using namespace ioda;
500 Dimensions_t maxVarSize0;
507 const bool groupSimilarVariables =
false;
508 if (
params.groupSimilarVariables)
511 ungrouped_varList = varList;
516 map<string, Vec_Named_Variable> dimsAttachedToVars_bystring;
517 for (
const auto& val : dimsAttachedToVars)
526 set<string> attachedDims;
527 for (
const auto & ivar : dimsAttachedToVars_bystring) {
528 for (
const auto & idim : dimsAttachedToVars_bystring.at(ivar.first)) {
529 attachedDims.insert(idim.name);
534 for (
const auto& dim : dimVarList) {
538 if (!(dim.name ==
"nchans" && old_grouped_vars.size()) &&
539 (attachedDims.find(dim.name) != attachedDims.end()))
543 if (old_grouped_vars.size()) {
544 cout <<
" Creating nchans variable.\n";
551 VarDimMap::iterator chanTemplate;
553 for (VarDimMap::iterator ivar = old_grouped_vars.begin();
554 ivar != old_grouped_vars.end(); ++ivar) {
555 if (ivar->second.size() > maxChanSize) {
557 maxChanSize = ivar->second.size();
561 vector<int32_t>
channels(chanTemplate->second.size());
562 for (
size_t i = 0; i < chanTemplate->second.size(); ++i) {
563 string schan = chanTemplate->second[i].name.substr(
564 chanTemplate->second[i].name.find_last_not_of(
"_0123456789") + 2);
565 channels[i] = std::atoi(schan.c_str());
569 auto nds = NewDimensionScale<int32_t>(
"nchans", gsl::narrow<Dimensions_t>(
channels.size()),
570 gsl::narrow<Dimensions_t>(
channels.size()),
571 gsl::narrow<Dimensions_t>(
channels.size()));
573 newdims.push_back(nds);
586 map<string, Variable> newscales, newvars;
587 for (
const auto& dim : newdims) newscales[dim->name_] = out.vars[dim->name_];
590 if (attachedDims.find(d.name) != attachedDims.end()) {
602 if (oldVar.var.isA<
string>()) {
605 size_t sz_bytes = oldVar.var.getType().getSize();
626 cout <<
" Converting old-format string variable: " << oldVar.name <<
"\n";
628 newvars[oldVar.name] = out.vars.create<
string>(oldVar.name, mod_dims, adjustedParams);
629 return newvars[oldVar.name];
633 adjustedParams.
chunk =
true;
639 adjustedParams.
chunks = dims.dimsCur;
640 auto&
c = adjustedParams.
chunks;
641 const Dimensions_t max_chunk_size = 6400;
642 while (accumulate(
c.begin(),
c.end(),
643 static_cast<Dimensions_t
>(1), multiplies<Dimensions_t>())> max_chunk_size)
645 auto dim =
c.rbegin();
646 while (*dim == 1) dim++;
653 = out.vars.create(oldVar.name, oldVar.var.getType(), dims, adjustedParams);
654 return newvars[oldVar.name];
661 for (
const auto& oldVar : ungrouped_varList) {
665 auto newvar = makeNewVar(oldVar, dims,
params);
672 const Dimensions_t suggested_chan_chunking
673 = (newscales.count(
"nchans")) ? newscales[
"nchans"].atts[
"suggested_chunk_dim"].read<Dimensions_t>() : 100;
674 map<string, Named_Variable> new_grouped_vars;
676 if (old_grouped_vars.size() > 0) {
677 numChans = out.vars.open(
"nchans").getDimensions().dimsCur[0];
679 for (
const auto& oldGroup : old_grouped_vars) {
680 Dimensions dims = oldGroup.second.begin()->var.getDimensions();
681 Dimensions_t n = gsl::narrow<Dimensions_t>(oldGroup.second.size());
691 = oldGroup.second.begin()->var.getCreationParameters(
false,
false);
692 params.chunks.push_back(suggested_chan_chunking);
695 auto createdVar = makeNewVar(proto_var, dims,
params);
697 for (
const auto& src : oldGroup.second)
copyAttributes(src.var.atts, createdVar.atts);
704 grouped_scales.push_back(
Named_Variable{
"nchans", newscales[
"nchans"]});
705 dimsForNewVars[created] = grouped_scales;
706 new_grouped_vars[oldGroup.first.name] = created;
715 vector<pair<Variable, vector<Variable>>> out_dimsAttachedToVars;
716 auto make_out_dimsAttachedToVars
720 vector<Variable> newdims;
721 for (
const auto& d : olddims)
722 newdims.emplace_back(newscales[d.name]);
724 if (m.var.isA<
string>()) {
725 if (m.var.getType().getSize() == 1) {
729 out_dimsAttachedToVars.emplace_back(make_pair(newvar, newdims));
731 for (
const auto& m : ungrouped_varList) {
732 make_out_dimsAttachedToVars(dimsForNewVars.at(m), m);
734 for (
const auto& m : new_grouped_vars) {
735 make_out_dimsAttachedToVars(dimsForNewVars.at(m.second), m.second);
737 out.vars.attachDimensionScales(out_dimsAttachedToVars);
740 cout <<
"\n Copying data:\n";
744 for (
const auto& oldvar : ungrouped_varList) {
745 cout <<
" " << oldvar.name <<
"\n";
749 if (old_grouped_vars.size() > 0) {
750 std::map<int, int> chanNumToIndex;
751 std::vector<int> chanNums;
752 out.vars.open(
"nchans").read<
int>(chanNums);
753 for (
size_t i = 0; i < chanNums.size(); ++i) {
754 chanNumToIndex[chanNums[i]] = i;
757 for (
const auto& v : old_grouped_vars) {
758 cout <<
" " << v.first.name <<
"\n";
759 copyData(v.second, newvars[v.first.name], out, v.first.name, chanNumToIndex);
767 int main(
int argc,
char** argv) {
772 cerr <<
"Usage: ioda-upgrade.x [-n] input_file output_file\n"
773 <<
" -n: do not group similar variables into one 2D varible\n";
780 bool groupVariables =
true;
782 sInputFile = argv[1];
783 sOutputFile = argv[2];
784 }
else if ((argc == 4) && (strcmp(argv[1],
"-n") == 0)) {
785 sInputFile = argv[2];
786 sOutputFile = argv[3];
787 groupVariables =
false;
795 cout <<
"Input: " << sInputFile <<
"\nOutput: " << sOutputFile << endl;
797 params.groupSimilarVariables = groupVariables;
799 cout <<
" Success!\n";
801 }
catch (
const std::exception& e) {
802 cerr <<
"Exception: " << e.what() << endl << endl;
805 cerr <<
"An uncaught exception occurred." << endl << endl;
Convenience classes for constructing ObsSpaces and setting up new Dimension Scales.
Definitions for setting up backends with file and memory I/O.
Interfaces for ioda::Group and related classes.
Interfaces for ioda::ObsGroup and related classes.
This class represents attributes, which may be attached to both Variables and Groups.
The ioda exception class.
Groups are a new implementation of ObsSpaces.
This class exists inside of ioda::Group or ioda::Variable and provides the interface to manipulating ...
An ObsGroup is a specialization of a ioda::Group. It provides convenience functions and guarantees th...
static ObsGroup generate(Group &emptyGroup, const NewDimensionScales_t &fundamentalDims, std::shared_ptr< const detail::DataLayoutPolicy > layout=nullptr)
Create an empty ObsGroup and populate it with the fundamental dimensions.
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
Represents the "type" (i.e. integer, string, float) of a piece of data.
std::type_index getType() const
virtual Attribute_Implementation write(gsl::span< char > data, const Type &type)
The fundamental write function. Backends overload this function to implement all write operations.
Has_Attributes atts
Use this to access the metadata for the group / ObsSpace.
Has_Variables vars
Use this to access variables.
virtual std::map< ObjectType, std::vector< std::string > > listObjects(ObjectType filter=ObjectType::Ignored, bool recurse=false) const
List all objects (groups + variables) within this group.
std::vector< std::string > list() const
List all one-level child groups in this group.
virtual Attribute create(const std::string &attrname, const Type &in_memory_dataType, const std::vector< Dimensions_t > &dimensions={1})
Create an Attribute without setting its data.
virtual std::vector< std::pair< std::string, Attribute > > openAll() const
Open all attributes in an object.
virtual bool exists(const std::string &attname) const
Does an Attribute with the specified name exist?
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual size_t getSize() const
Get the size of a single element of a type, in bytes.
virtual Type getType() const
Get type.
bool isA() const
Convenience function to check a Variable's storage type.
virtual Dimensions getDimensions() const
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
virtual Variable write(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all)
The fundamental write function. Backends overload this function to implement all write operations.
IODA_DL Group createFile(const std::string &filename, BackendCreateModes mode, HDF5_Version_Range compat=defaultVersionRange())
Create a ioda::Group backed by an HDF5 file.
IODA_DL Group openMemoryFile(const std::string &filename, BackendOpenModes mode=BackendOpenModes::Read_Only, bool flush_on_close=false, size_t increment_len_bytes=1000000, HDF5_Version_Range compat=defaultVersionRange())
Map an HDF5 file in memory and open a ioda::Group.
std::pair< HDF5_Version, HDF5_Version > HDF5_Version_Range
@ V18
Use the latest HDF5 v1.8 format for storing objects.
@ Truncate_If_Exists
If the file already exists, overwrite it.
std::vector< Dimensions_t > VecDimensions_t
Selection & select(const SingleSelection &s)
Append a new selection.
IODA_DL std::string convertV1PathToV2Path(const std::string &path)
Split path into substrings separated by @ characters, then concatenate them in reverse order,...
std::vector< std::shared_ptr< NewDimensionScale_Base > > NewDimensionScales_t
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
std::shared_ptr< NewDimensionScale_Object< DataType > > NewDimensionScale(const std::string &name, Dimensions_t size, Dimensions_t maxSize=Unspecified, Dimensions_t chunkingSize=Unspecified)
Wrapper function used when listing new dimension scales to construct.
void collectVarDimInfo(const ObsGroup &obsGroup, VarNameObjectList &varObjectList, VarNameObjectList &dimVarObjectList, VarDimMap &dimsAttachedToVars, Dimensions_t &maxVarSize0)
collect variable and dimension information from a ioda ObsGroup
Describes the dimensions of an Attribute or Variable.
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Dimensions_t dimensionality
The dimensionality (rank) of the data.
std::vector< Dimensions_t > dimsMax
This must always equal dimsCur for Attribute.
A named pair of (variable_name, ioda::Variable).
Represents a hyperslab or a series of points in a selection, coupled with a SelectionOperator "action...
Used to specify Variable creation-time properties.
void compressWithGZIP(int level=6)
std::vector< Dimensions_t > chunks
Manually specify the chunks. Never directly use. Use getChunks(...) instead.
VariableCreationParameters & setFillValue(DataType fill)
bool chunk
Do we chunk this variable? Required for extendible / compressible Variables.
int main(int argc, char **argv)
void identifySimilarVariables(const Vec_Named_Variable &inVarList, VarDimMap &similarVariables, Vec_Named_Variable &dissimilarVariables)
Determine which variables may be grouped.
void collectVarDimInfo(const ioda::Group &obsGroup, Vec_Named_Variable &varList, Vec_Named_Variable &dimVarList, VarDimMap &dimsAttachedToVars, ioda::Dimensions_t &maxVarSize0)
Traverse file structure and determine dimension scales and regular variables. Also determine which di...
bool isPossiblyScale(const std::string &name)
Convenience function to hint if a variable might be a scale.
std::list< std::string > preferentialSortVariableNames(const std::vector< std::string > &allVars)
Sort variable names in a preferential way so that likely scales end up first.
void copyData(const Vec_Named_Variable &old, ioda::Variable &newvar, const ioda::ObsGroup &base, const std::string &newVarName, const std::map< int, int > &chanNumToIndex)
Copy data from oldvar into newvar. Offsets are supported for variable combination.
void copyAttributes(const ioda::Has_Attributes &src, ioda::Has_Attributes &dest)
Copy attributes from src to dest. Ignore duplicates and dimension scales.
std::map< ioda::Named_Variable, Vec_Named_Variable > VarDimMap
std::size_t getChanSuffixPos(const std::string &name)
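Find the position of the underscore that precedes a trailing all-digit (channel number) suffix in a variable name; std::string::npos when there is no such suffix.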
std::vector< ioda::Named_Variable > Vec_Named_Variable
bool upgradeFile(const std::string &inputName, const std::string &outputName, const UpgradeParameters &params)