21 #include "eckit/config/Configuration.h"
22 #include "eckit/exception/Exceptions.h"
24 #include "oops/mpi/mpi.h"
25 #include "oops/util/abor1_cpp.h"
26 #include "oops/util/DateTime.h"
27 #include "oops/util/Duration.h"
28 #include "oops/util/Logger.h"
29 #include "oops/util/missingValues.h"
30 #include "oops/util/Random.h"
31 #include "oops/util/stringFunctions.h"
33 #include "ioda/distribution/Accumulator.h"
34 #include "ioda/distribution/DistributionFactory.h"
35 #include "ioda/distribution/DistributionUtils.h"
36 #include "ioda/distribution/PairOfDistributions.h"
38 #include "ioda/io/ObsFrameRead.h"
39 #include "ioda/io/ObsFrameWrite.h"
50 std::string &nameWithoutChannelSuffix,
int &channel) {
51 const std::string::size_type lastUnderscore =
name.find_last_of(
'_');
52 if (lastUnderscore != std::string::npos &&
53 name.find_first_not_of(
"0123456789", lastUnderscore + 1) == std::string::npos) {
55 channel = std::stoi(
name.substr(lastUnderscore + 1));
56 nameWithoutChannelSuffix =
name.substr(0, lastUnderscore);
69 std::string dimName =
"nlocs";
113 const util::DateTime & bgn,
const util::DateTime & end,
114 const eckit::mpi::Comm & timeComm)
115 :
oops::ObsSpaceBase(config, comm, bgn, end),
116 config_(config), winbgn_(bgn), winend_(end), commMPI_(comm),
117 gnlocs_(0), nrecs_(0), obsvars_(),
118 obs_group_(), obs_params_(bgn, end, comm, timeComm)
120 oops::Log::trace() <<
"ObsSpace::ObsSpace config = " << config << std::endl;
127 oops::Log::info() << this->
obsname() <<
" vars: " <<
obsvars_ << std::endl;
163 <<
" observations are outside of time window out of "
167 oops::Log::trace() <<
"ObsSpace::ObsSpace constructed name = " <<
obsname() << std::endl;
174 oops::Log::info() <<
obsname() <<
": save database to " << fileName << std::endl;
182 this->
comm().barrier();
184 oops::Log::info() <<
obsname() <<
" : no output" << std::endl;
200 std::size_t numVars = 0;
233 std::string nameToUse;
234 std::vector<int> chanSelectToUse;
243 std::string nameToUse;
244 std::vector<int> chanSelectToUse;
249 if (
has(group, nameToUse)) {
251 if (var.
isA<
int>()) {
253 }
else if (var.
isA<
float>()) {
255 }
else if (var.
isA<std::string>()) {
256 if ((group ==
"MetaData") && (nameToUse ==
"datetime")) {
260 VarType = ObsDtype::DateTime;
271 std::vector<int> & vdata,
272 const std::vector<int> & chanSelect)
const {
273 loadVar<int>(group,
name, chanSelect, vdata);
277 std::vector<float> & vdata,
278 const std::vector<int> & chanSelect)
const {
279 loadVar<float>(group,
name, chanSelect, vdata);
283 std::vector<double> & vdata,
284 const std::vector<int> & chanSelect)
const {
286 std::vector<float> floatData;
287 loadVar<float>(group,
name, chanSelect, floatData);
288 ConvertVarType<float, double>(floatData, vdata);
292 std::vector<std::string> & vdata,
293 const std::vector<int> & chanSelect)
const {
294 loadVar<std::string>(group,
name, chanSelect, vdata);
298 std::vector<util::DateTime> & vdata,
299 const std::vector<int> & chanSelect)
const {
300 std::vector<std::string> dtStrings;
301 loadVar<std::string>(group,
name, chanSelect, dtStrings);
307 const std::vector<int> & vdata,
308 const std::vector<std::string> & dimList) {
313 const std::vector<float> & vdata,
314 const std::vector<std::string> & dimList) {
319 const std::vector<double> & vdata,
320 const std::vector<std::string> & dimList) {
322 std::vector<float> floatData;
323 ConvertVarType<double, float>(vdata, floatData);
328 const std::vector<std::string> & vdata,
329 const std::vector<std::string> & dimList) {
334 const std::vector<util::DateTime> & vdata,
335 const std::vector<std::string> & dimList) {
336 std::vector<std::string> dtStrings(vdata.size(),
"");
337 for (std::size_t i = 0; i < vdata.size(); ++i) {
338 dtStrings[i] = vdata[i].toString();
356 return (irec !=
recidx_.end());
374 "ObsSpace::recidx_vector: Record number, " + std::to_string(recNum) +
375 ", does not exist in record index map.";
383 std::vector<std::size_t> RecNums(
nrecs_);
386 RecNums[
recnum] = Irec->first;
400 std::size_t nobs = totalNlocs *
nvars;
402 os <<
obsname() <<
": nlocs: " << totalNlocs
403 <<
", nvars: " <<
nvars <<
", nobs: " << nobs;
414 std::string dimName = dimNameObject.first;
415 Variable srcDimVar = dimNameObject.second;
417 Dimensions_t maxDimSize = dimSize;
418 Dimensions_t chunkSize = dimSize;
427 if (dimSize > maxFrameSize) {
428 dimSize = maxFrameSize;
434 if (srcDimVar.
isA<
int>()) {
435 newDims.push_back(ioda::NewDimensionScale<int>(
436 dimName, dimSize, maxDimSize, chunkSize));
437 }
else if (srcDimVar.
isA<
float>()) {
438 newDims.push_back(ioda::NewDimensionScale<float>(
439 dimName, dimSize, maxDimSize, chunkSize));
451 backendParams.
flush =
false;
459 std::string dimName = dimNameObject.first;
460 Variable srcDimVar = dimNameObject.second;
469 std::vector<Dimensions_t> counts = destDimShape;
470 std::vector<Dimensions_t> starts(counts.size(), 0);
479 if (srcDimVar.
isA<
int>()) {
480 std::vector<int> dimCoords;
481 srcDimVar.
read<
int>(dimCoords, memSelect, srcSelect);
482 destDimVar.
write<
int>(dimCoords, memSelect, destSelect);
483 }
else if (srcDimVar.
isA<
float>()) {
484 std::vector<float> dimCoords;
485 srcDimVar.
read<
float>(dimCoords, memSelect, srcSelect);
486 destDimVar.
write<
float>(dimCoords, memSelect, destSelect);
492 template<
typename VarType>
494 const std::string & varName, std::vector<VarType> & varValues) {
498 bool gotVarData = obsFrame.
readFrameVar(varName, varValues);
502 VarType sourceFillValue;
504 sourceFillValue = detail::getFillValue<VarType>(sourceFvData);
505 VarType varFillValue = this->getFillValue<VarType>();
506 for (std::size_t i = 0; i < varValues.size(); ++i) {
507 if ((varValues[i] == sourceFillValue) || std::isinf(varValues[i])
508 || std::isnan(varValues[i])) {
509 varValues[i] = varFillValue;
518 const std::string & varName, std::vector<std::string> & varValues) {
522 bool gotVarData = obsFrame.
readFrameVar(varName, varValues);
526 std::string sourceFillValue;
528 sourceFillValue = detail::getFillValue<std::string>(sourceFvData);
529 std::string varFillValue = this->getFillValue<std::string>();
530 for (std::size_t i = 0; i < varValues.size(); ++i) {
531 if (varValues[i] == sourceFillValue) {
532 varValues[i] = varFillValue;
550 Dimensions_t frameStart = obsFrame.
frameStart();
561 for (
auto & varNameObject : obsFrame.
ioVarList()) {
562 std::string varName = varNameObject.first;
563 Variable var = varNameObject.second;
564 Dimensions_t beFrameStart;
568 beFrameStart = frameStart;
570 Dimensions_t frameCount = obsFrame.
frameCount(varName);
573 if (var.
isA<
int>()) {
574 std::vector<int> varValues;
575 if (readObsSource<int>(obsFrame, varName, varValues)) {
576 storeVar<int>(varName, varValues, beFrameStart, frameCount);
578 }
else if (var.
isA<
float>()) {
579 std::vector<float> varValues;
580 if (readObsSource<float>(obsFrame, varName, varValues)) {
581 storeVar<float>(varName, varValues, beFrameStart, frameCount);
583 }
else if (var.
isA<std::string>()) {
584 std::vector<std::string> varValues;
585 if (readObsSource<std::string>(obsFrame, varName, varValues)) {
586 storeVar<std::string>(varName, varValues, beFrameStart, frameCount);
617 std::string dtVarName =
fullVarName(
"MetaData",
"datetime");
622 std::vector<float> timeOffset;
624 timeVar.
read<
float>(timeOffset);
627 std::vector<std::string> dtStrings(dtVals.size(),
"");
628 for (std::size_t i = 0; i < dtVals.size(); ++i) {
629 dtStrings[i] = dtVals[i].toString();
634 params.compressWithGZIP();
635 params.setFillValue<std::string>(this->getFillValue<std::string>());
636 std::vector<Variable>
640 .write<std::string>(dtStrings);
647 Dimensions_t nlocsResize;
649 nlocsResize =
nlocsVar.getDimensions().dimsCur[0] + nlocsSize;
651 nlocsResize = nlocsSize;
654 { std::pair<Variable, Dimensions_t>(
nlocsVar, nlocsResize) });
659 template<
typename VarType>
661 const std::vector<int> & chanSelect,
662 std::vector<VarType> & varValues)
const {
665 std::string nameToUse;
666 std::vector<int> chanSelectToUse;
681 const std::size_t nchansDimIndex = 1;
685 var, nchansDimIndex, chanSelectToUse, memSelect, obsGroupSelect);
687 var.
read<VarType>(varValues, memSelect, obsGroupSelect);
688 varValues.
resize(numElements);
691 var.
read<VarType>(varValues);
695 var.
read<VarType>(varValues);
699 var.
read<VarType>(varValues);
705 template<
typename VarType>
707 const std::vector<VarType> & varValues,
708 const std::vector<std::string> & dimList) {
719 std::string nameToUse;
721 name = std::move(nameToUse);
726 std::vector<std::string> dimListToUse = dimList;
729 const size_t nchansDimIndex =
730 std::find(dimListToUse.begin(), dimListToUse.end(), nchansVarName) -
731 dimListToUse.begin();
732 if (nchansDimIndex == dimListToUse.size())
733 dimListToUse.push_back(nchansVarName);
735 Variable var = openCreateVar<VarType>(fullName, dimListToUse);
738 var.
write<VarType>(varValues);
742 std::vector<std::vector<Named_Variable>> dimScales =
744 size_t nchansDimIndex = std::find_if(dimScales.begin(), dimScales.end(),
745 [](
const std::vector<Named_Variable> &x)
746 { return !x.empty(); }) - dimScales.begin();
747 if (nchansDimIndex == dimScales.size())
748 throw eckit::UserError(
"Variable " + fullName +
749 " is not indexed by channel numbers", Here());
754 memSelect, obsGroupSelect);
755 var.
write<VarType>(varValues, memSelect, obsGroupSelect);
762 std::size_t nchansDimIndex,
768 std::vector<Dimensions_t> chanIndices;
769 chanIndices.reserve(
channels.size());
770 for (std::size_t i = 0; i <
channels.size(); ++i) {
773 chanIndices.push_back(ichan->second);
775 throw eckit::BadParameter(
"Selected channel number " +
776 std::to_string(
channels[i]) +
" does not exist.", Here());
782 std::vector<std::vector<Dimensions_t>> dimSelects(varDims.size());
783 Dimensions_t numElements = 1;
784 for (std::size_t i = 0; i < varDims.size(); ++i) {
785 if (i == nchansDimIndex) {
787 numElements *= chanIndices.size();
788 dimSelects[i] = chanIndices;
790 numElements *= varDims[i];
791 std::vector<Dimensions_t> allIndices(varDims[i]);
792 std::iota(allIndices.begin(), allIndices.end(), 0);
793 dimSelects[i] = allIndices;
797 std::vector<Dimensions_t> memStarts(1, 0);
798 std::vector<Dimensions_t> memCounts(1, numElements);
799 memSelect.
extent(memCounts)
802 obsGroupSelect.
extent(varDims)
804 for (std::size_t i = 1; i < dimSelects.size(); ++i) {
821 template<
typename VarType>
823 const Dimensions_t frameStart,
const Dimensions_t frameCount) {
833 std::vector<Dimensions_t> beCounts = varDims;
834 beCounts[0] = frameCount;
835 std::vector<Dimensions_t> beStarts(beCounts.size(), 0);
836 beStarts[0] = frameStart;
841 std::vector<Dimensions_t> feCounts(1, std::accumulate(
842 beCounts.begin(), beCounts.end(),
static_cast<Dimensions_t
>(1),
843 std::multiplies<Dimensions_t>()));
844 std::vector<Dimensions_t> feStarts(1, 0);
854 var.
write<VarType>(varValues, feSelect, beSelect);
873 paramsFloat.
setFillValue<
float>(this->getFillValue<float>());
874 paramsStr.
setFillValue<std::string>(this->getFillValue<std::string>());
878 for (
auto & ivar : dimsAttachedToVars) {
879 std::string varName = ivar.first;
880 std::vector<std::string> varDimNames = ivar.second;
883 std::vector<Variable> varDims;
884 for (
auto & dimVarName : varDimNames) {
885 varDims.push_back(destVarContainer.
open(dimVarName));
889 if (srcVar.
isA<
int>()) {
891 }
else if (srcVar.
isA<
float>()) {
893 }
else if (srcVar.
isA<std::string>()) {
894 destVarContainer.
createWithScales<std::string>(varName, varDims, paramsStr);
896 if (this->
comm().rank() == 0) {
897 oops::Log::warning() <<
"WARNING: ObsSpace::createVariables: "
898 <<
"Skipping variable due to an unexpected data type for variable: "
899 << varName << std::endl;
913 std::vector<int> chanNumbers;
917 std::vector<float> floatChanNumbers;
919 ConvertVarType<float, int>(floatChanNumbers, chanNumbers);
924 for (
int i = 0; i < chanNumbers.size(); ++i) {
932 const std::vector<int> & chanSelect, std::string & nameToUse,
933 std::vector<int> & chanSelectToUse)
const {
935 chanSelectToUse = chanSelect;
941 chanSelectToUse = {channelNumber};
947 typedef std::map<std::size_t, std::vector<std::pair<float, std::size_t>>> TmpRecIdxMap;
948 typedef TmpRecIdxMap::iterator TmpRecIdxIter;
951 std::size_t nLocs = this->
nlocs();
952 std::vector<float> SortValues(nLocs);
954 std::vector<util::DateTime> Dates(nLocs);
956 for (std::size_t iloc = 0; iloc < nLocs; iloc++) {
957 SortValues[iloc] = (Dates[iloc] - Dates[0]).toSeconds();
965 TmpRecIdxMap TmpRecIdx;
966 for (
size_t iloc = 0; iloc < nLocs; iloc++) {
967 TmpRecIdx[
recnums_[iloc]].push_back(std::make_pair(SortValues[iloc], iloc));
970 for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
972 sort(irec->second.begin(), irec->second.end());
977 sort(irec->second.begin(), irec->second.end(),
978 [](
const std::pair<float, std::size_t> &
p1,
979 const std::pair<float, std::size_t> & p2){
980 return (p2.first < p1.first ||
981 (!(p1.first < p2.first) && p2.second > p1.second));});
986 for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
987 recidx_[irec->first].resize(irec->second.size());
988 for (std::size_t iloc = 0; iloc < irec->second.size(); iloc++) {
989 recidx_[irec->first][iloc] = irec->second[iloc].second;
996 std::size_t nLocs = this->
nlocs();
997 for (
size_t iloc = 0; iloc < nLocs; iloc++) {
1008 Dimensions_t maxVarSize;
1012 for (
auto & dimNameObject : dimVarList) {
1013 std::string dimName = dimNameObject.first;
1014 Dimensions_t dimSize = dimNameObject.second.getDimensions().dimsCur[0];
1015 Dimensions_t dimMaxSize = dimSize;
1016 Dimensions_t dimChunkSize = dimSize;
1018 dimSize = this->
nlocs();
1034 for (obsFrame.
frameInit(varList, dimVarList, dimsAttachedToVars, maxVarSize);
1036 Dimensions_t frameStart = obsFrame.
frameStart();
1037 for (
auto & varNameObject : varList) {
1039 std::string destVarName = varNameObject.first;
1043 Dimensions_t frameCount = obsFrame.
frameCount(destVarName);
1046 if (frameCount > 0) {
1048 Variable srcVar = varNameObject.second;
1055 if (srcVar.
isA<
int>()) {
1056 std::vector<int> varValues;
1057 srcVar.
read<
int>(varValues, memSelect, varSelect);
1059 }
else if (srcVar.
isA<
float>()) {
1060 std::vector<float> varValues;
1061 srcVar.
read<
float>(varValues, memSelect, varSelect);
1063 }
else if (srcVar.
isA<std::string>()) {
1064 std::vector<std::string> varValues;
1065 srcVar.
read<std::string>(varValues, memSelect, varSelect);
1074 template <
typename DataType>
1076 const DataType missing = util::missingValue(missing);
1081 std::vector<DataType> varVals;
1082 extendVar.
read<DataType>(varVals);
1085 auto it_nonmissing = std::find_if(varVals.begin(), varVals.end(),
1086 [&missing](DataType x){return x != missing;});
1087 if (it_nonmissing != varVals.end()) {
1088 std::fill(varVals.begin() + startFill, varVals.end(), *it_nonmissing);
1089 extendVar.
write<DataType>(varVals);
1105 const int nlevs =
params.numModelLevels;
1107 const size_t numOriginalLocs = this->
nlocs();
1113 const std::set<size_t> uniqueOriginalRecs(
recnums_.begin(),
recnums_.end());
1121 size_t upperBoundOnGlobalNumOriginalLocs = 0;
1122 size_t upperBoundOnGlobalNumOriginalRecs = 0;
1123 if (numOriginalLocs > 0) {
1124 upperBoundOnGlobalNumOriginalLocs =
indx_.back() + 1;
1125 upperBoundOnGlobalNumOriginalRecs = *uniqueOriginalRecs.rbegin() + 1;
1127 dist_->max(upperBoundOnGlobalNumOriginalLocs);
1128 dist_->max(upperBoundOnGlobalNumOriginalRecs);
1139 size_t averagedLoc = 0;
1140 for (
size_t originalRec : uniqueOriginalRecs) {
1141 ASSERT(
dist_->isMyRecord(originalRec));
1142 const size_t averagedRec = originalRec;
1143 const size_t extendedRec = upperBoundOnGlobalNumOriginalRecs + averagedRec;
1146 std::vector<size_t> &locsInRecord =
recidx_[extendedRec];
1147 for (
int ilev = 0; ilev < nlevs; ++ilev, ++averagedLoc) {
1148 const size_t extendedLoc = numOriginalLocs + averagedLoc;
1149 const size_t globalAveragedLoc = originalRec * nlevs + ilev;
1150 const size_t globalExtendedLoc = upperBoundOnGlobalNumOriginalLocs + globalAveragedLoc;
1153 replicaDist->assignRecord(averagedRec, globalAveragedLoc, eckit::geometry::Point2());
1154 ASSERT(replicaDist->isMyRecord(averagedRec));
1156 indx_.push_back(globalExtendedLoc);
1157 locsInRecord.push_back(extendedLoc);
1160 replicaDist->computePatchLocs();
1162 const size_t numAveragedLocs = averagedLoc;
1163 const size_t numExtendedLocs = numOriginalLocs + numAveragedLocs;
1181 const std::vector <std::string> &nonMissingExtendedVars =
params.nonMissingExtendedVars;
1182 for (
auto & varName : nonMissingExtendedVars) {
1184 const std::string groupName =
"MetaData";
1185 const std::string fullVname =
fullVarName(groupName, varName);
1190 if (extendVar.
isA<
int>()) {
1191 extendVariable<int>(extendVar, numOriginalLocs);
1192 }
else if (extendVar.
isA<
float>()) {
1193 extendVariable<float>(extendVar, numOriginalLocs);
1194 }
else if (extendVar.
isA<std::string>()) {
1195 extendVariable<std::string>(extendVar, numOriginalLocs);
1202 std::vector <int> extended_obs_space(numExtendedLocs, 0);
1203 std::fill(extended_obs_space.begin() + numOriginalLocs, extended_obs_space.end(), 1);
1205 put_db(
"MetaData",
"extended_obs_space", extended_obs_space);
1209 std::unique_ptr<Accumulator<size_t>> accumulator = replicaDist->createAccumulator<
size_t>();
1210 for (
size_t averagedLoc = 0; averagedLoc < numAveragedLocs; ++averagedLoc)
1211 accumulator->addTerm(averagedLoc, 1);
1212 size_t globalNumAveragedLocs = accumulator->computeResult();
1218 upperBoundOnGlobalNumOriginalRecs);
1223 gnlocs_ += globalNumAveragedLocs;
Interfaces for ioda::Variable and related classes.
Groups are a new implementation of ObsSpaces.
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
bool ioIsVarDimByNlocs(const std::string &varName) const
return true if variable is dimensioned by nlocs
Selection createMemSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing a memory buffer
virtual std::size_t frameNumLocs() const
return number of locations
const VarNameObjectList & ioVarList() const
return list of regular variables from ObsIo
VarDimMap ioVarDimMap() const
return map from variables to their attached dimension scales
Dimensions_t globalNumLocs() const
return number of locations that were selected from ObsIo
const VarNameObjectList & ioDimVarList() const
return list of dimension scale variables from ObsIo
Selection createVarSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameStart, const Dimensions_t frameCount)
create selection object for accessing a frame from a whole variable
Has_Attributes & atts() const
return attributes container from ObsIo
Dimensions_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Has_Variables & vars() const
return variables container from ObsIo
virtual std::size_t frameNumRecs() const
return number of records
Implementation of ObsFrameRead class.
std::vector< std::size_t > index() const override
return list of indices indicating which locations were selected from ObsIo
void frameNext() override
move to the next frame
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
Dimensions_t adjNlocsFrameCount() const override
return adjusted nlocs frame count
bool frameAvailable() override
true if a frame is available (not past end of frames)
void frameInit() override
initialize for walking through the frames
bool readFrameVar(const std::string &varName, std::vector< int > &varData)
read a frame variable
Dimensions_t frameStart() override
return current frame starting index
Dimensions_t adjNlocsFrameStart() const override
return adjusted nlocs frame start
std::shared_ptr< const Distribution > distribution()
return the MPI distribution
std::vector< std::size_t > recnums() const override
return list of record numbers from ObsIo
Implementation of ObsFrameWrite class.
void writeFrameVar(const std::string &varName, const std::vector< int > &varData)
write a frame variable
void frameNext(const VarNameObjectList &varList) override
move to the next frame
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
void frameInit(const VarNameObjectList &varList, const VarNameObjectList &dimVarList, const VarDimMap &varDimMap, const Dimensions_t maxVarSize) override
initialize for walking through the frames
bool frameAvailable() override
true if a frame is available (not past end of frames)
Dimensions_t frameStart() override
return current frame starting index
static ObsGroup generate(Group &emptyGroup, const NewDimensionScales_t &fundamentalDims, std::shared_ptr< const detail::DataLayoutPolicy > layout=nullptr)
Create an empty ObsGroup and populate it with the fundamental dimensions.
void resize(const std::vector< std::pair< Variable, ioda::Dimensions_t >> &newDims)
Resize a Dimension and every Variable that depends on it.
oops::Parameter< ObsGroupingParameters > obsGrouping
options controlling obs record grouping
oops::Parameter< int > maxFrameSize
maximum frame size
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
void storeVar(const std::string &varName, std::vector< VarType > &varValues, const Dimensions_t frameStart, const Dimensions_t frameCount)
store a variable in the obs_group_ object
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
bool has(const std::string &group, const std::string &name) const
return true if group/variable exists
void saveToFile()
Dump the database into the output file.
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
ObsDtype dtype(const std::string &group, const std::string &name) const
return data type for group/variable
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
std::shared_ptr< const Distribution > dist_
MPI distribution object.
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
std::vector< std::size_t > recidx_all_recnums() const
return all record numbers from the recidx_ data member
const RecIdxIter recidx_begin() const
Return the begin iterator associated with the recidx_ data member.
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
ObsDimInfo dim_info_
dimension information for variables in this obs space
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
std::size_t recidx_recnum(const RecIdxIter &irec) const
return record number pointed to by the given iterator
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
void splitChanSuffix(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::string &nameToUse, std::vector< int > &chanSelectToUse) const
split off the channel number suffix from a given variable name
void print(std::ostream &os) const
print function for oops::Printable class
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame object)
std::string obsname_
name of obs space
std::size_t nrecs_
number of records
const std::vector< std::string > & obs_group_vars() const
return YAML configuration parameter: obsdatain.obsgrouping.group variables
const RecIdxIter recidx_end() const
Return the end iterator associated with the recidx_ data member.
oops::Variables obsvars_
Observation "variables" to be simulated.
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
const eckit::mpi::Comm & comm() const
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
const std::vector< std::size_t > & recidx_vector(const RecIdxIter &irec) const
return record number vector pointed to by the given iterator
RecIdxMap::const_iterator RecIdxIter
RecIdxMap recidx_
profile ordering
ObsSpaceParameters obs_params_
obs io parameters
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
void loadVar(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::vector< VarType > &varValues) const
load a variable from the obs_group_ object
size_t nlocs() const
return the number of locations in the obs space. Note that nlocs may be smaller than global unique nl...
ObsGroup obs_group_
observation data store
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }) const
transfer data from the obs container to vdata
void fillChanNumToIndexMap()
fill in the channel number to channel index map
std::size_t nvars() const
return the number of variables in the obs space container. "Variables" refers to the quantities that ...
void extendVariable(Variable &extendVar, const size_t startFill)
Extend the given variable.
const ObsSpaceParameters & params() const
std::string obs_sort_order() const
return YAML configuration parameter: obsdatain.obsgrouping.sort order
void save()
save the obs space data into a file (if obsdataout specified)
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
bool readObsSource(ObsFrameRead &obsFrame, const std::string &varName, std::vector< VarType > &varValues)
read in values for variable from obs source
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable variable along dimension nchansDimIndex corresponding to ...
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
const eckit::mpi::Comm & commMPI_
MPI communicator.
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
std::size_t gnlocs_
total number of locations
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
const oops::Variables & obsvariables() const
return oops variables object (simulated variables)
ObsSpace(const eckit::Configuration &config, const eckit::mpi::Comm &comm, const util::DateTime &bgn, const util::DateTime &end, const eckit::mpi::Comm &timeComm)
Config based constructor for an ObsSpace object.
const std::string & obsname() const
return the name of the obs type being stored
std::string obs_sort_var() const
return YAML configuration parameter: obsdatain.obsgrouping.sort variable
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
bool recidx_has(const std::size_t recNum) const
true if given record number exists in the recidx_ data member
void deserialize(const eckit::Configuration &config)
deserialize the parameter sub groups
void setMaxVarSize(const Dimensions_t maxVarSize)
set the maximum variable size
ObsTopLevelParameters top_level_
sub groups of parameters
void setDimScale(const std::string &dimName, const Dimensions_t curSize, const Dimensions_t maxSize, const Dimensions_t chunkSize)
set a new dimension scale
oops::RequiredParameter< oops::Variables > simVars
simulated variables
oops::OptionalParameter< ObsExtendParameters > obsExtend
extend the ObsSpace with extra fixed-size records
oops::RequiredParameter< std::string > obsSpaceName
name of obs space
oops::OptionalParameter< ObsFileOutParameters > obsOutFile
output specification by writing to a file
const ObsIoParametersBase & obsIoInParameters() const
parameters indicating where to load data from
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
virtual Attribute_Implementation read(gsl::span< char > data, const Type &in_memory_dataType) const
The fundamental read function. Backends overload this function to implement all read operations.
virtual Group open(const std::string &name) const
Open a group.
Has_Variables vars
Use this to access variables.
virtual bool exists(const std::string &name) const
virtual Attribute open(const std::string &name) const
Open an Attribute by name.
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual std::vector< std::string > list() const
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
virtual FillValueData_t getFillValue() const
Retrieve the fill value.
bool isA() const
Convenience function to check a Variable's storage type.
virtual bool isDimensionScaleAttached(unsigned int DimensionNumber, const Variable &scale) const
Is a dimension scale attached to this Variable in a certain position?
virtual bool hasFillValue() const
Check if a variable has a fill value set.
virtual Dimensions getDimensions() const
virtual std::vector< std::vector< Named_Variable > > getDimensionScaleMappings(const std::list< Named_Variable > &scalesToQueryAgainst, bool firstOnly=true) const
Which dimensions are attached at which positions? This function may offer improved performance on som...
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
virtual Variable write(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all)
The fundamental write function. Backends overload this function to implement all write operations.
virtual Variable resize(const std::vector< Dimensions_t > &newDims)
Resize the variable.
IODA_DL std::string genUniqueName()
Convenience function to generate a random file name.
BackendNames
Backend names.
IODA_DL Group constructBackend(BackendNames name, BackendCreationParameters &params)
This is a simple factory style function that will instantiate a different backend based on a given na...
@ Create
Create a new file.
@ ObsStore
ObsStore in-memory.
@ Truncate_If_Exists
If the file already exists, overwrite it.
Selection & extent(const VecDimensions_t &sz)
Provide the dimensions of the object that you are selecting from.
Selection & select(const SingleSelection &s)
Append a new selection.
bool extractChannelSuffixIfPresent(const std::string &name, std::string &nameWithoutChannelSuffix, int &channel)
std::vector< util::DateTime > convertDtStringsToDtime(const std::vector< std::string > &dtStrings)
convert datetime strings to DateTime object
constexpr int Unlimited
Specifies that a dimension is resizable to infinity.
std::vector< util::DateTime > convertRefOffsetToDtime(const int refIntDtime, const std::vector< float > &timeOffsets)
convert reference, time to DateTime object
std::vector< std::pair< std::string, Variable > > VarNameObjectList
typedef for holding list of variable names with associated variable object
std::string fullVarName(const std::string &groupName, const std::string &varName)
form full variable name given individual group and variable names
std::vector< std::shared_ptr< NewDimensionScale_Base > > NewDimensionScales_t
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
void collectVarDimInfo(const ObsGroup &obsGroup, VarNameObjectList &varObjectList, VarNameObjectList &dimVarObjectList, VarDimMap &dimsAttachedToVars, Dimensions_t &maxVarSize0)
collect variable and dimension information from a ioda ObsGroup
std::shared_ptr< Distribution > createReplicaDistribution(const eckit::mpi::Comm &comm, std::shared_ptr< const Distribution > master, const std::vector< std::size_t > &masterRecordNums)
Create a suitable replica distribution for the distribution master.
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Dimensions_t dimensionality
The dimensionality (rank) of the data.
Used to specify backend creation-time properties.
BackendFileActions action
BackendCreateModes createMode
A named pair of (variable_name, ioda::Variable).
Used to specify Variable creation-time properties.
VariableCreationParameters & setFillValue(DataType fill)
Container used to store and manipulate fill values.