21 #include "eckit/config/Configuration.h"
22 #include "eckit/exception/Exceptions.h"
24 #include "oops/mpi/mpi.h"
25 #include "oops/util/abor1_cpp.h"
26 #include "oops/util/DateTime.h"
27 #include "oops/util/Duration.h"
28 #include "oops/util/Logger.h"
29 #include "oops/util/missingValues.h"
30 #include "oops/util/Random.h"
31 #include "oops/util/stringFunctions.h"
33 #include "ioda/distribution/Accumulator.h"
34 #include "ioda/distribution/DistributionFactory.h"
35 #include "ioda/distribution/DistributionUtils.h"
36 #include "ioda/distribution/PairOfDistributions.h"
38 #include "ioda/io/ObsFrameRead.h"
39 #include "ioda/io/ObsFrameWrite.h"
50 std::string &nameWithoutChannelSuffix,
int &channel) {
51 const std::string::size_type lastUnderscore =
name.find_last_of(
'_');
52 if (lastUnderscore != std::string::npos &&
53 name.find_first_not_of(
"0123456789", lastUnderscore + 1) == std::string::npos) {
55 channel = std::stoi(
name.substr(lastUnderscore + 1));
56 nameWithoutChannelSuffix =
name.substr(0, lastUnderscore);
69 std::string dimName =
"nlocs";
113 const util::DateTime & bgn,
const util::DateTime & end,
114 const eckit::mpi::Comm & timeComm)
116 winbgn_(bgn), winend_(end), commMPI_(comm),
117 gnlocs_(0), nrecs_(0), obsvars_(),
118 obs_group_(), obs_params_(
params, bgn, end, comm, timeComm)
130 oops::Log::info() << this->
obsname() <<
" vars: " <<
obsvars_ << std::endl;
180 std::vector<int> idx2int(
nlocs);
181 std::vector<int> rec2int(
nlocs);
183 for (
size_t loc = 0; loc <
nlocs; ++loc) {
184 idx2int[loc] =
static_cast<int>(
indx_[loc]);
185 rec2int[loc] =
static_cast<int>(
recnums_[loc]);
189 put_db(
"MetaData",
"saved_index", idx2int);
190 put_db(
"MetaData",
"saved_record_number", rec2int);
196 <<
" observations are outside of time window out of "
200 oops::Log::trace() <<
"ObsSpace::ObsSpace constructed name = " <<
obsname() << std::endl;
204 void ObsSpace::save() {
207 oops::Log::info() <<
obsname() <<
": save database to " << fileName << std::endl;
215 this->
comm().barrier();
217 oops::Log::info() <<
obsname() <<
" : no output" << std::endl;
233 std::size_t numVars = 0;
234 if (obs_group_.
exists(
"ObsValue")) {
235 numVars = obs_group_.
open(
"ObsValue").
vars.
list().size();
236 }
else if (obs_group_.
exists(
"ObsError")) {
237 numVars = obs_group_.
open(
"ObsError").
vars.
list().size();
243 const std::vector<std::string> & ObsSpace::obs_group_vars()
const {
248 std::string ObsSpace::obs_sort_var()
const {
253 std::string ObsSpace::obs_sort_order()
const {
263 bool ObsSpace::has(
const std::string & group,
const std::string &
name,
bool skipDerived)
const {
266 std::string nameToUse;
267 std::vector<int> chanSelectToUse;
275 bool skipDerived)
const {
278 std::string nameToUse;
279 std::vector<int> chanSelectToUse;
282 std::string groupToUse =
"Derived" + group;
288 if (
has(groupToUse, nameToUse, skipDerived)) {
290 if (var.
isA<
int>()) {
292 }
else if (var.
isA<
float>()) {
294 }
else if (var.
isA<std::string>()) {
295 if ((group ==
"MetaData") && (nameToUse ==
"datetime")) {
299 VarType = ObsDtype::DateTime;
309 void ObsSpace::get_db(
const std::string & group,
const std::string &
name,
310 std::vector<int> & vdata,
311 const std::vector<int> & chanSelect,
bool skipDerived)
const {
312 loadVar<int>(group,
name, chanSelect, vdata, skipDerived);
315 void ObsSpace::get_db(
const std::string & group,
const std::string &
name,
316 std::vector<float> & vdata,
317 const std::vector<int> & chanSelect,
bool skipDerived)
const {
318 loadVar<float>(group,
name, chanSelect, vdata, skipDerived);
321 void ObsSpace::get_db(
const std::string & group,
const std::string &
name,
322 std::vector<double> & vdata,
323 const std::vector<int> & chanSelect,
bool skipDerived)
const {
325 std::vector<float> floatData;
326 loadVar<float>(group,
name, chanSelect, floatData, skipDerived);
327 ConvertVarType<float, double>(floatData, vdata);
330 void ObsSpace::get_db(
const std::string & group,
const std::string &
name,
331 std::vector<std::string> & vdata,
332 const std::vector<int> & chanSelect,
bool skipDerived)
const {
333 loadVar<std::string>(group,
name, chanSelect, vdata, skipDerived);
336 void ObsSpace::get_db(
const std::string & group,
const std::string &
name,
337 std::vector<util::DateTime> & vdata,
338 const std::vector<int> & chanSelect,
bool skipDerived)
const {
339 std::vector<std::string> dtStrings;
340 loadVar<std::string>(group,
name, chanSelect, dtStrings, skipDerived);
345 void ObsSpace::put_db(
const std::string & group,
const std::string &
name,
346 const std::vector<int> & vdata,
347 const std::vector<std::string> & dimList) {
351 void ObsSpace::put_db(
const std::string & group,
const std::string &
name,
352 const std::vector<float> & vdata,
353 const std::vector<std::string> & dimList) {
357 void ObsSpace::put_db(
const std::string & group,
const std::string &
name,
358 const std::vector<double> & vdata,
359 const std::vector<std::string> & dimList) {
361 std::vector<float> floatData;
362 ConvertVarType<double, float>(vdata, floatData);
366 void ObsSpace::put_db(
const std::string & group,
const std::string &
name,
367 const std::vector<std::string> & vdata,
368 const std::vector<std::string> & dimList) {
372 void ObsSpace::put_db(
const std::string & group,
const std::string &
name,
373 const std::vector<util::DateTime> & vdata,
374 const std::vector<std::string> & dimList) {
375 std::vector<std::string> dtStrings(vdata.size(),
"");
376 for (std::size_t i = 0; i < vdata.size(); ++i) {
377 dtStrings[i] = vdata[i].toString();
393 bool ObsSpace::recidx_has(
const std::size_t recNum)
const {
395 return (irec !=
recidx_.end());
399 std::size_t ObsSpace::recidx_recnum(
const RecIdxIter & irec)
const {
404 const std::vector<std::size_t> & ObsSpace::recidx_vector(
const RecIdxIter & irec)
const {
409 const std::vector<std::size_t> & ObsSpace::recidx_vector(
const std::size_t recNum)
const {
413 "ObsSpace::recidx_vector: Record number, " + std::to_string(recNum) +
414 ", does not exist in record index map.";
421 std::vector<std::size_t> ObsSpace::recidx_all_recnums()
const {
422 std::vector<std::size_t> RecNums(
nrecs_);
425 RecNums[recnum] = Irec->first;
436 void ObsSpace::print(std::ostream & os)
const {
439 std::size_t nobs = totalNlocs *
nvars;
441 os <<
obsname() <<
": nlocs: " << totalNlocs
442 <<
", nvars: " <<
nvars <<
", nobs: " << nobs;
453 std::string dimName = dimNameObject.first;
454 Variable srcDimVar = dimNameObject.second;
456 Dimensions_t maxDimSize = dimSize;
457 Dimensions_t chunkSize = dimSize;
466 if (dimSize > maxFrameSize) {
467 dimSize = maxFrameSize;
473 if (srcDimVar.
isA<
int>()) {
474 newDims.push_back(ioda::NewDimensionScale<int>(
475 dimName, dimSize, maxDimSize, chunkSize));
476 }
else if (srcDimVar.
isA<
float>()) {
477 newDims.push_back(ioda::NewDimensionScale<float>(
478 dimName, dimSize, maxDimSize, chunkSize));
490 backendParams.
flush =
false;
498 std::string dimName = dimNameObject.first;
499 Variable srcDimVar = dimNameObject.second;
508 std::vector<Dimensions_t> counts = destDimShape;
509 std::vector<Dimensions_t> starts(counts.size(), 0);
518 if (srcDimVar.
isA<
int>()) {
519 std::vector<int> dimCoords;
520 srcDimVar.
read<
int>(dimCoords, memSelect, srcSelect);
521 destDimVar.
write<
int>(dimCoords, memSelect, destSelect);
522 }
else if (srcDimVar.
isA<
float>()) {
523 std::vector<float> dimCoords;
524 srcDimVar.
read<
float>(dimCoords, memSelect, srcSelect);
525 destDimVar.
write<
float>(dimCoords, memSelect, destSelect);
531 template<
typename VarType>
533 const std::string & varName, std::vector<VarType> & varValues) {
537 bool gotVarData = obsFrame.
readFrameVar(varName, varValues);
541 VarType sourceFillValue;
543 sourceFillValue = detail::getFillValue<VarType>(sourceFvData);
544 VarType varFillValue = this->getFillValue<VarType>();
545 for (std::size_t i = 0; i < varValues.size(); ++i) {
546 if ((varValues[i] == sourceFillValue) || std::isinf(varValues[i])
547 || std::isnan(varValues[i])) {
548 varValues[i] = varFillValue;
557 const std::string & varName, std::vector<std::string> & varValues) {
561 bool gotVarData = obsFrame.
readFrameVar(varName, varValues);
565 std::string sourceFillValue;
567 sourceFillValue = detail::getFillValue<std::string>(sourceFvData);
568 std::string varFillValue = this->getFillValue<std::string>();
569 for (std::size_t i = 0; i < varValues.size(); ++i) {
570 if (varValues[i] == sourceFillValue) {
571 varValues[i] = varFillValue;
589 Dimensions_t frameStart = obsFrame.
frameStart();
600 for (
auto & varNameObject : obsFrame.
ioVarList()) {
601 std::string varName = varNameObject.first;
602 Variable var = varNameObject.second;
603 Dimensions_t beFrameStart;
607 beFrameStart = frameStart;
609 Dimensions_t frameCount = obsFrame.
frameCount(varName);
612 if (var.
isA<
int>()) {
613 std::vector<int> varValues;
614 if (readObsSource<int>(obsFrame, varName, varValues)) {
615 storeVar<int>(varName, varValues, beFrameStart, frameCount);
617 }
else if (var.
isA<
float>()) {
618 std::vector<float> varValues;
619 if (readObsSource<float>(obsFrame, varName, varValues)) {
620 storeVar<float>(varName, varValues, beFrameStart, frameCount);
622 }
else if (var.
isA<std::string>()) {
623 std::vector<std::string> varValues;
624 if (readObsSource<std::string>(obsFrame, varName, varValues)) {
625 storeVar<std::string>(varName, varValues, beFrameStart, frameCount);
656 std::string dtVarName =
fullVarName(
"MetaData",
"datetime");
661 std::vector<float> timeOffset;
663 timeVar.
read<
float>(timeOffset);
666 std::vector<std::string> dtStrings(dtVals.size(),
"");
667 for (std::size_t i = 0; i < dtVals.size(); ++i) {
668 dtStrings[i] = dtVals[i].toString();
673 params.compressWithGZIP();
674 params.setFillValue<std::string>(this->getFillValue<std::string>());
675 std::vector<Variable>
679 .write<std::string>(dtStrings);
684 void ObsSpace::resizeNlocs(
const Dimensions_t nlocsSize,
const bool append) {
686 Dimensions_t nlocsResize;
688 nlocsResize =
nlocsVar.getDimensions().dimsCur[0] + nlocsSize;
690 nlocsResize = nlocsSize;
693 { std::pair<Variable, Dimensions_t>(
nlocsVar, nlocsResize) });
698 template<
typename VarType>
699 void ObsSpace::loadVar(
const std::string & group,
const std::string &
name,
700 const std::vector<int> & chanSelect,
701 std::vector<VarType> & varValues,
702 bool skipDerived)
const {
705 std::string nameToUse;
706 std::vector<int> chanSelectToUse;
710 std::string groupToUse =
"Derived" + group;
728 const std::size_t nchansDimIndex = 1;
732 var, nchansDimIndex, chanSelectToUse, memSelect, obsGroupSelect);
734 var.
read<VarType>(varValues, memSelect, obsGroupSelect);
735 varValues.
resize(numElements);
738 var.
read<VarType>(varValues);
742 var.
read<VarType>(varValues);
746 var.
read<VarType>(varValues);
752 template<
typename VarType>
753 void ObsSpace::saveVar(
const std::string & group, std::string
name,
754 const std::vector<VarType> & varValues,
755 const std::vector<std::string> & dimList) {
766 std::string nameToUse;
768 name = std::move(nameToUse);
773 std::vector<std::string> dimListToUse = dimList;
776 const size_t nchansDimIndex =
777 std::find(dimListToUse.begin(), dimListToUse.end(), nchansVarName) -
778 dimListToUse.begin();
779 if (nchansDimIndex == dimListToUse.size())
780 dimListToUse.push_back(nchansVarName);
782 Variable var = openCreateVar<VarType>(fullName, dimListToUse);
785 var.
write<VarType>(varValues);
789 std::vector<std::vector<Named_Variable>> dimScales =
791 size_t nchansDimIndex = std::find_if(dimScales.begin(), dimScales.end(),
792 [](
const std::vector<Named_Variable> &x)
793 { return !x.empty(); }) - dimScales.begin();
794 if (nchansDimIndex == dimScales.size())
795 throw eckit::UserError(
"Variable " + fullName +
796 " is not indexed by channel numbers", Here());
801 memSelect, obsGroupSelect);
802 var.
write<VarType>(varValues, memSelect, obsGroupSelect);
808 std::size_t ObsSpace::createChannelSelections(
const Variable & variable,
809 std::size_t nchansDimIndex,
815 std::vector<Dimensions_t> chanIndices;
816 chanIndices.reserve(
channels.size());
817 for (std::size_t i = 0; i <
channels.size(); ++i) {
820 chanIndices.push_back(ichan->second);
822 throw eckit::BadParameter(
"Selected channel number " +
823 std::to_string(
channels[i]) +
" does not exist.", Here());
829 std::vector<std::vector<Dimensions_t>> dimSelects(varDims.size());
830 Dimensions_t numElements = 1;
831 for (std::size_t i = 0; i < varDims.size(); ++i) {
832 if (i == nchansDimIndex) {
834 numElements *= chanIndices.size();
835 dimSelects[i] = chanIndices;
837 numElements *= varDims[i];
838 std::vector<Dimensions_t> allIndices(varDims[i]);
839 std::iota(allIndices.begin(), allIndices.end(), 0);
840 dimSelects[i] = allIndices;
844 std::vector<Dimensions_t> memStarts(1, 0);
845 std::vector<Dimensions_t> memCounts(1, numElements);
846 memSelect.
extent(memCounts)
853 if (numElements == 0) {
855 std::vector<Dimensions_t> obsGroupStarts(varDims.size(), 0);
856 std::vector<Dimensions_t> obsGroupCounts(varDims.size(), 0);
857 obsGroupSelect.
extent(varDims)
861 obsGroupSelect.
extent(varDims)
863 for (std::size_t i = 1; i < dimSelects.size(); ++i) {
881 template<
typename VarType>
882 void ObsSpace::storeVar(
const std::string & varName, std::vector<VarType> & varValues,
883 const Dimensions_t frameStart,
const Dimensions_t frameCount) {
893 std::vector<Dimensions_t> beCounts = varDims;
894 beCounts[0] = frameCount;
895 std::vector<Dimensions_t> beStarts(beCounts.size(), 0);
896 beStarts[0] = frameStart;
901 std::vector<Dimensions_t> feCounts(1, std::accumulate(
902 beCounts.begin(), beCounts.end(),
static_cast<Dimensions_t
>(1),
903 std::multiplies<Dimensions_t>()));
904 std::vector<Dimensions_t> feStarts(1, 0);
914 var.
write<VarType>(varValues, feSelect, beSelect);
933 paramsFloat.
setFillValue<
float>(this->getFillValue<float>());
934 paramsStr.
setFillValue<std::string>(this->getFillValue<std::string>());
938 for (
auto & ivar : dimsAttachedToVars) {
939 std::string varName = ivar.first;
940 std::vector<std::string> varDimNames = ivar.second;
943 std::vector<Variable> varDims;
944 for (
auto & dimVarName : varDimNames) {
945 varDims.push_back(destVarContainer.
open(dimVarName));
949 if (srcVar.
isA<
int>()) {
951 }
else if (srcVar.
isA<
float>()) {
953 }
else if (srcVar.
isA<std::string>()) {
954 destVarContainer.
createWithScales<std::string>(varName, varDims, paramsStr);
956 if (this->
comm().rank() == 0) {
957 oops::Log::warning() <<
"WARNING: ObsSpace::createVariables: "
958 <<
"Skipping variable due to an unexpected data type for variable: "
959 << varName << std::endl;
966 void ObsSpace::fillChanNumToIndexMap() {
973 std::vector<int> chanNumbers;
977 std::vector<float> floatChanNumbers;
979 ConvertVarType<float, int>(floatChanNumbers, chanNumbers);
984 for (
int i = 0; i < chanNumbers.size(); ++i) {
991 void ObsSpace::splitChanSuffix(
const std::string & group,
const std::string &
name,
992 const std::vector<int> & chanSelect, std::string & nameToUse,
993 std::vector<int> & chanSelectToUse,
994 bool skipDerived)
const {
996 chanSelectToUse = chanSelect;
999 if (chanSelect.empty() &&
1004 chanSelectToUse = {channelNumber};
1009 void ObsSpace::buildSortedObsGroups() {
1010 typedef std::map<std::size_t, std::vector<std::pair<float, std::size_t>>> TmpRecIdxMap;
1011 typedef TmpRecIdxMap::iterator TmpRecIdxIter;
1014 std::size_t nLocs = this->
nlocs();
1015 std::vector<float> SortValues(nLocs);
1017 std::vector<util::DateTime> Dates(nLocs);
1019 for (std::size_t iloc = 0; iloc < nLocs; iloc++) {
1020 SortValues[iloc] = (Dates[iloc] - Dates[0]).toSeconds();
1028 TmpRecIdxMap TmpRecIdx;
1029 for (
size_t iloc = 0; iloc < nLocs; iloc++) {
1030 TmpRecIdx[
recnums_[iloc]].push_back(std::make_pair(SortValues[iloc], iloc));
1033 for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
1035 sort(irec->second.begin(), irec->second.end());
1040 sort(irec->second.begin(), irec->second.end(),
1041 [](
const std::pair<float, std::size_t> &
p1,
1042 const std::pair<float, std::size_t> & p2){
1043 return (p2.first < p1.first ||
1044 (!(p1.first < p2.first) && p2.second > p1.second));});
1049 for (TmpRecIdxIter irec = TmpRecIdx.begin(); irec != TmpRecIdx.end(); ++irec) {
1050 recidx_[irec->first].resize(irec->second.size());
1051 for (std::size_t iloc = 0; iloc < irec->second.size(); iloc++) {
1052 recidx_[irec->first][iloc] = irec->second[iloc].second;
1058 void ObsSpace::buildRecIdxUnsorted() {
1059 std::size_t nLocs = this->
nlocs();
1060 for (
size_t iloc = 0; iloc < nLocs; iloc++) {
1066 void ObsSpace::saveToFile() {
1071 Dimensions_t maxVarSize;
1075 for (
auto & dimNameObject : dimVarList) {
1076 std::string dimName = dimNameObject.first;
1077 Dimensions_t dimSize = dimNameObject.second.getDimensions().dimsCur[0];
1078 Dimensions_t dimMaxSize = dimSize;
1079 Dimensions_t dimChunkSize = dimSize;
1081 dimSize = this->
nlocs();
1089 if (dimChunkSize == 0) {
1104 for (obsFrame.
frameInit(varList, dimVarList, dimsAttachedToVars, maxVarSize);
1106 Dimensions_t frameStart = obsFrame.
frameStart();
1107 for (
auto & varNameObject : varList) {
1109 std::string destVarName = varNameObject.first;
1113 Dimensions_t frameCount = obsFrame.
frameCount(destVarName);
1116 if (frameCount > 0) {
1118 Variable srcVar = varNameObject.second;
1125 if (srcVar.
isA<
int>()) {
1126 std::vector<int> varValues;
1127 srcVar.
read<
int>(varValues, memSelect, varSelect);
1129 }
else if (srcVar.
isA<
float>()) {
1130 std::vector<float> varValues;
1131 srcVar.
read<
float>(varValues, memSelect, varSelect);
1133 }
else if (srcVar.
isA<std::string>()) {
1134 std::vector<std::string> varValues;
1135 srcVar.
read<std::string>(varValues, memSelect, varSelect);
1144 template <
typename DataType>
1146 const size_t upperBoundOnGlobalNumOriginalRecs) {
1147 const DataType missing = util::missingValue(missing);
1152 std::vector<DataType> varVals;
1153 extendVar.
read<DataType>(varVals);
1155 for (
const auto & recordindex :
recidx_) {
1157 if (recordindex.first >= upperBoundOnGlobalNumOriginalRecs)
break;
1160 DataType fillValue = missing;
1161 for (
const auto & jloc : recordindex.second) {
1162 if (varVals[jloc] != missing) {
1163 fillValue = varVals[jloc];
1170 if (fillValue != missing) {
1171 for (
const auto & jloc : recidx_[recordindex.first + upperBoundOnGlobalNumOriginalRecs]) {
1172 varVals[jloc] = fillValue;
1178 extendVar.
write<DataType>(varVals);
1193 const int nlevs =
params.numModelLevels;
1195 const size_t numOriginalLocs = this->
nlocs();
1201 const std::set<size_t> uniqueOriginalRecs(
recnums_.begin(),
recnums_.end());
1209 size_t upperBoundOnGlobalNumOriginalLocs = 0;
1210 size_t upperBoundOnGlobalNumOriginalRecs = 0;
1211 if (numOriginalLocs > 0) {
1212 upperBoundOnGlobalNumOriginalLocs =
indx_.back() + 1;
1213 upperBoundOnGlobalNumOriginalRecs = *uniqueOriginalRecs.rbegin() + 1;
1215 dist_->max(upperBoundOnGlobalNumOriginalLocs);
1216 dist_->max(upperBoundOnGlobalNumOriginalRecs);
1227 size_t averagedLoc = 0;
1228 for (
size_t originalRec : uniqueOriginalRecs) {
1229 ASSERT(
dist_->isMyRecord(originalRec));
1230 const size_t averagedRec = originalRec;
1231 const size_t extendedRec = upperBoundOnGlobalNumOriginalRecs + averagedRec;
1234 std::vector<size_t> &locsInRecord =
recidx_[extendedRec];
1235 for (
int ilev = 0; ilev < nlevs; ++ilev, ++averagedLoc) {
1236 const size_t extendedLoc = numOriginalLocs + averagedLoc;
1237 const size_t globalAveragedLoc = originalRec * nlevs + ilev;
1238 const size_t globalExtendedLoc = upperBoundOnGlobalNumOriginalLocs + globalAveragedLoc;
1241 replicaDist->assignRecord(averagedRec, globalAveragedLoc, eckit::geometry::Point2());
1242 ASSERT(replicaDist->isMyRecord(averagedRec));
1244 indx_.push_back(globalExtendedLoc);
1245 locsInRecord.push_back(extendedLoc);
1248 replicaDist->computePatchLocs();
1250 const size_t numAveragedLocs = averagedLoc;
1251 const size_t numExtendedLocs = numOriginalLocs + numAveragedLocs;
1269 const std::vector <std::string> &nonMissingExtendedVars =
params.nonMissingExtendedVars;
1270 for (
auto & varName : nonMissingExtendedVars) {
1272 const std::string groupName =
"MetaData";
1273 const std::string fullVname =
fullVarName(groupName, varName);
1278 if (extendVar.
isA<
int>()) {
1279 extendVariable<int>(extendVar, upperBoundOnGlobalNumOriginalRecs);
1280 }
else if (extendVar.
isA<
float>()) {
1281 extendVariable<float>(extendVar, upperBoundOnGlobalNumOriginalRecs);
1282 }
else if (extendVar.
isA<std::string>()) {
1283 extendVariable<std::string>(extendVar, upperBoundOnGlobalNumOriginalRecs);
1290 std::vector <int> extended_obs_space(numExtendedLocs, 0);
1291 std::fill(extended_obs_space.begin() + numOriginalLocs, extended_obs_space.end(), 1);
1293 put_db(
"MetaData",
"extended_obs_space", extended_obs_space);
1297 std::unique_ptr<Accumulator<size_t>> accumulator = replicaDist->createAccumulator<
size_t>();
1298 for (
size_t averagedLoc = 0; averagedLoc < numAveragedLocs; ++averagedLoc)
1299 accumulator->addTerm(averagedLoc, 1);
1300 size_t globalNumAveragedLocs = accumulator->computeResult();
1306 upperBoundOnGlobalNumOriginalRecs);
1311 gnlocs_ += globalNumAveragedLocs;
1316 void ObsSpace::createMissingObsErrors() {
1317 std::vector<float> obserror;
1319 for (
size_t i = 0; i <
obsvars_.size(); ++i) {
1321 if (obserror.empty())
1322 obserror.assign(
nlocs(), util::missingValue(
float()));
Interfaces for ioda::Variable and related classes.
Groups are a new implementation of ObsSpaces.
This class exists inside of ioda::Group and provides the interface to manipulating Variables.
void set_dim_size(const ObsDimensionId dimId, std::size_t dimSize)
set the dimension size for the given dimension id
std::string get_dim_name(const ObsDimensionId dimId) const
return the dimension name for the given dimension id
ObsDimensionId get_dim_id(const std::string &dimName) const
return the standard id value for the given dimension name
std::map< ObsDimensionId, std::size_t > dim_id_size_
map going from dim id to dim size
std::size_t get_dim_size(const ObsDimensionId dimId) const
return the dimension size for the given dimension id
std::map< std::string, ObsDimensionId > dim_name_id_
map going from dim name to id
std::map< ObsDimensionId, std::string > dim_id_name_
map going from dim id to dim name
bool ioIsVarDimByNlocs(const std::string &varName) const
return true if variable is dimensioned by nlocs
Selection createMemSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameCount)
create selection object for accessing a memory buffer
virtual std::size_t frameNumLocs() const
return number of locations
const VarNameObjectList & ioVarList() const
return list of regular variables from ObsIo
VarDimMap ioVarDimMap() const
return map from variables to their attached dimension scales
Dimensions_t globalNumLocs() const
return number of locations that were selected from ObsIo
const VarNameObjectList & ioDimVarList() const
return list of dimension scale variables from ObsIo
Selection createVarSelection(const std::vector< Dimensions_t > &varShape, const Dimensions_t frameStart, const Dimensions_t frameCount)
create selection object for accessing a frame from a whole variable
Has_Attributes & atts() const
return attributes container from ObsIo
Dimensions_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
Has_Variables & vars() const
return variables container from ObsIo
virtual std::size_t frameNumRecs() const
return number of records
Implementation of ObsFrameRead class.
std::vector< std::size_t > index() const override
return list of indices indicating which locations were selected from ObsIo
void frameNext() override
move to the next frame
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
Dimensions_t adjNlocsFrameCount() const override
return adjusted nlocs frame count
bool frameAvailable() override
true if a frame is available (not past end of frames)
void frameInit() override
initialize for walking through the frames
bool readFrameVar(const std::string &varName, std::vector< int > &varData)
read a frame variable
Dimensions_t frameStart() override
return current frame starting index
Dimensions_t adjNlocsFrameStart() const override
return adjusted nlocs frame start
std::shared_ptr< const Distribution > distribution()
return the MPI distribution
std::vector< std::size_t > recnums() const override
return list of record numbers from ObsIo
Implementation of ObsFrameWrite class.
void writeFrameVar(const std::string &varName, const std::vector< int > &varData)
write a frame variable
void frameNext(const VarNameObjectList &varList) override
move to the next frame
Dimensions_t frameCount(const std::string &varName) override
return current frame count for variable
void frameInit(const VarNameObjectList &varList, const VarNameObjectList &dimVarList, const VarDimMap &varDimMap, const Dimensions_t maxVarSize) override
initialize for walking through the frames
bool frameAvailable() override
true if a frame is available (not past end of frames)
Dimensions_t frameStart() override
return current frame starting index
static ObsGroup generate(Group &emptyGroup, const NewDimensionScales_t &fundamentalDims, std::shared_ptr< const detail::DataLayoutPolicy > layout=nullptr)
Create an empty ObsGroup and populate it with the fundamental dimensions.
void resize(const std::vector< std::pair< Variable, ioda::Dimensions_t >> &newDims)
Resize a Dimension and every Variable that depends on it.
oops::Parameter< ObsGroupingParameters > obsGrouping
options controlling obs record grouping
oops::Parameter< int > maxFrameSize
maximum frame size
void extendObsSpace(const ObsExtendParameters &params)
Extend the ObsSpace according to the method requested in the configuration file.
void createVariables(const Has_Variables &srcVarContainer, Has_Variables &destVarContainer, const VarDimMap &dimsAttachedToVars)
create set of variables from source variables and lists
void saveToFile()
Dump the database into the output file.
const std::vector< std::size_t > & recnum() const
return reference to the record number vector
void put_db(const std::string &group, const std::string &name, const std::vector< int > &vdata, const std::vector< std::string > &dimList={ "nlocs" })
transfer data from vdata to the obs container
std::shared_ptr< const Distribution > dist_
MPI distribution object.
void get_db(const std::string &group, const std::string &name, std::vector< int > &vdata, const std::vector< int > &chanSelect={ }, bool skipDerived=false) const
transfer data from the obs container to vdata
std::size_t gnlocs_outside_timewindow_
number of nlocs from the obs source that are outside the time window
void buildRecIdxUnsorted()
Create the recidx data structure with unsorted record groups.
ObsDimInfo dim_info_
dimension information for variables in this obs space
void splitChanSuffix(const std::string &group, const std::string &name, const std::vector< int > &chanSelect, std::string &nameToUse, std::vector< int > &chanSelectToUse, bool skipDerived=false) const
split off the channel number suffix from a given variable name
VarDimMap dims_attached_to_vars_
map showing association of dim names with each variable name
bool has(const std::string &group, const std::string &name, bool skipDerived=false) const
return true if variable name exists in group group or (unless skipDerived is set to true) "Derived" +...
std::string get_dim_name(const ObsDimensionId dimId) const
return the standard dimension name for the given dimension id
void createObsGroupFromObsFrame(ObsFrameRead &obsFrame)
Initialize the database from a source (ObsFrame object)
std::string obsname_
name of obs space
std::size_t nrecs_
number of records
const std::vector< std::string > & obs_group_vars() const
return YAML configuration parameter: obsdatain.obsgrouping.group variables
oops::Variables obsvars_
Observation "variables" to be simulated.
std::map< int, int > chan_num_to_index_
map to go from channel number (not necessarily consecutive) to channel index (consecutive,...
void saveVar(const std::string &group, std::string name, const std::vector< VarType > &varValues, const std::vector< std::string > &dimList)
save a variable to the obs_group_ object
const eckit::mpi::Comm & comm() const
void initFromObsSource(ObsFrameRead &obsFrame)
initialize the in-memory obs_group_ (ObsGroup) object from the ObsIo source
RecIdxMap::const_iterator RecIdxIter
void createMissingObsErrors()
For each simulated variable that doesn't have an accompanying array in the ObsError or DerivedObsErro...
RecIdxMap recidx_
profile ordering
ObsSpaceParameters obs_params_
obs io parameters
void buildSortedObsGroups()
Create the recidx data structure holding sorted record groups.
ObsGroup obs_group_
observation data store
std::vector< std::size_t > indx_
indexes of locations to extract from the input obs file
void fillChanNumToIndexMap()
fill in the channel number to channel index map
std::string obs_sort_order() const
return YAML configuration parameter: obsdatain.obsgrouping.sort order
void resizeNlocs(const Dimensions_t nlocsSize, const bool append)
resize along nlocs dimension
std::size_t createChannelSelections(const Variable &variable, std::size_t nchansDimIndex, const std::vector< int > &channels, Selection &memSelect, Selection &obsGroupSelect) const
Create selections of slices of the variable variable along dimension nchansDimIndex corresponding to ...
bool recidx_is_sorted_
indicator whether the data in recidx_ is sorted
std::size_t globalNumLocsOutsideTimeWindow() const
return number of locations from obs source that were outside the time window
const eckit::mpi::Comm & commMPI_
MPI communicator.
std::size_t globalNumLocs() const
return the total number of locations in the corresponding obs spaces across all MPI tasks
std::size_t gnlocs_
total number of locations
std::map< std::vector< std::string >, Selection > known_be_selections_
cache for backend selection
const oops::Variables & obsvariables() const
return the collection of all simulated variables
const std::string & obsname() const
return the name of the obs type being stored
std::string obs_sort_var() const
return YAML configuration parameter: obsdatain.obsgrouping.sort variable
std::map< std::vector< std::string >, Selection > known_fe_selections_
cache for frontend selection
std::vector< std::size_t > recnums_
record numbers associated with the location indexes
void setMaxVarSize(const Dimensions_t maxVarSize)
set the maximum variable size
ObsTopLevelParameters top_level_
sub groups of parameters
void setDimScale(const std::string &dimName, const Dimensions_t curSize, const Dimensions_t maxSize, const Dimensions_t chunkSize)
set a new dimension scale
oops::RequiredParameter< oops::Variables > simVars
simulated variables
oops::OptionalParameter< ObsExtendParameters > obsExtend
extend the ObsSpace with extra fixed-size records
oops::RequiredParameter< std::string > obsSpaceName
name of obs space
oops::Parameter< bool > saveObsDistribution
oops::OptionalParameter< ObsFileOutParameters > obsOutFile
output specification by writing to a file
oops::Parameter< oops::Variables > derivedSimVars
oops::Parameter< std::string > distName
name of MPI distribution
const ObsIoParametersBase & obsIoInParameters() const
parameters indicating where to load data from
A Selection represents the bounds of the data, in ioda or in userspace, that you are reading or writi...
virtual Attribute_Implementation read(gsl::span< char > data, const Type &in_memory_dataType) const
The fundamental read function. Backends overload this function to implement all read operations.
virtual Group open(const std::string &name) const
Open a group.
Has_Variables vars
Use this to access variables.
virtual bool exists(const std::string &name) const
virtual Attribute open(const std::string &name) const
Open an Attribute by name.
Variable createWithScales(const std::string &name, const std::vector< Variable > &dimension_scales, const VariableCreationParameters &params=VariableCreationParameters::defaulted< DataType >())
Convenience function to create a Variable from certain dimension scales.
virtual Variable open(const std::string &name) const
Open a Variable by name.
virtual std::vector< std::string > list() const
virtual bool exists(const std::string &name) const
Does a Variable with the specified name exist?
virtual FillValueData_t getFillValue() const
Retrieve the fill value.
bool isA() const
Convenience function to check a Variable's storage type.
virtual bool isDimensionScaleAttached(unsigned int DimensionNumber, const Variable &scale) const
Is a dimension scale attached to this Variable in a certain position?
virtual bool hasFillValue() const
Check if a variable has a fill value set.
virtual Dimensions getDimensions() const
virtual std::vector< std::vector< Named_Variable > > getDimensionScaleMappings(const std::list< Named_Variable > &scalesToQueryAgainst, bool firstOnly=true) const
Which dimensions are attached at which positions? This function may offer improved performance on som...
virtual Variable read(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all) const
Read the Variable - as char array. Ordering is row-major.
virtual Variable write(gsl::span< char > data, const Type &in_memory_dataType, const Selection &mem_selection=Selection::all, const Selection &file_selection=Selection::all)
The fundamental write function. Backends overload this function to implement all write operations.
virtual Variable resize(const std::vector< Dimensions_t > &newDims)
Resize the variable.
IODA_DL std::string genUniqueName()
Convenience function to generate a random file name.
BackendNames
Backend names.
IODA_DL Group constructBackend(BackendNames name, BackendCreationParameters &params)
This is a simple factory style function that will instantiate a different backend based on a given na...
@ Create
Create a new file.
@ ObsStore
ObsStore in-memory.
@ Truncate_If_Exists
If the file already exists, overwrite it.
Selection & extent(const VecDimensions_t &sz)
Provide the dimensions of the object that you are selecting from.
Selection & select(const SingleSelection &s)
Append a new selection.
bool extractChannelSuffixIfPresent(const std::string &name, std::string &nameWithoutChannelSuffix, int &channel)
constexpr int Unlimited
Specifies that a dimension is resizable to infinity.
std::vector< std::pair< std::string, Variable > > VarNameObjectList
typedef for holding list of variable names with associated variable object
std::string fullVarName(const std::string &groupName, const std::string &varName)
form full variable name given individual group and variable names
std::vector< std::shared_ptr< NewDimensionScale_Base > > NewDimensionScales_t
std::map< std::string, std::vector< std::string > > VarDimMap
typedef for holding dim names attached to variables
void collectVarDimInfo(const ObsGroup &obsGroup, VarNameObjectList &varObjectList, VarNameObjectList &dimVarObjectList, VarDimMap &dimsAttachedToVars, Dimensions_t &maxVarSize0)
collect variable and dimension information from a ioda ObsGroup
std::vector< util::DateTime > convertRefOffsetToDtime(const int refIntDtime, const std::vector< float > &timeOffsets)
convert a reference time plus time offsets to DateTime objects
std::shared_ptr< Distribution > createReplicaDistribution(const eckit::mpi::Comm &comm, std::shared_ptr< const Distribution > master, const std::vector< std::size_t > &masterRecordNums)
Create a suitable replica distribution for the distribution master.
std::vector< util::DateTime > convertDtStringsToDtime(const std::vector< std::string > &dtStrings)
convert datetime strings to DateTime objects
std::vector< Dimensions_t > dimsCur
The dimensions of the data.
Dimensions_t dimensionality
The dimensionality (rank) of the data.
Used to specify backend creation-time properties.
BackendFileActions action
BackendCreateModes createMode
A named pair of (variable_name, ioda::Variable).
Used to specify Variable creation-time properties.
VariableCreationParameters & setFillValue(DataType fill)
Container used to store and manipulate fill values.