9 #include <unordered_map>
13 #include <boost/multi_array.hpp>
14 #include <boost/variant.hpp>
16 #include "eckit/exception/Exceptions.h"
17 #include "eckit/parser/CSVParser.h"
18 #include "eckit/utils/StringTools.h"
20 #include "ioda/Misc/StringFuncs.h"
21 #include "ioda/ObsSpace.h"
23 #include "oops/util/Logger.h"
// NOTE(review): fragment -- the enclosing class and the signatures of the first two
// overloads are not visible in this excerpt. Presumably these are the operator()
// overloads of the AppendValueVisitor that is applied to each cell further down
// in this file -- confirm against the full source.
//
// Integer column: `value` is set to the missing-value indicator returned by
// util::missingValue. The condition selecting this path is not visible here.
50 value = util::missingValue(value);
// The cell held in value_ is read as long long and narrowed to int. How this
// assignment is chosen over the one above is not visible in this excerpt.
53 value =
static_cast<int>(value_.as<
long long>());
54 values.push_back(value);
// Float column: same pattern, reading the cell as double and narrowing to float.
// The condition selecting the missing-value path is again not visible.
60 value = util::missingValue(value);
62 value =
static_cast<float>(value_.as<
double>());
63 values.push_back(value);
// String column: appends the cell held in value_ to `values`.
66 void operator()(std::vector<std::string> &values)
const {
// Read the cell as a string.
67 std::string value = value_.as<std::string>();
// NOTE(review): original line 68 is absent from this excerpt; presumably it guards
// the assignment below so that only a missing-value marker is replaced -- confirm.
69 value = util::missingValue(value);
70 values.push_back(value);
// Copies the elements of `source` into `destination`, an N x 1 x 1 array of
// Destination values (N = source.size()), translating the Source missing-value
// indicator into the Destination missing-value indicator.
// NOTE(review): the function name and its first parameter (original line 78) are
// not visible in this excerpt.
77 template <
typename Source,
typename Destination>
79 boost::multi_array<Destination, 3> &destination) {
// Missing-value indicators for each type, as returned by util::missingValue.
80 const Source missingSource = util::missingValue(Source());
81 const Destination missingDestination = util::missingValue(Destination());
// One entry per source element; the second and third extents are fixed at 1.
82 destination.resize(boost::extents[source.size()][1][1]);
83 for (
size_t i = 0; i < source.size(); ++i)
// Values that differ from the Source missing-value indicator are converted with static_cast.
84 if (source[i] != missingSource)
85 destination[i][0][0] =
static_cast<Destination
>(source[i]);
// Presumably the else branch (original line 86 is not visible): store the
// Destination missing-value indicator instead.
87 destination[i][0][0] = missingDestination;
// NOTE(review): fragment of a class template parameterised on ExtractedValue.
// Presumably this is the ConvertToBoostMultiArrayVisitor that is constructed with
// the payload array later in this file -- the class header, constructor and
// operator() signatures are not visible here, so confirm against the full source.
91 template <
typename ExtractedValue>
// Overload enabled (via std::enable_if) when T is convertible to ExtractedValue.
99 typename std::enable_if<std::is_convertible<T, ExtractedValue>::value,
bool>::type
// Resize the output to values.size() x 1 x 1 and copy element by element, relying
// on the conversion from T to ExtractedValue.
102 output_.resize(boost::extents[values.size()][1][1]);
103 for (
size_t i = 0; i < values.size(); ++i)
104 output_[i][0][0] = values[i];
// Overload enabled when T is NOT convertible to ExtractedValue.
107 template <
typename T,
108 typename std::enable_if<!std::is_convertible<T, ExtractedValue>::value,
bool>::type
// No conversion is available for this combination of types.
112 throw eckit::NotImplemented(Here());
// Reference to the array that receives the converted values.
116 boost::multi_array<ExtractedValue, 3> &
output_;
// Returns the index (offset from columnNames.begin()) of the single column whose
// name belongs to `payloadGroup`, i.e. begins with "<payloadGroup>/" or ends with
// "@<payloadGroup>".
// Throws eckit::UserError if no column name matches or if more than one matches.
// NOTE(review): the first line of the signature (original line 123) is not visible
// in this excerpt; the name findPayloadColumn is taken from the call site later in
// this file.
124 const std::string &payloadGroup) {
125 const std::string prefix = payloadGroup +
'/';
126 const std::string suffix =
'@' + payloadGroup;
// Predicate: true if `name` starts with the prefix or ends with the suffix.
127 auto isInPayloadGroup = [&prefix, &suffix](
const std::string &name) {
128 return eckit::StringTools::beginsWith(name, prefix) ||
129 eckit::StringTools::endsWith(name, suffix);
// Locate the first matching column; none at all is a user error.
131 auto payloadColumnIt = std::find_if(columnNames.begin(), columnNames.end(), isInPayloadGroup);
132 if (payloadColumnIt == columnNames.end())
133 throw eckit::UserError(
"No payload column found: no column name begins with '" + prefix +
134 "' or ends with '" + suffix +
"'",
// Any further match after the first one makes the payload column ambiguous.
136 if (std::any_of(payloadColumnIt + 1, columnNames.end(), isInPayloadGroup))
137 throw eckit::UserError(
"Multiple payload candidates found: "
138 "more than one column name begins with '" + prefix +
139 "' or ends with '" + suffix +
"'", Here());
// Convert the iterator to a zero-based column index.
140 return payloadColumnIt - columnNames.begin();
// NOTE(review): fragment -- the signature (original line 144) and the return
// statement are not visible. Presumably this is createColumn<T>, which is called
// with numValues where the per-column containers are created -- confirm.
143 template <
typename T>
// Start from an empty vector with capacity reserved for numValues elements.
145 std::vector<T> values;
146 values.reserve(numValues);
// checkPayloadColumnType: validates the type name declared for the payload column.
// NOTE(review): the primary template's declaration and the headers of the first
// two bodies below are not visible in this excerpt.
152 template <
typename ExtractedValue>
// Two identical checks follow, presumably the bodies of two numeric
// specialisations (their headers are not visible): only the type names "float"
// and "int" are accepted, anything else raises eckit::UserError.
// NOTE(review): the column-creation code later in this file also accepts
// "integer" as a type name; a payload column declared as "integer" would be
// rejected by these checks -- confirm whether that is intended.
157 if (type !=
"float" && type !=
"int")
158 throw eckit::UserError(
"The payload column must contain numeric data", Here());
163 if (type !=
"float" && type !=
"int")
164 throw eckit::UserError(
"The payload column must contain numeric data", Here());
// std::string specialisation: only the type names "string" and "datetime" are
// accepted, anything else raises eckit::UserError.
168 void checkPayloadColumnType<std::string>(
const std::string &type) {
169 if (type !=
"string" && type !=
"datetime")
170 throw eckit::UserError(
"The payload column must contain strings or datetimes", Here());
// NOTE(review): fragment of a constructor belonging to a class template
// parameterised on ExtractedValue; the class name and the parameter list
// (original line 176) are not visible in this excerpt.
175 template <
typename ExtractedValue>
// Store the given path in filepath_; it is the file decoded by the CSV loading
// code further down in this file.
177 : filepath_(filepath)
// Const member function that reads the CSV file at filepath_ and fills `result`
// with one payload array (the column belonging to interpolatedArrayGroup) and a
// map of the remaining columns keyed by column name.
// Throws eckit::UserError on an empty file, inconsistent column counts, an
// unsupported type name, or a missing/ambiguous/wrongly typed payload column.
// NOTE(review): the method name and return type (original line 181), the
// declarations of `result` and `numValues`, and several conditions and loop
// headers are not visible in this excerpt.
180 template <
typename ExtractedValue>
182 const std::string &interpolatedArrayGroup)
const {
// Decode the whole file into an eckit::Value. The meaning of the second
// argument (false) is not established here -- see the CSVParser documentation.
185 const eckit::Value contents = eckit::CSVParser::decodeFile(filepath_,
false );
186 const size_t numRows = contents.size();
// Raised when no data could be loaded (the guarding condition is not visible).
192 throw eckit::UserError(
"No data could be loaded from the file '" + filepath_ +
"'", Here());
// First row: column names.
196 const eckit::Value nameHeader = contents[0];
197 const size_t numColumns = nameHeader.size();
198 std::vector<std::string> columnNames(numColumns);
// NOTE(review): this reserve looks redundant -- the vector was just constructed
// with numColumns elements, so its capacity is already at least numColumns.
199 columnNames.reserve(numColumns);
200 for (
size_t column = 0; column < numColumns; ++column)
201 columnNames[column] = nameHeader[column].as<std::string>();
// The payload column is identified from the names as they appear in the file,
// i.e. before the path conversion applied just below.
203 const size_t payloadColumnIndex =
findPayloadColumn(columnNames, interpolatedArrayGroup);
// Rewrite every column name with ioda::convertV1PathToV2Path.
207 for (std::string &columnName : columnNames)
208 columnName = ioda::convertV1PathToV2Path(columnName);
// Second row: per-column type names; it must have as many entries as the first row.
211 const eckit::Value typeHeader = contents[1];
212 if (typeHeader.size() != numColumns)
213 throw eckit::UserError(
"The number of columns in line 2 differs from that in line 1", Here());
// Create one empty, typed container per column according to its declared type.
216 std::vector<DataExtractorInputBase::Coordinate> columns(numColumns);
217 for (
size_t column = 0; column < numColumns; ++column) {
218 const std::string type = typeHeader[column];
// The payload column's type must be compatible with ExtractedValue.
219 if (column == payloadColumnIndex)
220 checkPayloadColumnType<ExtractedValue>(type);
// "string"/"datetime" -> std::string, "int"/"integer" -> int, "float" -> float.
221 if (type ==
"string" || type ==
"datetime") {
222 columns[column] = createColumn<std::string>(numValues);
223 }
else if (type ==
"int" || type ==
"integer") {
224 columns[column] = createColumn<int>(numValues);
225 }
else if (type ==
"float") {
226 columns[column] = createColumn<float>(numValues);
// Presumably the final else branch (original line 227 is not visible): any
// other type name is rejected.
228 throw eckit::UserError(
"Unsupported data type '" + type +
"'", Here());
// Data rows (the enclosing loop header is not visible in this excerpt).
234 const eckit::Value rowContents = contents[row];
// A row consisting of a single empty cell is special-cased; the action taken
// (original line 236) is not visible -- presumably the row is skipped.
235 if (rowContents.size() == 1 && rowContents[0] ==
"")
// Every other row must have exactly numColumns cells; the message reports the
// line as 1 + row.
237 if (rowContents.size() != numColumns)
238 throw eckit::UserError(
"The number of columns in line " + std::to_string(1 + row) +
239 " differs from that in line 1", Here());
// Append each cell to its column, dispatching on the column's element type.
240 for (
size_t column = 0; column < numColumns; ++column)
241 boost::apply_visitor(AppendValueVisitor(rowContents[column]), columns[column]);
// NOTE(review): the use of firstDim is not visible in this excerpt.
245 const int firstDim = 0;
// Distribute the loaded columns into `result`.
247 for (
size_t column = 0; column < numColumns; ++column) {
// The payload column is converted into result.payloadArray.
248 if (column == payloadColumnIndex) {
249 ConvertToBoostMultiArrayVisitor<ExtractedValue> visitor(result.
payloadArray);
250 boost::apply_visitor(visitor, columns[column]);
// Presumably the else branch (original line 251 is not visible): the column is
// moved into result.coordsVals under its converted name.
252 result.
coordsVals[columnNames[column]] = std::move(columns[column]);
// Raised when no data could be loaded (the guarding condition is not visible).
259 throw eckit::UserError(
"No data could be loaded from the file '" + filepath_ +
"'", Here());