IODA Bundle
test_text_reader.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2012 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 #include <string>
12 #include <cmath>
13 #include <algorithm>
14 
15 #include "eckit/testing/Test.h"
16 
17 #include "odc/csv/TextReader.h"
19 
20 using namespace eckit::testing;
21 
22 
23 // ------------------------------------------------------------------------------------------------------
24 
25 CASE("Read columnar data from CSV") {
26 
27  // n.b. have the first long string not be in the first row, which will force the mechanism
28  // to resize.
29 
30  std::stringstream data;
31  data << "col1:INTEGER,col2:REAL,col3:DOUBLE,col4:STRING,col5:BITFIELD[a:1;b:2;c:5]\n";
32  data << "1,1.001,0,a-string,0\n";
33  data << "1234,0,88,b-string,2\n";
34  data << "-5432,-6.543210,99.999,string-c,4\n";
35  data << "-2147483648,6.543210,11.63e-37,testing,7\n";
36  data << "2147483647,NaN,Nan,12345678,8\n";
37  data << "0,+inf,-inf,this-is-a-longer-string,11\n";
38  data << "0,-inf,0,short,0\n";
39 
40  odc::TextReader reader(data, ",");
41  odc::TextReader::iterator it = reader.begin();
42 
43  std::vector<long> INTEGERS {1, 1234, -5432, -2147483648, 2147483647, 0, 0};
44  std::vector<float> REALS {1.001, 0.0, -6.543210, 6.543210, std::numeric_limits<float>::quiet_NaN(),
45  std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity()};
46  std::vector<double> DOUBLES {0, 88, 99.999, 11.63e-37, std::numeric_limits<double>::quiet_NaN(),
47  -std::numeric_limits<double>::infinity(), 0};
48  std::vector<std::string> STRINGS {"a-string", "b-string", "string-c", "testing", "12345678", "this-is-a-longer-string", "short"};
49  std::vector<long> BITFIELDS {0, 2, 4, 7, 8, 11, 0};
50 
51  EXPECT(it->columns().size() == 5);
52 
53  EXPECT(it->columns()[0]->name() == "col1");
54  EXPECT(it->columns()[1]->name() == "col2");
55  EXPECT(it->columns()[2]->name() == "col3");
56  EXPECT(it->columns()[3]->name() == "col4");
57  EXPECT(it->columns()[4]->name() == "col5");
58 
59  EXPECT(it->columns()[0]->type() == odc::api::INTEGER);
60  EXPECT(it->columns()[1]->type() == odc::api::REAL);
61  EXPECT(it->columns()[2]->type() == odc::api::DOUBLE);
62  EXPECT(it->columns()[3]->type() == odc::api::STRING);
63  EXPECT(it->columns()[4]->type() == odc::api::BITFIELD);
64 
65  size_t count = 0;
66  for (; it != reader.end(); ++it) {
67  ++count;
68 
69  // Only resize on new, longer string
70  EXPECT(it->isNewDataset() == (count == 6));
71 
72  EXPECT(it->data(0) == INTEGERS[count-1]);
73  EXPECT(it->data(1) == double(REALS[count-1]) || (std::isnan(it->data(1)) && std::isnan(REALS[count-1])));
74  EXPECT(it->data(2) == DOUBLES[count-1] || (std::isnan(it->data(2)) && std::isnan(DOUBLES[count-1])));
75  EXPECT(it->dataSizeDoubles(3) == ((count >= 6) ? 3 : 1));
76  EXPECT(::strncmp(STRINGS[count-1].c_str(), (char*)&it->data(3), it->dataSizeDoubles(3) * sizeof(double)) == 0);
77  EXPECT(it->data(4) == BITFIELDS[count-1]);
78 
79  for (const auto& col : it->columns()) {
80  EXPECT(!col->hasMissing());
81  }
82  }
83 
84  EXPECT(count == 7);
85 }
86 
87 CASE("Starting with long strings") {
88 
89  // n.b. have the first long string not be in the first row, which will force the mechanism
90  // to resize.
91 
92  std::stringstream data;
93  data << "col4:STRING\n";
94  data << "a-string-is-long\n";
95  data << "b-string-is-very-long-indeed-whoah\n";
96 
97  odc::TextReader reader(data, ",");
98  odc::TextReader::iterator it = reader.begin();
99 
100  std::vector<std::string> STRINGS {"a-string-is-long", "b-string-is-very-long-indeed-whoah"};
101 
102  EXPECT(it->columns().size() == 1);
103  EXPECT(it->columns()[0]->name() == "col4");
104  EXPECT(it->columns()[0]->type() == odc::api::STRING);
105 
106  size_t count = 0;
107  for (; it != reader.end(); ++it) {
108  ++count;
109 
110  // Only resize on new, longer string
111  EXPECT(it->isNewDataset());
112  EXPECT(it->dataSizeDoubles(0) == (count == 1 ? 2 : 5));
113  EXPECT(::strncmp(STRINGS[count-1].c_str(), (char*)&it->data(0), it->dataSizeDoubles(0) * sizeof(double)) == 0);
114 
115  for (const auto& col : it->columns()) {
116  EXPECT(!col->hasMissing());
117  }
118  }
119 
120  EXPECT(count == 2);
121 }
122 
123 CASE("Test parsing bitfields") {
124 
125  std::string bitfieldDefinition = "en4_level_flag@hdr:bitfield[TempLevelReject:1;SaltLevelReject:1;LevelVertStability:1;IncreasingDepthCheck:1;NotUsed1:1;NotUsed2:1;NotUsed3:1;NotUsed4:1;NotUsed5:1;TempLevelStatList:1;TempLevelArgoQC:1;TempLevelOutOfRangeSetToMDI:1;TempLevelEN3List:1;TempLevelVertCheck:1;TempLevelNoBckgrnd:1;TempLevelBays:1;TempLevelBaysBud:1;TempLevelBaysBudReinstate:1;TempLevelWaterfallCheck:1;NotUsed6:1;NotUsed7:1;SaltLevelStatList:1;SaltLevelArgoQC:1;SaltLevelOutOfRangeSetToMDI:1;SaltLevelEN3List:1;SaltLevelVertCheck:1;SaltLevelNoBckgrnd:1;SaltLevelBays:1;SaltLevelBaysBud:1;SaltLevelBaysBudReinstate:1;SaltLevelWaterfallCheck:1]";
126 
127  eckit::sql::BitfieldDef def (odc::TextReaderIterator::parseBitfields(bitfieldDefinition));
128  eckit::sql::FieldNames names(def.first);
129  eckit::sql::Sizes sizes(def.second);
130 
131  std::vector<std::string> FIELD_NAMES {
132  "TempLevelReject", "SaltLevelReject", "LevelVertStability", "IncreasingDepthCheck",
133  "NotUsed1", "NotUsed2", "NotUsed3", "NotUsed4", "NotUsed5", "TempLevelStatList",
134  "TempLevelArgoQC", "TempLevelOutOfRangeSetToMDI", "TempLevelEN3List",
135  "TempLevelVertCheck", "TempLevelNoBckgrnd", "TempLevelBays", "TempLevelBaysBud",
136  "TempLevelBaysBudReinstate", "TempLevelWaterfallCheck", "NotUsed6", "NotUsed7",
137  "SaltLevelStatList", "SaltLevelArgoQC", "SaltLevelOutOfRangeSetToMDI",
138  "SaltLevelEN3List", "SaltLevelVertCheck", "SaltLevelNoBckgrnd", "SaltLevelBays",
139  "SaltLevelBaysBud", "SaltLevelBaysBudReinstate", "SaltLevelWaterfallCheck" };
140 
141  ASSERT(names.size() == 31);
142  ASSERT(sizes.size() == 31);
143 
144  EXPECT(names == FIELD_NAMES);
145  EXPECT(std::all_of(sizes.begin(), sizes.end(), [](int x){return x == 1;}));
146 }
147 
148 
149 CASE("Test parsing bitfields - 32bit limit") {
150  std::string bitfieldDefinition = "en4_level_flag@hdr:bitfield[TempLevelReject:1;SaltLevelReject:1;LevelVertStability:1;IncreasingDepthCheck:1;NotUsed1:1;NotUsed2:1;NotUsed3:1;NotUsed4:1;NotUsed5:1;TempLevelStatList:1;TempLevelArgoQC:1;TempLevelOutOfRangeSetToMDI:1;TempLevelEN3List:1;TempLevelVertCheck:1;TempLevelNoBckgrnd:1;TempLevelBays:1;TempLevelBaysBud:1;TempLevelBaysBudReinstate:1;TempLevelWaterfallCheck:1;NotUsed6:1;NotUsed7:1;SaltLevelStatList:1;SaltLevelArgoQC:1;SaltLevelOutOfRangeSetToMDI:1;SaltLevelEN3List:1;SaltLevelVertCheck:1;SaltLevelNoBckgrnd:1;SaltLevelBays:1;SaltLevelBaysBud:1;SaltLevelBaysBudReinstate:1;SaltLevelWaterfallCheck:1;NotUsed8:1;NotUsed9:1]";
151  EXPECT_THROWS_AS(odc::TextReaderIterator::parseBitfields(bitfieldDefinition), eckit::UserError);
152 }
153 
154 // ------------------------------------------------------------------------------------------------------
155 
156 int main(int argc, char* argv[]) {
157  return run_tests(argc, argv);
158 }
159 
static void count(void *counter, const double *data, size_t n)
Definition: UnitTests.cc:531
bool isNewDataset()
Definition: IteratorProxy.h:97
size_t dataSizeDoubles(size_t i) const
Definition: IteratorProxy.h:92
DATA * data()
Definition: IteratorProxy.h:77
const core::MetaData & columns() const
Definition: IteratorProxy.h:94
iterator end() const
Definition: TextReader.cc:77
iterator begin()
Definition: TextReader.cc:71
static eckit::sql::BitfieldDef parseBitfields(const std::string &)
@ BITFIELD
Definition: ColumnType.h:27
int main(int argc, char *argv[])
CASE("Read columnar data from CSV")