IODA Bundle
test_reencode_string_table.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2012 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 #include <ctime>
12 #include <cstdlib>
13 #include <algorithm>
14 #include <cmath>
15 
16 #include "eckit/testing/Test.h"
17 #include "eckit/system/SystemInfo.h"
18 #include "eckit/eckit_ecbuild_config.h"
19 
20 #include "odc/api/ColumnType.h"
21 #include "odc/core/Codec.h"
22 #include "odc/codec/String.h"
23 
24 
25 using namespace eckit::testing;
26 using namespace odc::core;
27 using namespace odc::codec;
28 
29 // ------------------------------------------------------------------------------------------------------
30 
31 // TODO with codecs:
32 //
33 // i) Make them templated on the stream/datahandle directly
34 // ii) Construct them with a specific data handle/stream
35 // iii) Why are we casting data handles via a void* ???
36 
/// Prepend the codec-selection header to a buffer of codec-initialising data.
///
/// The header consists of a 4-byte unsigned length of the codec name, followed
/// by the characters of the name itself (no terminator). It is inserted at the
/// front of @p data; the existing contents are shifted back unchanged.
///
/// @param data       Buffer to modify in place.
/// @param codec_name Name of the codec to record in the header.
/// @param bigEndian  If true the length is written most-significant byte first,
///                   otherwise least-significant byte first.
/// @return The number of bytes inserted, i.e. 4 + codec_name.length().
size_t prepend_codec_selection_header(std::vector<unsigned char>& data,
                                      const std::string& codec_name,
                                      bool bigEndian=false) {

    constexpr size_t kLengthBytes = 4;
    const size_t name_length = codec_name.size();

    std::vector<unsigned char> header(kLengthBytes, 0);
    header.reserve(kLengthBytes + name_length);

    // Encode every byte of the length field. Writing only the lowest byte
    // would silently truncate the length of any name longer than 255 bytes.
    for (size_t k = 0; k < kLengthBytes; ++k) {
        const unsigned char byte = static_cast<unsigned char>((name_length >> (8 * k)) & 0xffu);
        header[bigEndian ? (kLengthBytes - 1 - k) : k] = byte;
    }

    header.insert(header.end(), codec_name.begin(), codec_name.end());

    // One insertion at the front, so the existing payload is only shifted once.
    data.insert(data.begin(), header.begin(), header.end());

    return header.size();
}
51 
52 
// Round-trip test for the string-table codec header. For each combination of
// source byte order and index width (8 or 16 bits) an encoded header is built
// in memory, loaded into a codec, saved again, and the re-encoded bytes are
// compared with the input.
53 CASE("Character strings can be stored in a flat list, and indexed") {
54 
55  // n.b. no missing values
56 
    // Fields of the encoded header, written least-significant byte first. The
    // numeric fields contain embedded NUL bytes, so their widths are derived
    // from the entry position in the loop below rather than with strlen().
57  const char* source_data[] = {
58 
59  // Codec header
60  "\x00\x00\x00\x00", // 0 = hasMissing
61  "\x00\x00\x00\x00\x00\x00\x00\x00", // min unspecified
62  "\x00\x00\x00\x00\x00\x00\x00\x00", // max unspecified
63  "\x00\x00\x00\x00\x00\x00\x00\x00", // missingValue unspecified
64 
65  // How many strings are there in the table?
66  "\x06\x00\x00\x00",
67 
68  // String data (prepended with lengths)
69  // length, data, "cnt (discarded)", index
70 
71  "\x08\x00\x00\x00", "ghijklmn", "\x00\x00\x00\x00", "\x00\x00\x00\x00",
72  "\x0c\x00\x00\x00", "uvwxyzabcdef", "\x00\x00\x00\x00", "\x01\x00\x00\x00", // too long
73  "\x08\x00\x00\x00", "opqrstuv", "\x00\x00\x00\x00", "\x02\x00\x00\x00",
74  "\x02\x00\x00\x00", "ab", "\x00\x00\x00\x00", "\x03\x00\x00\x00", // This string is too short
75  "\x06\x00\x00\x00", "ghijkl", "\x00\x00\x00\x00", "\x04\x00\x00\x00",
76  "\x08\x00\x00\x00", "mnopqrst", "\x00\x00\x00\x00", "\x05\x00\x00\x00", // 8-byte length
77  };
78 
79  // Loop through the endiannesses (and index widths) of the source data
80 
81  for (int i = 0; i < 4; i++) {
82 
    // Even i: big-endian source data; odd i: little-endian source data.
83  bool bigEndianSource = (i % 2 == 0);
84 
    // i = 2, 3: the indices appended after the header are two bytes wide; otherwise one byte.
85  bool bits16 = (i > 1);
86 
87  std::vector<unsigned char> data;
88 
    // Entries 0-4 are fixed-width numeric fields (4 bytes for entries 0 and 4,
    // 8 bytes for entries 1-3). From entry 5 onwards each string occupies four
    // entries (length, characters, cnt, index): the characters are the entries
    // with (j+2) % 4 == 0 and are measured with strlen(); the others are 4 bytes.
89  for (size_t j = 0; j < sizeof(source_data) / sizeof(const char*); j++) {
90  size_t len =
91  (j < 5) ? ((j == 0 || j == 4) ? 4 : 8)
92  : ((j+2) % 4 == 0 ? ::strlen(source_data[j]) : 4);
93  data.insert(data.end(), source_data[j], source_data[j] + len);
94 
95  // For big-endian source data, byte-swap the field that was just appended.
    // n.b. Don't reverse the endianness of the string data.
96  if (bigEndianSource && !((j > 5) && ((j+2) % 4 == 0)))
97  std::reverse(data.end()-len, data.end());
98  }
99 
100  // Which strings do we wish to decode (look at them in reverse. nb refers to index column)
    // The indices 5..0 are appended after the header. With bits16 each index gets a
    // zero high-order byte, placed before or after the value to match the source byte order.
101 
102  for (int n = 5; n >= 0; n--) {
103  if (bits16 && bigEndianSource)
104  data.push_back(0);
105  data.push_back(static_cast<unsigned char>(n));
106  if (bits16 && !bigEndianSource)
107  data.push_back(0);
108  }
109 
110  // Construct codec directly, and decode the header
111 
112  // Skip name of codec (data was built without a codec-selection header, so the
    // stream starts directly at the codec header fields).
    // The first argument is true when the source byte order differs from the host's
    // (presumably a byte-swap flag -- confirm against GeneralDataStream).
113  GeneralDataStream ds(bigEndianSource != eckit::system::SystemInfo::isBigEndian(), &data[0], data.size());
114 
    // NOTE(review): the four branches below are empty in this listing; listing lines
    // 118, 120, 124 and 126 are absent, and presumably held the statements that
    // construct the codec for same/other byte order and 8/16-bit indices. As shown,
    // c is still empty when load() is called -- confirm against the original file.
115  std::unique_ptr<Codec> c;
116  if (bigEndianSource == eckit::system::SystemInfo::isBigEndian()) {
117  if (bits16) {
119  } else {
121  }
122  } else {
123  if (bits16) {
125  } else {
127  }
128  }
129  c->load(ds);
130 
    // 148 = 28 bytes of header fields + 4-byte string count
    //     + 6 strings x 12 bytes (length, cnt, index) + 44 bytes of string characters.
    // i.e. load() is expected to stop at the end of the header, before the appended indices.
131  EXPECT(ds.position() == eckit::Offset(148));
132 
133  // Now re-encode the codec header, and check that we get what we started with!
134 
    // The write buffer is zero-filled first so that untouched bytes can be detected below.
135  eckit::Buffer writeBuffer(4096);
136  ::memset(writeBuffer, 0, writeBuffer.size());
137  GeneralDataStream ds2(bigEndianSource != eckit::system::SystemInfo::isBigEndian(), writeBuffer);
138  c->save(ds2);
139 
140  // Check that the data is the same both times!
141 
142  EXPECT(ds2.position() == eckit::Offset(148));
143 
    // Disabled debug dump of input vs. re-encoded bytes. It refers to dh_write,
    // which is not declared in the visible code.
144 // eckit::Log::info() << "DATA: " << std::endl;
145 // for (size_t n = 0; n < data.size(); n++) {
146 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(dh_write.getBuffer()[n]) << std::endl;
147 // if (int(data[n]) != int(dh_write.getBuffer()[n]))
148 // eckit::Log::info() << "******************************" << std::endl;
149 // }
150 
151  // The header should be correctly re-encoded.
152  EXPECT(::memcmp(writeBuffer, &data[0], 148) == 0);
153 
154  // We haven't encoded the data itself
    // Only the 6 bytes following the header are checked, including when bits16 means
    // 12 index bytes were appended to data. This i shadows the outer loop variable.
155  for (size_t i = 148; i < 154; i++) {
156  EXPECT(writeBuffer[i] == 0);
157  }
158  }
159 }
160 
161 // ------------------------------------------------------------------------------------------------------
162 
163 int main(int argc, char* argv[]) {
164 
165  return run_tests(argc, argv);
166 }
eckit::Offset position() const
Definition: DataStream.h:132
int main(int argc, char *argv[])
CASE("Character strings can be stored in a flat list, and indexed")
size_t prepend_codec_selection_header(std::vector< unsigned char > &data, const std::string &codec_name, bool bigEndian=false)