IODA Bundle
test_codecs_write.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2012 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 #include <cmath>
12 #include <ctime>
13 #include <cstdlib>
14 #include <algorithm>
15 
16 #include "eckit/eckit_ecbuild_config.h"
17 #include "eckit/io/Buffer.h"
18 #include "eckit/system/SystemInfo.h"
19 #include "eckit/testing/Test.h"
20 
21 #include "odc/api/ColumnType.h"
22 #include "odc/core/Codec.h"
23 #include "odc/codec/Constant.h"
24 #include "odc/codec/Integer.h"
26 #include "odc/codec/Real.h"
27 #include "odc/codec/String.h"
28 
29 
30 using namespace eckit;
31 using namespace eckit::testing;
32 using namespace odc::codec;
33 using odc::core::Codec;
37 
38 // Log::info() << "DS: " << ds.position() << std::endl;
39 // for (int i = 0; i < ds.position(); ++i) {
40 // Log::info() << i << " : " << std::hex << (int)ds.data()[i] << std::dec << " -- " << (char)ds.data()[i] << std::endl;
41 // }
42 // Log::info() << "pos: " << ds.position() << " : " << expectedHdrSize << std::endl;
43 // exit(-1);
44 
45 // ------------------------------------------------------------------------------------------------------
46 
47 // TODO with codecs:
48 //
49 // i) Make them templated on the stream/datahandle directly
50 // ii) Construct them with a specific data handle/stream
51 // iii) Why are we casting data handles via a void* ???
52 // iv) Why are load/save not virtual functions?
53 // v) We should ASSERT() that encoded data is constant for constant codecs. Currently it is just ignored.
54 
55 // Given the codec-initialising data, add the header on that is used to construct the
56 // codec.
57 
/// Prepends a codec-selection header to codec-initialising data.
///
/// The header is a 4-byte length field holding the length of the codec name,
/// followed by the bytes of the codec name itself (no terminator). Whatever
/// was already in @p data ends up immediately after the header.
///
/// @param data        Buffer to modify in place; the header is inserted at the front.
/// @param codec_name  Name of the codec to be selected.
/// @param bigEndian   If true the length field is written most-significant
///                    byte first, otherwise least-significant byte first.
/// @return Number of bytes inserted (4 + codec_name.length()).
///
/// @note All four bytes of the length field are populated, so names of 256
///       bytes or more are encoded correctly rather than being truncated to
///       their low byte. Only the low 32 bits of the length are representable.
size_t prepend_codec_selection_header(std::vector<unsigned char>& data,
                                      const std::string& codec_name,
                                      bool bigEndian=false) {

    const size_t nameLength = codec_name.length();

    // Build the 32-bit length field in the requested byte order.
    unsigned char lengthField[4];
    for (size_t i = 0; i < 4; ++i) {
        const unsigned char byte = static_cast<unsigned char>((nameLength >> (8 * i)) & 0xff);
        lengthField[bigEndian ? 3 - i : i] = byte;
    }

    // Length field first, then the codec name directly behind it.
    data.insert(data.begin(), lengthField, lengthField + 4);
    data.insert(data.begin() + 4, codec_name.begin(), codec_name.end());

    return 4 + nameLength;
}
69 
70 
72 
73 public:
74 
75  EndianCodecSave(bool bigEndianData, Codec& codec) : // Saves the codec via codec.save() into an internal buffer, in the requested byte order
76  buffer_(4095) { // fixed-size scratch buffer that receives the saved data
77  // Choose the stream byte order by comparing the requested endianness with the host's.
78  if (eckit::system::SystemInfo::isBigEndian() == bigEndianData) {
79  DataStream<SameByteOrder> ds(buffer_); // requested order equals host order
80  codec.save(ds);
81  position_ = ds.position(); // remember the stream position reached by the save
82  } else {
83  DataStream<OtherByteOrder> ds(buffer_); // requested order differs from host order
84  codec.save(ds);
85  position_ = ds.position(); // remember the stream position reached by the save
86  }
87  }
88 
89  const char* data() const { return buffer_; } // read-only pointer obtained from buffer_ (via its conversion to const char*)
90  eckit::Offset position() const { return position_; } // current offset: set by the constructor after saving, updated by set()
91 
92  char* get() { return &buffer_[position_]; } // writable pointer to the buffer element at the current position
93  void set(char* p) { // Moves the current position to the buffer offset that p points at
94  ASSERT(p >= &buffer_[0]); // p must not lie before the start of the buffer
95  ASSERT(p - &buffer_[0] < static_cast<long>(buffer_.size())); // strict bound: an offset equal to buffer_.size() is rejected
96  position_ = static_cast<size_t>(p - &buffer_[0]); // store p as an offset from the buffer start
97  }
98 
99 private:
100  eckit::Buffer buffer_;
101  eckit::Offset position_;
102 };
103 
104 // Normal write process:
105 //
106 // i) Initialise codecs and set missing value if appropriate
107 // ii) Append values into a buffer for multiple rows
108 // iii) Call gatherStats on codecs once for each row
109 // iv) When block of rows is gathered, write header
110 // v) Write the data, by calling encode on the data
111 // [vi) Re-initialise fresh codecs].
112 
113 
114 CASE("Constant values consume no space in the output data buffer") {
115 
116  // Data in little endian format.
117  // "min" value is used for constants
118 
119  const char* expected_data[] = {
120 
121  // Codec header
122  "\x00\x00\x00\x00", // no missing value
123  "\xb7\xe6\x87\xb4\x80\x65\xd2\x41", // min (1234567890.1234567)
124  "\xb7\xe6\x87\xb4\x80\x65\xd2\x41", // max (1234567890.1234567)
125  "\x00\x00\x00\x00\x00\x00\x00\x00", // missing value unspecified
126 
127  };
128 
129  // Loop through endiannesses for the source data
130 
131  for (int i = 0; i < 2; i++) {
132 
133  bool bigEndianOutput = (i == 1);
134 
135  std::vector<unsigned char> data;
136 
137  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
138  size_t len = (j == 0) ? 4 : 8;
139  data.insert(data.end(), expected_data[j], expected_data[j] + len);
140  if (bigEndianOutput)
141  std::reverse(data.end()-len, data.end());
142  }
143 
144  // Initialise codecs
145 
146  std::unique_ptr<Codec> c;
147  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
149  } else {
151  }
152 
153  c->missingValue(0.0);
154 
155  // Statistics in writing order
156 
157  c->gatherStats(1234567890.1234567);
158  c->gatherStats(1234567890.1234567);
159  c->gatherStats(1234567890.1234567);
160  c->gatherStats(1234567890.1234567);
161  c->gatherStats(1234567890.1234567);
162 
163  EXPECT(!c->hasMissing());
164 
165  // Encode the header to the data stream
166 
167  EndianCodecSave ds(bigEndianOutput, *c);
168 
169  EXPECT(ds.position() == eckit::Offset(28));
170  EXPECT(::memcmp(&data[0], ds.data(), data.size()) == 0);
171 
172  // Encode the data to wherever we want it (in reality would be after the header, via a buffer.).
173 
174  // n.b. We don't produce any data when encoding with this codec
175 
176  std::vector<unsigned char> buf(1024, 0);
177  EXPECT(c->encode(&buf[0], 1234567890.1234567) == &buf[0]);
178  EXPECT(c->encode(&buf[0], 1234567890.1234567) == &buf[0]);
179  EXPECT(c->encode(&buf[0], 1234567890.1234567) == &buf[0]);
180  EXPECT(c->encode(&buf[0], 1234567890.1234567) == &buf[0]);
181  EXPECT(c->encode(&buf[0], 1234567890.1234567) == &buf[0]);
182 
183  for (size_t n = 0; n < buf.size(); n++)
184  EXPECT(buf[n] == 0);
185  }
186 }
187 
188 
189 CASE("Constant integer values consume no space in the output data buffer") {
190 
191  // Data in little endian format.
192  // "min" value is used for constants
193 
194  const char* expected_data[] = {
195 
196  // Codec header
197  "\x00\x00\x00\x00", // no missing value
198  "\x00\x00\x80\xb4\x80\x65\xd2\x41", // min (1234567890.)
199  "\x00\x00\x80\xb4\x80\x65\xd2\x41", // max (1234567890.)
200  "\x00\x00\x00\x00\x00\x00\x00\x00", // missing value unspecified
201 
202  };
203 
204  // Loop through endiannesses for the source data
205 
206  for (int i = 0; i < 2; i++) {
207 
208  bool bigEndianOutput = (i == 1);
209 
210  std::vector<unsigned char> data;
211 
212  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
213  size_t len = (j == 0) ? 4 : 8;
214  data.insert(data.end(), expected_data[j], expected_data[j] + len);
215  if (bigEndianOutput)
216  std::reverse(data.end()-len, data.end());
217  }
218 
219  // Initialise codecs
220 
221  std::unique_ptr<Codec> c;
222  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
224  } else {
226  }
227 
228  c->missingValue(0.0);
229 
230  // Statistics in writing order
231 
232  int64_t x = 1234567890;
233  double& rx(reinterpret_cast<double&>(x));
234  c->gatherStats(rx);
235  c->gatherStats(rx);
236  c->gatherStats(rx);
237  c->gatherStats(rx);
238  c->gatherStats(rx);
239 
240  EXPECT(!c->hasMissing());
241 
242  // Encode the header to the data stream
243 
244  EndianCodecSave ds(bigEndianOutput, *c);
245 
246  EXPECT(ds.position() == eckit::Offset(28));
247  EXPECT(::memcmp(&data[0], ds.data(), data.size()) == 0);
248 
249  // Encode the data to wherever we want it (in reality would be after the header, via a buffer.).
250 
251  // n.b. We don't produce any data when encoding with this codec
252 
253  std::vector<unsigned char> buf(1024, 0);
254  EXPECT(c->encode(&buf[0], rx) == &buf[0]);
255  EXPECT(c->encode(&buf[0], rx) == &buf[0]);
256  EXPECT(c->encode(&buf[0], rx) == &buf[0]);
257  EXPECT(c->encode(&buf[0], rx) == &buf[0]);
258  EXPECT(c->encode(&buf[0], rx) == &buf[0]);
259 
260  for (size_t n = 0; n < buf.size(); n++)
261  EXPECT(buf[n] == 0);
262  }
263 }
264 
265 
266 
267 CASE("constant strings consume no output data space") {
268 
269  // Data in little endian format.
270  // "min" value is used for constants
271 
272  // NOTE that strings are NOT swapped around when things are in the
273  // reverse byte order.
274 
275  const char* expected_data[] = {
276 
277  // Codec header
278  "\x00\x00\x00\x00", // no missing value
279  "hi-there", // minimum supplies string
280  "hi-there", // maximum unspecified
281  "\x00\x00\x00\x00\x00\x00\x00\x00", // missing value unspecified
282  };
283 
284  // Loop through endiannesses for the source data
285 
286  for (int i = 0; i < 2; i++) {
287 
288  bool bigEndianOutput = (i == 1);
289 
290  std::vector<unsigned char> data;
291 
292  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
293  size_t len = (j == 0) ? 4 : 8;
294  data.insert(data.end(), expected_data[j], expected_data[j] + len);
295  // n.b. Don't swap string data around with endianness
296  }
297 
298  // Initialise codecs
299 
300  std::unique_ptr<Codec> c;
301  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
303  } else {
305  }
306 
307  c->missingValue(0.0);
308 
309  // Statistics in writing order
310 
311  const char* str = "hi-there";
312  c->gatherStats(*reinterpret_cast<const double*>(str));
313  c->gatherStats(*reinterpret_cast<const double*>(str));
314  c->gatherStats(*reinterpret_cast<const double*>(str));
315  c->gatherStats(*reinterpret_cast<const double*>(str));
316 
317  EXPECT(!c->hasMissing());
318 
319  // Encode the header to the data stream
320 
321  EndianCodecSave ds(bigEndianOutput, *c);
322 
323  EXPECT(ds.position() == eckit::Offset(28));
324  EXPECT(::memcmp(&data[0], ds.data(), data.size()) == 0);
325 
326  // Encode the data to wherever we want it (in reality would be after the header, via a buffer.).
327 
328  // n.b. We don't produce any data when encoding with this codec
329 
330  std::vector<unsigned char> buf(1024, 0);
331  EXPECT(c->encode(&buf[0], *reinterpret_cast<const double*>(str)) == &buf[0]);
332  EXPECT(c->encode(&buf[0], *reinterpret_cast<const double*>(str)) == &buf[0]);
333  EXPECT(c->encode(&buf[0], *reinterpret_cast<const double*>(str)) == &buf[0]);
334  EXPECT(c->encode(&buf[0], *reinterpret_cast<const double*>(str)) == &buf[0]);
335 
336  for (size_t n = 0; n < buf.size(); n++)
337  EXPECT(buf[n] == 0);
338  }
339 }
340 
341 
342 CASE("Constant integer or missing value behaves a bit oddly") {
343 
344  EXPECT(odc::MDI::integerMDI() == 2147483647);
345 
346  // Note that there is absolutely NOTHING that enforces that these are integers...
347  // --> This test tests the generic case, with a double, which is odd
348  // --> TODO: Really we ought to enforce integers for an integer case...
349 
350  double customMissingValue = 2222222222;
351  double baseValue = 987654321.9876;
352  const size_t expectedHdrSize = 28;
353 
354  const char* expected_data[] = {
355 
356  // Codec header
357  "\x01\x00\x00\x00", // has missing value
358 // "\x00\x00\x80\x58\x34\x6f\xcd\x41, // min (little-endian: 987654321)
359 // "\x00\x00\x80\x58\x34\x6f\xcd\x41, // max == min
360  "\xad\x69\xfe\x58\x34\x6f\xcd\x41", // minimum value = 987654321.9876
361  "\xad\x69\xfe\xd7\x34\x6f\xcd\x41", // maximum value = 987654321.9876 + 254
362  "\x00\x00\xc0\x71\x8d\x8e\xe0\x41" // missingValue = 2222222222
363  };
364 
365  // Loop through endiannesses for the source data
366 
367  for (int i = 0; i < 2; i++) {
368 
369  bool bigEndianOutput = (i == 1);
370 
371  std::vector<unsigned char> data;
372 
373  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
374  size_t len = (j == 0) ? 4 : 8;
375  data.insert(data.end(), expected_data[j], expected_data[j] + len);
376  if (bigEndianOutput)
377  std::reverse(data.end()-len, data.end());
378  }
379 
380  // Insert the sequence of test values
381 
382  data.push_back(0);
383  data.push_back(0xff); // missing
384  for (size_t n = 0; n < 255; n++) {
385  data.push_back(static_cast<unsigned char>(n));
386  }
387  data.push_back(0xff); // missing
388 
389  // Initialise codecs
390 
391  std::unique_ptr<Codec> c;
392  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
394  } else {
396  }
397 
398  c->missingValue(customMissingValue);
399  EXPECT(!c->hasMissing());
400 
401  // Statistics in writing order
402 
403  c->gatherStats(baseValue + 0);
404  c->gatherStats(customMissingValue); // missing
405  for (size_t n = 0; n < 255; n++) {
406  c->gatherStats(baseValue + n);
407  }
408  c->gatherStats(customMissingValue); // missing
409 
410  // Detects that we have added a missing value
411  EXPECT(c->hasMissing());
412 
413  // Encode the header to the data stream
414 
415  EndianCodecSave ds(bigEndianOutput, *c);
416 
417  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
418 
419  // Encode the data to wherever we want it (in reality would be after the header, via a buffer.).
420  // Expect one byte per element.
421 
422  char* posNext;
423 
424  EXPECT((posNext = c->encode(ds.get(), baseValue + 0)) == (ds.get() + 1));
425  ds.set(posNext);
426  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1));
427  ds.set(posNext);
428 
429  for (size_t n = 0; n < 255; n++) {
430  EXPECT((posNext = c->encode(ds.get(), baseValue + n)) == (ds.get() + 1));
431  ds.set(posNext);
432  }
433 
434  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1));
435  ds.set(posNext);
436 
437  // Check we have the data we expect
438 
439  size_t nelem = 258;
440  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + nelem));
441 
442  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + nelem) == 0);
443  }
444 }
445 
446 
447 CASE("real constant or missing value is not quite constant") {
448 
449  EXPECT(odc::MDI::realMDI() == -2147483647);
450 
451  // TODO: Really something labelled constant ought to be actually constant...
452  // Do this one big-endian just because.
453 
454  double customMissingValue = -2222222222;
455  double baseValue = 987654321.9876;
456  const size_t expectedHdrSize = 28;
457 
458  const char* expected_data[] = {
459 
460  // Codec header
461  "\x01\x00\x00\x00", // has missing value
462  "\xad\x69\xfe\x58\x34\x6f\xcd\x41", // minimum value = 987654321.9876
463  "\xad\x69\xfe\xd7\x34\x6f\xcd\x41", // maximum value = 987654321.9876 + 254
464  "\x00\x00\xc0\x71\x8d\x8e\xe0\xc1" // missingValue = -2222222222
465  };
466 
467  // Loop through endiannesses for the source data
468 
469  for (int i = 0; i < 2; i++) {
470 
471  bool bigEndianOutput = (i == 1);
472 
473  std::vector<unsigned char> data;
474 
475  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
476  size_t len = (j == 0) ? 4 : 8;
477  data.insert(data.end(), expected_data[j], expected_data[j] + len);
478  if (bigEndianOutput)
479  std::reverse(data.end()-len, data.end());
480  }
481 
482  // Insert the sequence of test values
483 
484  data.push_back(0);
485  data.push_back(0xff); // missing
486  for (size_t n = 0; n < 255; n++) {
487  data.push_back(static_cast<unsigned char>(n));
488  }
489  data.push_back(0xff); // missing
490 
491  // Initialise codecs
492 
493  std::unique_ptr<Codec> c;
494  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
496  } else {
498  }
499 
500  c->missingValue(customMissingValue);
501  EXPECT(!c->hasMissing());
502 
503  // Statistics in writing order
504 
505  c->gatherStats(baseValue + 0);
506  c->gatherStats(customMissingValue);
507  for (size_t n = 0; n < 255; n++) {
508  c->gatherStats(baseValue + n);
509  }
510  c->gatherStats(customMissingValue);
511 
512  // Detect that we have added a missing value
513  EXPECT(c->hasMissing());
514 
515  // Encode the header to the data stream
516 
517  EndianCodecSave ds(bigEndianOutput, *c);
518 
519  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
520 
521  // Encode the data to wherever we want it
522  // Expect one byte per element.
523 
524  char* posNext;
525 
526  EXPECT((posNext = c->encode(ds.get(), baseValue + 0)) == (ds.get() + 1));
527  ds.set(posNext);
528  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1));
529  ds.set(posNext);
530  for (size_t n = 0; n < 255; n++) {
531  EXPECT((posNext = c->encode(ds.get(), baseValue + n)) == (ds.get() + 1));
532  ds.set(posNext);
533  }
534  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1));
535  ds.set(posNext);
536 
537  // Check we have the data we expect
538 
539  size_t nelem = 258;
540  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + nelem));
541 
542 // eckit::Log::info() << "DATA: " << std::endl;
543 // for (size_t n = 0; n < expectedHdrSize + nelem; n++) {
544 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
545 // if (int(data[n]) != int(ds.data()[n]))
546 // eckit::Log::info() << "******************************" << std::endl;
547 // }
548 
549  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + nelem) == 0);
550  }
551 }
552 
553 
554 CASE("Character strings are 8-byte sequences coerced into being treated as doubles") {
555 
556  // n.b. there are no missing values for CodecChars
557 
558  const size_t expectedHdrSize = 32;
559 
560  const char* expected_data[] = {
561 
562  // Codec header
563  "\x00\x00\x00\x00", // 0 = hasMissing
564  "\x00\x00\x00\x00\x00\x00\x00\x00", // min = 987654321.9876 (big-endian) // UNUSED
565  "\x00\x00\x00\x00\x00\x00\x00\x00", // max = 987654321.9876 (big-endian) // UNUSED
566  "\x00\x00\x00\x00\x00\x00\x00\x00", // missingValue = -2147483647 // UNUSED
567  "\x00\x00\x00\x00", // Unused 0 value required by chars codec
568 
569  // String data
570  "\0\0\0\0\0\0\0\0",
571  "hi-there",
572  "\0\xff\0\xff\0\xff\0\xff",
573  "a-string",
574  "\xff\xff\xff\xff\xff\xff\xff\xff",
575  };
576 
577  // Loop through endiannesses for the source data
578 
579  for (int i = 0; i < 2; i++) {
580 
581  bool bigEndianOutput = (i == 1);
582 
583  std::vector<unsigned char> data;
584 
585  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
586  size_t len = (j == 0 || j == 4) ? 4 : 8;
587  data.insert(data.end(), expected_data[j], expected_data[j] + len);
588 
589  // n.b. Don't reverse the endianness of the string data.
590  if (bigEndianOutput && j < 5)
591  std::reverse(data.end()-len, data.end());
592  }
593 
594  // Initialise codecs
595 
596  std::unique_ptr<Codec> c;
597  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
599  } else {
601  }
602 
603  c->missingValue(0.0);
604  EXPECT(!c->hasMissing());
605 
606  // Statistics in writing order
607 
608  c->gatherStats(*reinterpret_cast<const double*>(expected_data[5]));
609  c->gatherStats(*reinterpret_cast<const double*>(expected_data[6]));
610  c->gatherStats(*reinterpret_cast<const double*>(expected_data[7]));
611  c->gatherStats(*reinterpret_cast<const double*>(expected_data[8]));
612  c->gatherStats(*reinterpret_cast<const double*>(expected_data[9]));
613 
614  EXPECT(!c->hasMissing());
615 
616  // Encode the header to the data stream
617 
618  EndianCodecSave ds(bigEndianOutput, *c);
619 
620  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
621 
622  // Encode the data where we want it (ensuring that the data is written in appropriately
623  // sized blocks).
624 
625  char* posNext;
626  for(size_t n = 5; n < 10; n++) {
627  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(expected_data[n])))
628  == (ds.get() + 8));
629  ds.set(posNext);
630  }
631 
632  // Check we have the data we expect
633 
634  // n.b. We exclude the min/max/missing section of the header. This is not used for reading
635  // CodecChars (at the moment), and the existing codec does odd things. We don't want
636  // to code this behaviour into a test, as that would be weird.
637 
638  size_t dataSize = (8 * 5);
639 
640  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + dataSize));
641  EXPECT(::memcmp(&data[0], ds.data(), 4) == 0);
642  EXPECT(::memcmp(&data[28], &ds.data()[28], expectedHdrSize + dataSize - 28) == 0);
643  }
644 }
645 
646 
647 CASE("long floating point values can include the missing data value") {
648 
649  const uint64_t inf_bits = 0x7ff0000000000000;
650  const uint64_t neg_inf_bits = 0xfff0000000000000;
651  const uint64_t sig_nan_bits = 0xfffffffffffff77f;
652  const uint64_t quiet_nan_bits = 0xffffffffffffff7f;
653 
654  const size_t expectedHdrSize = 28;
655  double customMissingValue = 2222222222;
656 
657  const char* expected_data[] = {
658 
659  // Codec header
660  "\x01\x00\x00\x00", // has missing value
661  "\x00\x00\x00\x00\x00\x00\xf0\xff", // minimum (-inf)
662  "\x00\x00\x00\x00\x00\x00\xf0\x7f", // maximum (+inf)
663  "\x00\x00\xc0\x71\x8d\x8e\xe0\x41", // missingValue = 2222222222
664 
665  // data to encode
666  "\x00\x00\x00\x00\x00\x00\x00\x00", // 0.0
667  "\x53\xa4\x0c\x54\x34\x6f\x9d\x41", // 123456789.0123456
668  "\x9b\xe6\x57\xb7\x80\x65\x02\xc2", // -9876543210.9876
669  "\x00\x00\x00\x00\x00\x00\xf0\x7f", // +inf
670  "\x00\x00\x00\x00\x00\x00\xf0\xff", // -inf
671  "\x7f\xf7\xff\xff\xff\xff\xff\xff", // NaN (signalling)
672  "\x7f\xff\xff\xff\xff\xff\xff\xff", // NaN (quiet)
673  "\x00\x00\xc0\xff\xff\xff\xdf\xc1", // -2147483647 (otherwise the missing value)
674  "\x00\x00\xc0\x71\x8d\x8e\xe0\x41" // missingValue = 2222222222
675  };
676 
677  // Loop through endiannesses for the source data
678 
679  for (int i = 0; i < 2; i++) {
680 
681  bool bigEndianOutput = (i == 1);
682 
683  std::vector<unsigned char> data;
684 
685  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
686  size_t len = (j == 0) ? 4 : 8;
687  data.insert(data.end(), expected_data[j], expected_data[j] + len);
688  if (bigEndianOutput)
689  std::reverse(data.end()-len, data.end());
690  }
691 
692  // Initialise codecs
693 
694  std::unique_ptr<Codec> c;
695  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
697  } else {
699  }
700 
701  c->missingValue(customMissingValue);
702  EXPECT(!c->hasMissing());
703 
704  // Statistics in writing order
705 
706  c->gatherStats(0.0);
707  c->gatherStats(123456789.0123456);
708  c->gatherStats(-9876543210.9876);
709  double v = *reinterpret_cast<const double*>(&inf_bits);
710  EXPECT(std::isinf(v));
711  EXPECT(v > 0);
712  c->gatherStats(v);
713  v = *reinterpret_cast<const double*>(&neg_inf_bits);
714  EXPECT(std::isinf(v));
715  EXPECT(v < 0);
716  c->gatherStats(v);
717  v = *reinterpret_cast<const double*>(&sig_nan_bits);
718  EXPECT(std::isnan(v));
719  c->gatherStats(v);
720  v = *reinterpret_cast<const double*>(&quiet_nan_bits);
721  EXPECT(std::isnan(v));
722  c->gatherStats(v);
723  c->gatherStats(customMissingValue);
724  c->gatherStats(-2147483647);
725 
726  // Detect that we have added a missing value
727  EXPECT(c->hasMissing());
728 
729  // Encode the header to the data stream
730 
731  EndianCodecSave ds(bigEndianOutput, *c);
732 
733  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
734 
735  // Encode the data to wherever we want it
736  // Expect 8 bytes per element
737 
738  char* posNext;
739 
740  EXPECT((posNext = c->encode(ds.get(), 0.0)) == (ds.get() + 8));
741  ds.set(posNext);
742  EXPECT((posNext = c->encode(ds.get(), 123456789.0123456)) == (ds.get() + 8));
743  ds.set(posNext);
744  EXPECT((posNext = c->encode(ds.get(), -9876543210.9876)) == (ds.get() + 8));
745  ds.set(posNext);
746  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&inf_bits))) == (ds.get() + 8));
747  ds.set(posNext);
748  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&neg_inf_bits))) == (ds.get() + 8));
749  ds.set(posNext);
750  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&sig_nan_bits))) == (ds.get() + 8));
751  ds.set(posNext);
752  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&quiet_nan_bits))) == (ds.get() + 8));
753  ds.set(posNext);
754  EXPECT((posNext = c->encode(ds.get(), -2147483647)) == (ds.get() + 8));
755  ds.set(posNext);
756  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 8));
757  ds.set(posNext);
758 
759  // Check we have the data we expect
760 
761  size_t data_size = (9 * 8);
762  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
763 
764 // eckit::Log::info() << "DATA: " << std::endl;
765 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
766 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
767 // if (int(data[n]) != int(ds.data()[n]))
768 // eckit::Log::info() << "******************************" << std::endl;
769 // }
770 
771  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
772  }
773 }
774 
775 
776 CASE("short floating point values can include the missing data value") {
777 
778  // Use a curious, custom missingValue to show it is being used.
779  // n.b. This cannot be represented with a float!
780 
781  const uint64_t inf_bits = 0x7ff0000000000000;
782  const uint64_t neg_inf_bits = 0xfff0000000000000;
783  const uint32_t sig_nan_bits = 0xffffbf7f;
784  const uint32_t quiet_nan_bits = 0xffffff7f;
785 
786  const size_t expectedHdrSize = 28;
787  double customMissingValue = -22222222222;
788 
789  const char* expected_data[] = {
790 
791  // Codec header
792  "\x01\x00\x00\x00", // has missing value
793  "\x00\x00\x00\x00\x00\x00\xf0\xff", // minimum (-inf)
794  "\x00\x00\x00\x00\x00\x00\xf0\x7f", // maximum (+inf)
795  "\x00\x00\x38\xce\x30\xb2\x14\xc2", // missingValue = -22222222222
796 
797  // data to encode
798  "\x00\x00\x00\x00", // 0.0
799  "\x12\xbf\x1f\x49", // 654321.123
800  "\x00\x00\x80\x00", // Smallest available, internal missing value for short_real (1.17549435082229e-38)
801  "\xff\xff\x7f\xff", // Lowest available, internal missing value for short_real2 (-3.40282346638529e+38)
802  "\x00\x00\x80\x7f", // +inf
803  "\x00\x00\x80\xff", // -inf
804  "\x7f\xbf\xff\xff", // NaN (signalling)
805  "\x7f\xff\xff\xff", // NaN (quiet)
806  };
807 
808  // Loop through endiannesses for the source data
809 
810  for (int i = 0; i < 4; i++) {
811 
812  bool bigEndianOutput = (i % 2 == 0);
813  bool secondCodec = (i > 1);
814 
815  std::vector<unsigned char> data;
816 
817  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
818  size_t len = (j == 0 || j > 3) ? 4 : 8;
819  data.insert(data.end(), expected_data[j], expected_data[j] + len);
820  if (bigEndianOutput)
821  std::reverse(data.end()-len, data.end());
822  }
823 
824  // Add the missing value!!!
825 
826  uint32_t mv = secondCodec ? 0xff7fffff : 0x00800000;
827  data.insert(data.end(), reinterpret_cast<const unsigned char*>(&mv), reinterpret_cast<const unsigned char*>(&mv)+4);
828  if (bigEndianOutput)
829  std::reverse(data.end()-4, data.end());
830 
831  // Initialise codecs
832 
833  std::unique_ptr<Codec> c;
834  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
835  if (secondCodec) {
837  } else {
839  }
840  } else {
841  if (secondCodec) {
843  } else {
845  }
846  }
847 
848  c->missingValue(customMissingValue);
849  EXPECT(!c->hasMissing());
850 
851  // Statistics in writing order
852 
853  c->gatherStats(0.0);
854  c->gatherStats(654321.123);
855  c->gatherStats(1.17549435082229e-38);
856  c->gatherStats(-3.40282346638529e+38);
857  double v = *reinterpret_cast<const double*>(&inf_bits);
858  EXPECT(std::isinf(v));
859  EXPECT(v > 0);
860  c->gatherStats(v);
861  v = *reinterpret_cast<const double*>(&neg_inf_bits);
862  EXPECT(std::isinf(v));
863  EXPECT(v < 0);
864  c->gatherStats(v);
865  v = static_cast<double>(*reinterpret_cast<const float*>(&sig_nan_bits));
866  EXPECT(std::isnan(v));
867  c->gatherStats(v);
868  v = static_cast<double>(*reinterpret_cast<const float*>(&quiet_nan_bits));
869  EXPECT(std::isnan(v));
870  c->gatherStats(v);
871  c->gatherStats(customMissingValue);
872 
873  // Detect that we have added a missing value
874  EXPECT(c->hasMissing());
875 
876  // Encode the header to the data stream
877 
878  EndianCodecSave ds(bigEndianOutput, *c);
879 
880  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
881 
882  // Encode the data to wherever we want it
883  // Expect 4 bytes per element
884 
885  // n.b. If we directly supply the float that is equivalent to the internal missing value, it
886  // is just treated as missing on read, without flagging hasMissing(). We do this here
887  // just to demonstrate. See ODB-367
888 
889  char* posNext;
890 
891  EXPECT((posNext = c->encode(ds.get(), 0.0)) == (ds.get() + 4));
892  ds.set(posNext);
893  EXPECT((posNext = c->encode(ds.get(), 654321.123)) == (ds.get() + 4));
894  ds.set(posNext);
895 
896  size_t offsetMissing = 0;
897  if (secondCodec) {
898  EXPECT((posNext = c->encode(ds.get(), 1.17549435082229e-38)) == (ds.get() + 4));
899  ds.set(posNext);
900  EXPECT_THROWS_AS(c->encode(ds.get(), -3.40282346638529e+38), eckit::AssertionFailed);
901  offsetMissing = ds.position();
902  ds.set(ds.get() + 4);
903  } else {
904  EXPECT_THROWS_AS(c->encode(ds.get(), 1.17549435082229e-38), eckit::AssertionFailed);
905  offsetMissing = ds.position();
906  ds.set(ds.get() + 4);
907  EXPECT((posNext = c->encode(ds.get(), -3.40282346638529e+38)) == (ds.get() + 4));
908  ds.set(posNext);
909  }
910 
911  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&inf_bits))) == (ds.get() + 4));
912  ds.set(posNext);
913  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(&neg_inf_bits))) == (ds.get() + 4));
914  ds.set(posNext);
915  EXPECT((posNext = c->encode(ds.get(), static_cast<double>(*reinterpret_cast<const float*>(&sig_nan_bits)))) == (ds.get() + 4));
916  ds.set(posNext);
917  EXPECT((posNext = c->encode(ds.get(), static_cast<double>(*reinterpret_cast<const float*>(&quiet_nan_bits)))) == (ds.get() + 4));
918  ds.set(posNext);
919  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 4));
920  ds.set(posNext);
921 
922  // Check we have the data we expect
923 
924  size_t data_size = (9 * 4);
925  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
926 
927 // eckit::Log::info() << "DATA: " << std::endl;
928 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
929 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
930 // if (int(data[n]) != int(ds.data()[n]))
931 // eckit::Log::info() << "******************************" << std::endl;
932 // }
933 
934  // The missing values won't be encoded when they are hit, so skip them in the data test
935 
936  EXPECT(offsetMissing != 0);
937  EXPECT(::memcmp(&data[0], ds.data(), offsetMissing) == 0);
938  EXPECT(::memcmp(&data[0] + offsetMissing + 4,
939  ds.data() + offsetMissing + 4,
940  expectedHdrSize + data_size - offsetMissing - 4) == 0);
941  }
942 }
943 
944 
// Test case: for each output endianness, build the expected header + data bytes, gather
// statistics on the codec, save the codec header through EndianCodecSave, encode six
// values (expecting 4 bytes each) and memcmp the written buffer against the expected bytes.
945 CASE("32bit integers are as-is") {
946 
947  const size_t expectedHdrSize = 28;
948 
949  const char* expected_data[] = {
950 
951  // Codec header
952  "\x01\x00\x00\x00", // has missing value
953  "\x00\x00\x00\x00\x00\x00\xe0\xc1", // minimum = -2147483648
954  "\x00\x00\xc0\xff\xff\xff\xdf\x41", // maximum = 2147483647
955  "\x00\x00\x00\x1c\xaf\x7d\xaa\x41", // missing value = 222222222
956 
957  // data to encode
958  "\x00\x00\x00\x00", // 0
959  "\xff\xff\xff\xff", // -1
960  "\xff\xff\xff\x7f", // 2147483647 == largest
961  "\x00\x00\x00\x80", // -2147483648 == smallest
962  "\x8e\xd7\x3e\x0d", // 222222222 == missingValue (unused by codec)
963  "\x96\x28\x9c\xff" // -6543210
964  };
965 
966  // Loop through endiannesses for the source data
967 
968  for (int i = 0; i < 2; i++) {
969 
970  bool bigEndianOutput = (i == 1);
971 
972  std::vector<unsigned char> data;
973 
// Entry 0 (the hasMissing flag) and the data entries (j > 3) are 4 bytes long;
// entries 1-3 (minimum, maximum, missing value) are 8-byte doubles.
// The literals above are written least-significant byte first, so each element is
// reversed in place when big-endian output is expected.
974  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
975  size_t len = (j == 0 || j > 3) ? 4 : 8;
976  data.insert(data.end(), expected_data[j], expected_data[j] + len);
977  if (bigEndianOutput)
978  std::reverse(data.end()-len, data.end());
979  }
980 
981  // Initialise codecs
982 
// NOTE(review): both branches below are empty and the embedded numbering skips 985 and
// 987, so the statements that construct the codec appear to have been lost from this
// listing. As shown, c is still null when dereferenced below - restore from upstream.
983  std::unique_ptr<Codec> c;
984  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
986  } else {
988  }
989 
990  c->missingValue(222222222);
991  EXPECT(!c->hasMissing());
992 
993  // Statistics in writing order
994 
// hasMissing() is expected to stay false until the configured missing value itself
// (222222222) is passed to gatherStats().
995  c->gatherStats(0);
996  c->gatherStats(-1);
997  c->gatherStats(2147483647);
998  c->gatherStats(-2147483648);
999  EXPECT(!c->hasMissing());
1000  c->gatherStats(222222222);
1001  EXPECT(c->hasMissing());
1002  c->gatherStats(-6543210);
1003 
1004  // Encode the header to the data stream
1005 
1006  EndianCodecSave ds(bigEndianOutput, *c);
1007 
1008  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
1009 
1010  // Encode the data to wherever we want it
1011  // Expect 4 bytes per element
1012 
// Each encode() must return a pointer exactly 4 bytes past the current write position;
// ds.set() then advances the stream to that pointer.
1013  char* posNext;
1014 
1015  EXPECT((posNext = c->encode(ds.get(), 0)) == (ds.get() + 4));
1016  ds.set(posNext);
1017  EXPECT((posNext = c->encode(ds.get(), -1)) == (ds.get() + 4));
1018  ds.set(posNext);
1019  EXPECT((posNext = c->encode(ds.get(), 2147483647)) == (ds.get() + 4));
1020  ds.set(posNext);
1021  EXPECT((posNext = c->encode(ds.get(), -2147483648)) == (ds.get() + 4));
1022  ds.set(posNext);
1023  EXPECT((posNext = c->encode(ds.get(), 222222222)) == (ds.get() + 4));
1024  ds.set(posNext);
1025  EXPECT((posNext = c->encode(ds.get(), -6543210)) == (ds.get() + 4));
1026  ds.set(posNext);
1027 
1028  // Check we have the data we expect
1029 
// Six values were encoded above, 4 bytes each.
1030  size_t data_size = (6 * 4);
1031  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
1032 
1033 // eckit::Log::info() << "DATA: " << std::endl;
1034 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
1035 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
1036 // if (int(data[n]) != int(ds.data()[n]))
1037 // eckit::Log::info() << "******************************" << std::endl;
1038 // }
1039 
1040  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
1041  }
1042 }
1043 
1044 
// Test case: runs four iterations covering both output endiannesses, with and without
// the "withMissing" codec variant. Each iteration builds the expected header + data
// bytes, gathers statistics relative to a non-integral base value, saves the header via
// EndianCodecSave, encodes six values (expecting 2 bytes each) and memcmp's the result.
1045 CASE("16bit integers are stored with an offset. This need not (strictly) be integral!!") {
1046 
1047  // n.b. we use a non-standard, non-integral minimum to demonstrate the offset behaviour.
1048 
1049  // Use a curious, custom missingValue to show it is being used.
1050 
1051  const double customMissingValue = 6.54565456545599971850917315786e-123;
1052  const double baseVal = -123.45;
1053  const size_t expectedHdrSize = 28;
1054 
1055  const char* expected_data[] = {
1056 
1057  // Codec header
1058  "\x01\x00\x00\x00", // has missing value
1059  "\xcd\xcc\xcc\xcc\xcc\xdc\x5e\xc0", // minimum = -123.45
1060  "\x9a\x99\x99\x99\x71\xf0\xef\x40", // maximum = -123.45 + 65535
1061  "\x04\x4f\xab\xa0\xe4\x4e\x91\x26", // missing value = 6.54565456545599971850917315786e-123
1062 
1063  // data to encode
1064  "\x00\x00", // 0.0
1065  "\xff\xff", // 65535 and the missing value
1066  "\xff\x7f", // 32767 (no negatives)
1067  "\x00\x80", // 32768 (no negatives)
1068  "\x39\x30" // 12345
1069  };
1070 
1071  // Loop through endiannesses for the source data
1072 
// i % 2 selects the output endianness; i > 1 selects the withMissing variant.
1073  for (int i = 0; i < 4; i++) {
1074 
1075  bool bigEndianOutput = (i % 2 == 0);
1076 
1077  bool withMissing = (i > 1);
1078 
1079 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1080 // eckit::Log::info() << "ITERATION: " << i << std::endl;
1081 // eckit::Log::info() << "big endian: " << (bigEndianOutput ? "T" : "F") << std::endl;
1082 // eckit::Log::info() << "with missing: " << (withMissing ? "T" : "F") << std::endl;
1083 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1084 
1085  std::vector<unsigned char> data;
1086 
// Entry 0 (hasMissing flag) is 4 bytes, entries 1-3 are 8-byte doubles and the data
// entries (j > 3) are 2 bytes. Each element is reversed in place for big-endian output.
1087  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
1088  size_t len = (j == 0) ? 4 : (j > 3) ? 2 : 8;
1089  data.insert(data.end(), expected_data[j], expected_data[j] + len);
1090  if (bigEndianOutput)
1091  std::reverse(data.end()-len, data.end());
1092  }
1093 
1094  // The missing value is odd. It should be handled properly for the Missing codec, but things are just mangled
1095  // if we do the direct codec.
1096  // See ODB-370 and ODB-371
// NOTE(review): mv is copied in host byte order and then reversed for big-endian output,
// which presumably assumes a little-endian host for the 0x007b value - confirm.
1097  uint16_t mv = withMissing ? 0xffff : 0x007b;
1098  data.insert(data.end(), reinterpret_cast<const unsigned char*>(&mv), reinterpret_cast<const unsigned char*>(&mv) + 2);
1099  if (bigEndianOutput)
1100  std::reverse(data.end()-2, data.end());
1101 
1102  // Initialise codecs
1103 
// NOTE(review): all four branches below are empty and the embedded numbering skips
// 1107, 1109, 1113 and 1115, so the codec-construction statements appear to have been
// lost from this listing. As shown, c is still null when dereferenced below.
1104  std::unique_ptr<Codec> c;
1105  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
1106  if (withMissing) {
1108  } else {
1110  }
1111  } else {
1112  if (withMissing) {
1114  } else {
1116  }
1117  }
1118 
1119  c->missingValue(customMissingValue);
1120  EXPECT(!c->hasMissing());
1121 
1122  // Statistics in writing order
1123 
1124  c->gatherStats(baseVal + 0.0);
1125  c->gatherStats(baseVal + 65535); // n.b. This triggers a missingValue in Int16Missing. See ODB-369
1126  c->gatherStats(baseVal + 32767);
1127  c->gatherStats(baseVal + 32768);
1128  c->gatherStats(baseVal + 12345);
1129  EXPECT(!c->hasMissing());
1130  c->gatherStats(customMissingValue);
1131  EXPECT(c->hasMissing()); // See ODB-371
1132 
1133  // Encode the header to the data stream
1134 
1135  EndianCodecSave ds(bigEndianOutput, *c);
1136 
1137  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
1138 
1139  // Encode the data to wherever we want it
1140  // Expect 2 bytes per element
1141 
1142  // n.b. If we directly supply the value that is equivalent to the internal missing value, it
1143  // is just treated as missing on read, without flagging hasMissing(). We do this here
1144  // just to demonstrate. See ODB-369
1145 
1146  char* posNext;
1147 
1148  EXPECT((posNext = c->encode(ds.get(), baseVal + 0.0)) == (ds.get() + 2));
1149  ds.set(posNext);
// With the withMissing variant, encoding baseVal + 65535 is expected to throw
// eckit::AssertionFailed, so the missing value is encoded in that slot instead.
1150  if (withMissing) {
1151  EXPECT_THROWS_AS(c->encode(ds.get(), baseVal + 65535), eckit::AssertionFailed);
1152  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 2)); // Ensure data is the same
1153  } else {
1154  EXPECT((posNext = c->encode(ds.get(), baseVal + 65535)) == (ds.get() + 2));
1155  }
1156  ds.set(posNext);
1157  EXPECT((posNext = c->encode(ds.get(), baseVal + 32767)) == (ds.get() + 2));
1158  ds.set(posNext);
1159  EXPECT((posNext = c->encode(ds.get(), baseVal + 32768)) == (ds.get() + 2));
1160  ds.set(posNext);
1161  EXPECT((posNext = c->encode(ds.get(), baseVal + 12345)) == (ds.get() + 2));
1162  ds.set(posNext);
1163  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 2));
1164  ds.set(posNext);
1165 
1166  // Check we have the data we expect
1167 
// Six values were encoded above, 2 bytes each.
1168  size_t data_size = (6 * 2);
1169  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
1170 
1171 // eckit::Log::info() << "DATA: " << std::endl;
1172 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
1173 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
1174 // if (int(data[n]) != int(ds.data()[n]))
1175 // eckit::Log::info() << "******************************" << std::endl;
1176 // }
1177 
1178  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
1179  }
1180 }
1181 
1182 
// Test case: runs four iterations covering both output endiannesses, with and without
// the "withMissing" codec variant. Each iteration builds the expected header followed by
// the byte values 0..255 plus one trailing missing-value byte, gathers statistics, saves
// the header via EndianCodecSave, encodes 257 values (expecting 1 byte each) and
// memcmp's the result.
1183 CASE("8bit integers are stored with an offset. This need not (strictly) be integral!!") {
1184 
1185  // n.b. we use a non-standard, non-integral minimum to demonstrate the offset behaviour.
1186 
1187  // Use a curious, custom missingValue to show it is being used.
1188 
1189  const double customMissingValue = 6.54565456545599971850917315786e-123;
1190  const double baseVal = -5000.5;
1191  const size_t expectedHdrSize = 28;
1192 
1193  const char* expected_data[] = {
1194 
1195  // Codec header
1196  "\x01\x00\x00\x00", // has missing value
1197  "\x00\x00\x00\x00\x80\x88\xb3\xc0", // minimum = -5000.5
1198  "\x00\x00\x00\x00\x80\x89\xb2\xc0", // maximum = -5000.5 + 255
1199  "\x04\x4f\xab\xa0\xe4\x4e\x91\x26", // missing value = 6.54565456545599971850917315786e-123
1200  };
1201 
1202  // Loop through endiannesses for the source data
1203 
// i % 2 selects the output endianness; i > 1 selects the withMissing variant.
1204  for (int i = 0; i < 4; i++) {
1205 
1206  bool bigEndianOutput = (i % 2 == 0);
1207 
1208  bool withMissing = (i > 1);
1209 
1210 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1211 // eckit::Log::info() << "ITERATION: " << i << std::endl;
1212 // eckit::Log::info() << "big endian: " << (bigEndianOutput ? "T" : "F") << std::endl;
1213 // eckit::Log::info() << "with missing: " << (withMissing ? "T" : "F") << std::endl;
1214 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1215 
1216  std::vector<unsigned char> data;
1217 
// Only header entries here: entry 0 (hasMissing flag) is 4 bytes, the rest are 8-byte
// doubles. Each element is reversed in place for big-endian output.
1218  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
1219  size_t len = (j == 0) ? 4 : 8;
1220  data.insert(data.end(), expected_data[j], expected_data[j] + len);
1221  if (bigEndianOutput)
1222  std::reverse(data.end()-len, data.end());
1223  }
1224 
1225  // Add all of the data values
1226 
// Single-byte values need no endianness handling.
1227  for (int n = 0; n < 256; n++) {
1228  data.push_back(static_cast<unsigned char>(n));
1229  }
1230 
1231  // n.b. we can end up with garbage for the missing value... in this case it will be 0x88...
1232  // See ODB-371 ODB-370
1233 
1234  data.push_back(withMissing ? 0xff : 0x88);
1235 
1236  // Initialise the codecs
1237 
// NOTE(review): all four branches below are empty and the embedded numbering skips
// 1241, 1243, 1247 and 1249, so the codec-construction statements appear to have been
// lost from this listing. As shown, c is still null when dereferenced below.
1238  std::unique_ptr<Codec> c;
1239  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
1240  if (withMissing) {
1242  } else {
1244  }
1245  } else {
1246  if (withMissing) {
1248  } else {
1250  }
1251  }
1252 
1253  c->missingValue(customMissingValue);
1254  EXPECT(!c->hasMissing());
1255 
1256  // Statistics in writing order
1257 
1258  // n.b. n == 255 will be silently promoted to missing if using Int8Missing. See ODB-369
1259  for (size_t n = 0; n < 256; n++) {
1260  c->gatherStats(baseVal + n);
1261  }
1262  EXPECT(!c->hasMissing());
1263  c->gatherStats(customMissingValue);
1264  EXPECT(c->hasMissing()); // See ODB-71 -- NOTE(review): possibly a typo for ODB-371, which the 16-bit case cites at the same point; confirm
1265 
1266  // Encode the header to the data stream
1267 
1268  EndianCodecSave ds(bigEndianOutput, *c);
1269 
1270  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
1271 
1272  // Encode the data to wherever we want it
1273  // Expect 1 byte per element
1274 
1275  // n.b. If we directly supply the value that is equivalent to the internal missing value, it
1276  // is just treated as missing on read, without flagging hasMissing(). We do this here
1277  // just to demonstrate. See ODB-369
1278 
1279  char* posNext;
1280 
// Values 0..254 are encoded unconditionally; 255 is handled separately below.
1281  for (size_t n = 0; n < 255; n++) {
1282  EXPECT((posNext = c->encode(ds.get(), baseVal + n)) == (ds.get() + 1));
1283  ds.set(posNext);
1284  }
1285 
// With the withMissing variant, encoding baseVal + 255 is expected to throw
// eckit::AssertionFailed, so the missing value is encoded in that slot instead.
1286  if (withMissing) {
1287  EXPECT_THROWS_AS(c->encode(ds.get(), baseVal + 255), eckit::AssertionFailed);
1288  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1)); // Ensure data is the same
1289  } else {
1290  EXPECT((posNext = c->encode(ds.get(), baseVal + 255)) == (ds.get() + 1));
1291  }
1292  ds.set(posNext);
1293 
1294  EXPECT((posNext = c->encode(ds.get(), customMissingValue)) == (ds.get() + 1));
1295  ds.set(posNext);
1296 
1297  // Check we have the data we expect
1298 
// 256 data values plus the trailing missing value, 1 byte each.
1299  size_t data_size = (257 * 1);
1300  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
1301 
1302 // eckit::Log::info() << "DATA: " << std::endl;
1303 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
1304 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
1305 // if (int(data[n]) != int(ds.data()[n]))
1306 // eckit::Log::info() << "******************************" << std::endl;
1307 // }
1308 
1309  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
1310  }
1311 }
1312 
1313 
// Test case: runs four iterations covering both output endiannesses and 1-byte vs 2-byte
// ("bits16") indices. Each iteration builds the expected header (including the string
// table) followed by six indices, gathers statistics for six strings passed as doubles,
// saves the header via EndianCodecSave, encodes the six strings (expecting 1 or 2 bytes
// each) and memcmp's the result.
1314 CASE("Character strings can be stored in a flat list, and indexed") {
1315 
1316  // n.b. no missing values
1317 
// 144 = 28 (codec header) + 4 (string count) + six table entries of
// (4 + string length + 4 + 4) bytes: 14 + 18 + 20 + 20 + 20 + 20.
1318  const size_t expectedHdrSize = 144;
1319 
1320  const char* expected_data[] = {
1321 
1322  // Codec header
1323  "\x00\x00\x00\x00", // 0 = hasMissing
1324  "\x6f\x70\x71\x72\x73\x74\x75\x76", // min unspecified == "opqrstuv"
1325  "\x00\x00\x00\x00\x00\x00\x00\x00", // max unspecified
1326  "\x00\x00\x00\x00\x00\x00\x00\x00", // missingValue unspecified
1327 
1328  // How many strings are there in the table?
1329  "\x06\x00\x00\x00",
1330 
1331  // String data (prepended with lengths)
1332  // length, data, "cnt (discarded)", index
1333 
1334  // The order of these is a matter of implementation detail of the internal "hash table"
1335  // This is what happens to happen in current ODB-API
1336 
1337  "\x02\x00\x00\x00", "ab", "\x00\x00\x00\x00", "\x00\x00\x00\x00", // This string is too short
1338  "\x06\x00\x00\x00", "ghijkl", "\x00\x00\x00\x00", "\x01\x00\x00\x00",
1339  "\x08\x00\x00\x00", "mnopqrst", "\x00\x00\x00\x00", "\x02\x00\x00\x00", // 8-byte length
1340  "\x08\x00\x00\x00", "uvwxyzab", "\x00\x00\x00\x00", "\x03\x00\x00\x00", // n.b. truncated.
1341  "\x08\x00\x00\x00", "ghijklmn", "\x00\x00\x00\x00", "\x04\x00\x00\x00",
1342  "\x08\x00\x00\x00", "opqrstuv", "\x00\x00\x00\x00", "\x05\x00\x00\x00",
1343  };
1344 
1345  // Loop through endiannesses for the source data
1346 
// i % 2 selects the output endianness; i > 1 selects 2-byte indices.
1347  for (int i = 0; i < 4; i++) {
1348 
1349  bool bigEndianOutput = (i % 2 == 0);
1350 
1351  bool bits16 = (i > 1);
1352 
1353 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1354 // eckit::Log::info() << "ITERATION: " << i << std::endl;
1355 // eckit::Log::info() << "big endian: " << (bigEndianOutput ? "T" : "F") << std::endl;
1356 // eckit::Log::info() << "16-bit: " << (bits16 ? "T" : "F") << std::endl;
1357 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1358 
1359  std::vector<unsigned char> data;
1360 
// Element lengths: within the first five entries, 0 and 4 are 4-byte integers and 1-3
// are 8-byte values. After that the table repeats (length, string, cnt, index); the
// string slots are those with (j+2) % 4 == 0 and use strlen(), everything else is 4 bytes.
1361  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
1362  size_t len =
1363  (j < 5) ? ((j == 0 || j == 4) ? 4 : 8)
1364  : ((j+2) % 4 == 0 ? ::strlen(expected_data[j]) : 4);
1365  data.insert(data.end(), expected_data[j], expected_data[j] + len);
1366 
1367  // n.b. Don't reverse the endianness of the string data.
1368  if (bigEndianOutput && !((j > 5) && ((j+2) % 4 == 0)))
1369  std::reverse(data.end()-len, data.end());
1370  }
1371 
1372  // Which strings do we wish to encode (look at them in reverse. nb refers to index column)
// NOTE(review): the loop below appends indices 0..5 in ascending order, so the
// "in reverse" remark above looks stale - confirm.
// For 2-byte indices the zero high byte goes first for big-endian output, last otherwise.
1373 
1374  for (int n = 0; n < 6; n++) {
1375  if (bits16 && bigEndianOutput)
1376  data.push_back(0);
1377  data.push_back(static_cast<unsigned char>(n));
1378  if (bits16 && !bigEndianOutput)
1379  data.push_back(0);
1380  }
1381 
1382  // Initialise codecs
1383 
// NOTE(review): all four branches below are empty and the embedded numbering skips
// 1387, 1389, 1393 and 1395, so the codec-construction statements appear to have been
// lost from this listing. As shown, c is still null when dereferenced below.
1384  std::unique_ptr<Codec> c;
1385  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
1386  if (bits16) {
1388  } else {
1390  }
1391  } else {
1392  if (bits16) {
1394  } else {
1396  }
1397  }
1398 
1399  c->missingValue(0.0);
1400  EXPECT(!c->hasMissing());
1401 
1402  // Statistics in writing order
1403 
1404  const char* s1 = "ab"; // check that we can handle short strings!
1405  const char* s2 = "ghijkl";
1406  const char* s3 = "mnopqrst";
1407  const char* s4 = "uvwxyzabcdef"; // n.b. will be truncated to 8-bytes
1408  const char* s5 = "ghijklmn";
1409  const char* s6 = "opqrstuv";
1410 
1411  // n.b. these casts are a bit dubious in terms of memory access. May go beyond ends of s1, s2
1412 
// The first 8 bytes of each string are reinterpreted as a double and passed by value.
1413  c->gatherStats(*reinterpret_cast<const double*>(s1));
1414  c->gatherStats(*reinterpret_cast<const double*>(s2));
1415  c->gatherStats(*reinterpret_cast<const double*>(s3));
1416  c->gatherStats(*reinterpret_cast<const double*>(s4));
1417  c->gatherStats(*reinterpret_cast<const double*>(s5));
1418  c->gatherStats(*reinterpret_cast<const double*>(s6));
1419  EXPECT(!c->hasMissing());
1420 
1421  // Encode the header to the data stream
1422 
1423  EndianCodecSave ds(bigEndianOutput, *c);
1424 
1425  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
1426 
1427  // Encode the data to wherever we want it
1428  // Expect 1 or 2 bytes per element
1429 
1430  char* posNext;
1431 
1432  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s1))) == (ds.get() + (bits16 ? 2 : 1)));
1433  ds.set(posNext);
1434  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s2))) == (ds.get() + (bits16 ? 2 : 1)));
1435  ds.set(posNext);
1436  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s3))) == (ds.get() + (bits16 ? 2 : 1)));
1437  ds.set(posNext);
1438  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s4))) == (ds.get() + (bits16 ? 2 : 1)));
1439  ds.set(posNext);
1440  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s5))) == (ds.get() + (bits16 ? 2 : 1)));
1441  ds.set(posNext);
1442  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s6))) == (ds.get() + (bits16 ? 2 : 1)));
1443  ds.set(posNext);
1444 
1445  // Check we have the data we expect
1446 
// Six indices were written, each 1 or 2 bytes wide.
1447  size_t data_size = (6 * (bits16 ? 2 : 1));
1448  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
1449 
1450 // eckit::Log::info() << "DATA: " << std::endl;
1451 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
1452 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
1453 // if (int(data[n]) != int(ds.data()[n]))
1454 // eckit::Log::info() << "******************************" << std::endl;
1455 // }
1456 
1457  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
1458  }
1459 }
1460 
1461 
// Test case: same layout as the 8-byte string-table test, but the codec is told via
// dataSizeDoubles(2) to accept strings up to 16 bytes. Runs four iterations covering
// both output endiannesses and 1-byte vs 2-byte ("bits16") indices, then memcmp's the
// written header + indices against the expected bytes.
1462 CASE("Character strings can be stored in a flat list, and indexed, and longer than 8 bytes") {
1463 
1464  // n.b. no missing values
1465 
// 156 = 28 (codec header) + 4 (string count) + six table entries of
// (4 + string length + 4 + 4) bytes: 14 + 18 + 20 + 24 + 28 + 20.
1466  const size_t expectedHdrSize = 156;
1467 
1468  const char* expected_data[] = {
1469 
1470  // Codec header
1471  "\x00\x00\x00\x00", // 0 = hasMissing
1472  "\x6f\x70\x71\x72\x73\x74\x75\x76", // min unspecified == "opqrstuv"
1473  "\x00\x00\x00\x00\x00\x00\x00\x00", // max unspecified
1474  "\x00\x00\x00\x00\x00\x00\x00\x00", // missingValue unspecified
1475 
1476  // How many strings are there in the table?
1477  "\x06\x00\x00\x00",
1478 
1479  // String data (prepended with lengths)
1480  // length, data, "cnt (discarded)", index
1481 
1482  // The order of these is a matter of implementation detail of the internal "hash table"
1483  // This is what happens to happen in current ODB-API
1484 
1485  "\x02\x00\x00\x00", "ab", "\x00\x00\x00\x00", "\x00\x00\x00\x00", // This string is too short
1486  "\x06\x00\x00\x00", "ghijkl", "\x00\x00\x00\x00", "\x01\x00\x00\x00",
1487  "\x08\x00\x00\x00", "mnopqrst", "\x00\x00\x00\x00", "\x02\x00\x00\x00", // 8-byte length
1488  "\x0c\x00\x00\x00", "uvwxyzabcdef", "\x00\x00\x00\x00", "\x03\x00\x00\x00", // n.b. 12 bytes, NOT truncated here.
1489  "\x10\x00\x00\x00", "ghijklmnopqrstuv", "\x00\x00\x00\x00", "\x04\x00\x00\x00", // n.b. truncated to 16 bytes.
1490  "\x08\x00\x00\x00", "opqrstuv", "\x00\x00\x00\x00", "\x05\x00\x00\x00",
1491  };
1492 
1493  // Loop through endiannesses for the source data
1494 
// i % 2 selects the output endianness; i > 1 selects 2-byte indices.
1495  for (int i = 0; i < 4; i++) {
1496 
1497  bool bigEndianOutput = (i % 2 == 0);
1498 
1499  bool bits16 = (i > 1);
1500 
1501 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1502 // eckit::Log::info() << "ITERATION: " << i << std::endl;
1503 // eckit::Log::info() << "big endian: " << (bigEndianOutput ? "T" : "F") << std::endl;
1504 // eckit::Log::info() << "16-bit: " << (bits16 ? "T" : "F") << std::endl;
1505 // eckit::Log::info() << "---------------------------------------------------------" << std::endl;
1506 
1507  std::vector<unsigned char> data;
1508 
// Element lengths: within the first five entries, 0 and 4 are 4-byte integers and 1-3
// are 8-byte values. After that the table repeats (length, string, cnt, index); the
// string slots are those with (j+2) % 4 == 0 and use strlen(), everything else is 4 bytes.
1509  for (size_t j = 0; j < sizeof(expected_data) / sizeof(const char*); j++) {
1510  size_t len =
1511  (j < 5) ? ((j == 0 || j == 4) ? 4 : 8)
1512  : ((j+2) % 4 == 0 ? ::strlen(expected_data[j]) : 4);
1513  data.insert(data.end(), expected_data[j], expected_data[j] + len);
1514 
1515  // n.b. Don't reverse the endianness of the string data.
1516  if (bigEndianOutput && !((j > 5) && ((j+2) % 4 == 0)))
1517  std::reverse(data.end()-len, data.end());
1518  }
1519 
1520  // Which strings do we wish to encode (look at them in reverse. nb refers to index column)
// NOTE(review): the loop below appends indices 0..5 in ascending order, so the
// "in reverse" remark above looks stale - confirm.
// For 2-byte indices the zero high byte goes first for big-endian output, last otherwise.
1521 
1522  for (int n = 0; n < 6; n++) {
1523  if (bits16 && bigEndianOutput)
1524  data.push_back(0);
1525  data.push_back(static_cast<unsigned char>(n));
1526  if (bits16 && !bigEndianOutput)
1527  data.push_back(0);
1528  }
1529 
1530  // Initialise codecs
1531 
// NOTE(review): all four branches below are empty and the embedded numbering skips
// 1535, 1537, 1541 and 1543, so the codec-construction statements appear to have been
// lost from this listing. As shown, c is still null when dereferenced below.
1532  std::unique_ptr<Codec> c;
1533  if (bigEndianOutput == eckit::system::SystemInfo::isBigEndian()) {
1534  if (bits16) {
1536  } else {
1538  }
1539  } else {
1540  if (bits16) {
1542  } else {
1544  }
1545  }
1546 
1547  c->missingValue(0.0);
1548  EXPECT(!c->hasMissing());
1549 
1550  // Allow strings up to 16 bytes
1551  c->dataSizeDoubles(2);
1552 
1553  // Statistics in writing order
1554 
1555  const char* s1 = "ab"; // check that we can handle short strings!
1556  const char* s2 = "ghijkl";
1557  const char* s3 = "mnopqrst";
1558  const char* s4 = "uvwxyzabcdef"; // n.b. will NOT be truncated to 8-bytes
1559  const char* s5 = "ghijklmnopqrstuvwxyz"; // n.b. will be truncated to 16-bytes
1560  const char* s6 = "opqrstuv";
1561 
1562  // n.b. these casts are a bit dubious in terms of memory access. May go beyond ends of s1, s2
1563 
// NOTE(review): each call passes a single double by value (the first 8 bytes of the
// string). How the codec reaches bytes 9-16 of s4/s5 is not visible here - confirm
// against the codec implementation.
1564  c->gatherStats(*reinterpret_cast<const double*>(s1));
1565  c->gatherStats(*reinterpret_cast<const double*>(s2));
1566  c->gatherStats(*reinterpret_cast<const double*>(s3));
1567  c->gatherStats(*reinterpret_cast<const double*>(s4));
1568  c->gatherStats(*reinterpret_cast<const double*>(s5));
1569  c->gatherStats(*reinterpret_cast<const double*>(s6));
1570  EXPECT(!c->hasMissing());
1571 
1572  // Encode the header to the data stream
1573 
1574  EndianCodecSave ds(bigEndianOutput, *c);
1575 
1576  EXPECT(ds.position() == eckit::Offset(expectedHdrSize));
1577 
1578  // Encode the data to wherever we want it
1579  // Expect 1 or 2 bytes per element
1580 
1581  char* posNext;
1582 
1583  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s1))) == (ds.get() + (bits16 ? 2 : 1)));
1584  ds.set(posNext);
1585  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s2))) == (ds.get() + (bits16 ? 2 : 1)));
1586  ds.set(posNext);
1587  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s3))) == (ds.get() + (bits16 ? 2 : 1)));
1588  ds.set(posNext);
1589  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s4))) == (ds.get() + (bits16 ? 2 : 1)));
1590  ds.set(posNext);
1591  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s5))) == (ds.get() + (bits16 ? 2 : 1)));
1592  ds.set(posNext);
1593  EXPECT((posNext = c->encode(ds.get(), *reinterpret_cast<const double*>(s6))) == (ds.get() + (bits16 ? 2 : 1)));
1594  ds.set(posNext);
1595 
1596  // Check we have the data we expect
1597 
// Six indices were written, each 1 or 2 bytes wide.
1598  size_t data_size = (6 * (bits16 ? 2 : 1));
1599  EXPECT(ds.position() == eckit::Offset(expectedHdrSize + data_size));
1600 
1601 // eckit::Log::info() << "DATA: " << std::endl;
1602 // for (size_t n = 0; n < expectedHdrSize + data_size; n++) {
1603 // eckit::Log::info() << std::hex << int(data[n]) << " " << int(ds.data()[n]) << std::endl;
1604 // if (int(data[n]) != int(ds.data()[n]))
1605 // eckit::Log::info() << "******************************" << std::endl;
1606 // }
1607 
1608  EXPECT(::memcmp(&data[0], ds.data(), expectedHdrSize + data_size) == 0);
1609  }
1610 }
1611 
1612 // ------------------------------------------------------------------------------------------------------
1613 
// Program entry point: forwards the command-line arguments to run_tests() and returns
// its result as the process exit status.
1614 int main(int argc, char* argv[]) {
1615 
1616  return run_tests(argc, argv);
1617 }
eckit::Offset position_
EndianCodecSave(bool bigEndianData, Codec &codec)
eckit::Offset position() const
const char * data() const
eckit::Buffer buffer_
void set(char *p)
static double realMDI()
Definition: MDI.h:21
static double integerMDI()
Definition: MDI.h:22
void save(GeneralDataStream &ds)
Definition: Codec.cc:76
eckit::Offset position() const
Definition: DataStream.h:198
int main(int argc, char *argv[])
CASE("Constant values consume no space in the output data buffer")
size_t prepend_codec_selection_header(std::vector< unsigned char > &data, const std::string &codec_name, bool bigEndian=false)