IODA Bundle
Encoder.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2018 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 #include "odc/core/Encoder.h"
12 
13 #include "odc/LibOdc.h"
15 #include "odc/core/Header.h"
16 
17 using namespace eckit;
18 
19 
20 namespace odc {
21 namespace core {
22 
23 //----------------------------------------------------------------------------------------------------------------------
24 
25 void encodeFrame(eckit::DataHandle& out,
26  const std::vector<api::ColumnInfo>& columns,
27  const std::vector<api::ConstStridedData>& data,
28  const std::map<std::string, std::string>& properties) {
29 
30  ASSERT(columns.size() == data.size());
31  ASSERT(columns.size() > 0);
32  MetaData md;
33 
34  size_t ncols = columns.size();
35  size_t nrows = data[0].nelem();
36 
37  // Construct the default codecs
38 
39  md.setSize(ncols);
40  for (size_t i = 0; i < ncols; ++i) {
41  md[i]->name(columns[i].name);
42  md[i]->type<SameByteOrder>(columns[i].type);
43  ASSERT(columns[i].decodedSize % sizeof(double) == 0);
44  md[i]->dataSizeDoubles(columns[i].decodedSize / sizeof(double));
45  if (!columns[i].bitfield.empty()) {
46  eckit::sql::BitfieldDef bf;
47  for (const auto& bit : columns[i].bitfield) {
48  bf.first.push_back(bit.name);
49  bf.second.push_back(bit.size);
50  }
51  md[i]->bitfieldDef(bf);
52  md[i]->missingValue(0);
53  }
54  }
55 
56  // Gather statistics over all the columns
57 
58  size_t maxRowSize = sizeof(uint16_t); // all rows contain a marker
59 
60  for (size_t col = 0; col < ncols; ++col) {
61  ASSERT(data[col].nelem() == nrows);
62  Codec& coder(md[col]->coder());
63 
64  for (const char* d : data[col]) {
65  coder.gatherStats(*reinterpret_cast<const double*>(d));
66  }
67  maxRowSize += data[col].dataSize();
68  }
69 
70  // Optimise the codecs
71 
73 
74  // TODO: Sort the columns into the optimal order for encoding
75 
76 // std::sort(md.begin(), md.end(), [](Column* a, Column* b) { ASSERT(false); });
77 
78  // TODO: Sort the data columns as well.
79 // ASSERT(false);
80 
81  // Encode the data
82  const std::vector<api::ConstStridedData>& sortedData(data);
83  std::vector<Codec*> coders;
84  for (const auto& col : md) coders.push_back(&col->coder());
85 
86  Buffer encodedBuffer(maxRowSize * nrows);
87  DataStream<SameByteOrder> encodedStream(encodedBuffer);
88 
89  for (size_t row = 0; row < nrows; row++) {
90  size_t startCol = 0;
91 
92  if (row != 0) {
93  for (; startCol < ncols; ++startCol) {
94  if (sortedData[startCol].isNewValue(row)) break;
95  }
96  }
97 
98  // Write the marker
99  uint8_t marker[2] {
100  static_cast<uint8_t>((startCol / 256) % 256),
101  static_cast<uint8_t>(startCol % 256)
102  };
103  encodedStream.writeBytes(marker, sizeof(marker)); // n.b. raw write
104 
105  // Write the updated values
106  char* p = encodedStream.get();
107  for (size_t col = startCol; col < ncols; col++) {
108  p = coders[col]->encode(p, *reinterpret_cast<const double*>(sortedData[col].get(row)));
109  }
110  encodedStream.set(p);
111  }
112 
113  // Encode the header
114 
115  Properties props {properties};
116  props["encoder"] = std::string("odc version ") + LibOdc::instance().version();
117  std::pair<Buffer, size_t> encodedHeader = Header::serializeHeader(encodedStream.position(), nrows, props, md);
118 
119  // And output the data
120 
121  ASSERT(out.write(encodedHeader.first, encodedHeader.second) == long(encodedHeader.second));
122  ASSERT(out.write(encodedBuffer, encodedStream.position()) == encodedStream.position());
123 }
124 
125 //----------------------------------------------------------------------------------------------------------------------
126 
127 }
128 }
int setOptimalCodecs(core::MetaData &columns)
virtual void gatherStats(const double &v)
Definition: Codec.cc:98
void writeBytes(const void *addr, size_t bytes)
Definition: DataStream.h:321
void set(char *p)
Definition: DataStream.h:95
eckit::Offset position() const
Definition: DataStream.h:198
void setSize(size_t)
Definition: MetaData.cc:64
std::map< std::string, std::string > Properties
Definition: Header.h:35
void encodeFrame(eckit::DataHandle &out, const std::vector< api::ColumnInfo > &columns, const std::vector< api::ConstStridedData > &data, const std::map< std::string, std::string > &properties)
Definition: Encoder.cc:25
Definition: ColumnInfo.h:23