IODA Bundle
TextReaderIterator.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2012 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 ///
12 /// \file TextReaderIterator.cc
13 ///
14 /// @author Piotr Kuchta, Oct 2010
15 
17 
18 #include <algorithm>
19 #include <fstream>
20 
21 #include "eckit/filesystem/PathName.h"
22 #include "eckit/utils/StringTools.h"
23 #include "eckit/types/Types.h"
24 #include "eckit/utils/Translator.h"
25 
26 #include "odc/csv/TextReader.h"
27 #include "odc/api/ColumnType.h"
28 
29 using namespace eckit;
30 
31 typedef StringTools S;
32 
33 namespace odc {
34 
35 TextReaderIterator::TextReaderIterator(TextReader &owner)
36 : columns_(0),
37  lastValues_(0),
38  columnOffsets_(0),
39  nrows_(0),
40  delimiter_(owner.delimiter()),
41  in_(0),
42  newDataset_(false),
43  noMore_(false),
44  ownsF_(false),
45  refCount_(0)
46 {
47  in_ = &owner.stream();
48  ASSERT(in_);
49 
50  parseHeader();
51  next();
52 }
53 
54 TextReaderIterator::TextReaderIterator(TextReader &owner, const PathName& pathName)
55 : columns_(0),
56  lastValues_(0),
57  columnOffsets_(0),
58  nrows_(0),
59  delimiter_(owner.delimiter()),
60  in_(0),
61  newDataset_(false),
62  noMore_(false),
63  ownsF_(false),
64  refCount_(0)
65 {
66  in_ = new std::ifstream(pathName.localPath());
67  ASSERT(in_);
68  ownsF_ = true;
69  parseHeader();
70  next();
71 }
72 
73 eckit::sql::BitfieldDef TextReaderIterator::parseBitfields(const std::string& c)
74 {
75  //std::ostream& L( Log::debug() );
76 
77  size_t leftBracket (c.find('['));
78  size_t rightBracket (c.find(']'));
79 
80  if ( !(leftBracket != std::string::npos && rightBracket != std::string::npos))
81  throw UserError(std::string("Error parsing bitfield definition. Should be like: bitfield_column_name:BITFIELD[a:1;b:3] was: '") + c + "'");
82 
83  std::string s(c.substr(leftBracket + 1, rightBracket - leftBracket - 1));
84 
85  //L << "TextReaderIterator::parseBitfields: s='" << s << "'" << std::endl;
86 
87  eckit::sql::FieldNames names;
88  eckit::sql::Sizes sizes;
89 
90  size_t numberOfBits = 0;
91  std::vector<std::string> bs(S::split(";", s));
92 
93  //L << "TextReaderIterator::parseBitfields: bs=" << bs << std::endl;
94 
95  for (size_t i = 0; i < bs.size(); ++i)
96  {
97  std::vector<std::string> v(S::split(":", bs[i]));
98 
99  //L << "TextReaderIterator::parseBitfields: bs[" << i << "] = " << bs[i] << " " << v << " : " << v.size() << std::endl;
100 
101  if (v.size() != 2)
102  throw UserError("Bitfields definition parse error");
103 
104  if (std::find(names.begin(), names.end(), v[0]) != names.end())
105  throw UserError("Names of fields must be unique within one bitfield");
106 
107  names.push_back(v[0]);
108 
109  int size = atoi(v[1].c_str());
110 
111  if ( !(v.size() > 0) )
112  throw UserError("Size of a bitfield must be positive and larger than zero");
113 
114  numberOfBits += size;
115  sizes.push_back(size);
116  }
117  //L << "TextReaderIterator::parseBitfields: numberOfbits=" << numberOfBits << std::endl;
118 
119  if (numberOfBits > 31) {
120  throw UserError("Bitfields can have up to 31 bits only currently");
121  }
122 
123  return eckit::sql::BitfieldDef(make_pair(names, sizes));
124 }
125 
// Body of the header parser (its log messages identify it as
// TextReaderIterator::parseHeader).
// NOTE(review): the line carrying the function signature is absent from this
// listing, as is listing line 158 inside the BITFIELD branch below.
//
// Reads one line from *in_, splits it on delimiter_ and registers one column
// per entry. Each entry must contain at least one ':' (NAME ":" TYPE),
// otherwise UserError is thrown. Finishes by calling initRowBuffer().
127 {
128  std::string header;
129  std::getline(*in_, header);
130  std::vector<std::string> columns (S::split(delimiter_, header));
131  //c->missingValue(missingValue);
132 
133  std::ostream& L(Log::info());
134 
135  L << "TextReaderIterator::parseHeader: columns: " << columns << std::endl;
136  L << "TextReaderIterator::parseHeader: delimiter: '" << delimiter_ << "'" << std::endl;
137  L << "TextReaderIterator::parseHeader: header: '" << header << "'" << std::endl;
138 
139  for (size_t i = 0; i < columns.size(); ++i)
140  {
141  Log::debug() << "TextReaderIterator::parseHeader: column " << i << " '" << columns[i] << "'" << std::endl;
142  std::vector<std::string> column (S::split(":", columns[i]));
143  if (column.size() < 2)
144  throw UserError(std::string("Column '") + columns[i] + "': format should be NAME \":\" TYPE");
145 
// The name is the trimmed text before the first ':'. The type is everything
// after it, re-joined with ':' and upper-cased, so a type that itself
// contains ':' (e.g. a bitfield definition) is kept whole.
146  const std::string columnName (S::trim(column[0]));
147  const std::string columnType (S::upper(S::join(":", std::vector<std::string>(column.begin() + 1, column.end()))));
148 
149  if (! S::startsWith(columnType, "BITFIELD"))
150  {
151  Log::debug() << "TextReaderIterator::parseHeader: adding column " << columns_.size() << " '" << columnName << "' : "
152  << columnType << std::endl;
153  columns_.addColumn(columnName, columnType);
154  }
155  else
156  {
// NOTE(review): as shown, this branch only logs. The statement on the missing
// listing line 158 presumably registers the bitfield column - confirm against
// the original file.
157  Log::debug() << "TextReaderIterator::parseHeader: adding BITFIELD " << columns_.size() << " '" << columns[i] << std::endl;
159  }
160  }
161  initRowBuffer();
162 }
163 
// Calls close() and then frees the two heap arrays lastValues_ and
// columnOffsets_.
// NOTE(review): the signature line is absent from this listing; this body
// presumably belongs to the destructor - confirm.
165 {
166  close();
167  delete [] lastValues_;
168  delete [] columnOffsets_;
169 }
170 
171 
// Returns the noMore_ flag; nothing else is consulted.
// NOTE(review): the signature line is absent from this listing; presumably
// this is operator!=(const TextReaderIterator&) - confirm.
173 {
174  return noMore_;
175 }
176 
// Frees the current lastValues_ and columnOffsets_ arrays and allocates new
// ones: one offset per column, and rowDataSizeDoubles_ doubles for the row.
// Each column's first slot is then set to that column's missing value.
// NOTE(review): the signature line and listing lines 182 and 185 are absent
// from this listing. As shown, nothing resets rowDataSizeDoubles_ before the
// accumulation loop and nothing writes columnOffsets_[i]; presumably the
// missing lines do both - confirm against the original file.
178 {
179  delete [] lastValues_;
180  delete [] columnOffsets_;
181 
183  columnOffsets_ = new size_t[columns().size()];
184  for (size_t i = 0; i < columns().size(); i++) {
186  rowDataSizeDoubles_ += columns()[i]->dataSizeDoubles();
187  }
188 
189  lastValues_ = new double [rowDataSizeDoubles_];
190  for(size_t i = 0; i < columns().size(); i++)
191  lastValues_[columnOffsets_[i]] = columns()[i]->missingValue();
192 }
193 
195 {
196  newDataset_ = false;
197  if (noMore_)
198  return false;
199 
200  std::string line;
201  std::getline(*in_, line);
202  line = S::trim(line);
203  std::vector<std::string> values(S::split(delimiter_, line));
204 
205  size_t nCols = values.size();
206  if (nCols == 0)
207  return ! (noMore_ = true);
208  ASSERT(nCols == columns().size());
209 
210  for(size_t i = 0; i < nCols; ++i)
211  {
212  const std::string& v (S::trim(values[i]));
213  if (S::upper(v) == "NULL") {
214  lastValues_[columnOffsets_[i]] = columns_[i]->missingValue();
215  } else {
216  api::ColumnType typ ( columns()[i]->type() );
217 
218  switch (typ) {
219 
220  case api::STRING: {
221  std::string unquoted = S::unQuote(v);
222  size_t charlen = unquoted.length();
223  size_t lenDoubles = charlen > 0 ? (((charlen - 1) / 8) + 1): 1;
224 
225  // If the string is bigger than any we have come across before, we need to
226  // resize the buffers to cope for this
227  // TODO: Adjust the writer to be able to easily continue if all we have changed is a column size.
228  if (lenDoubles > columns_[i]->dataSizeDoubles()) {
229 
230  newDataset_ = true;
231  columns_[i]->dataSizeDoubles(lenDoubles);
232 
233  // Allocate a new buffer, but keep the old data around
234  double* oldData = lastValues_;
235  lastValues_ = 0;
236  initRowBuffer();
237  ASSERT(oldData);
238  ::memcpy(lastValues_, oldData, columnOffsets_[i]*sizeof(double));
239  delete oldData;
240  }
241 
242  char* buf = reinterpret_cast<char*>(&lastValues_[columnOffsets_[i]]);
243  lenDoubles = columns_[i]->dataSizeDoubles();
244 
245  ::memcpy(buf, &unquoted[0], charlen);
246  ::memset(buf + charlen, 0, (lenDoubles * sizeof(double)) - charlen);
247  break;
248  }
249 
250  case api::REAL:
251  lastValues_[columnOffsets_[i]] = static_cast<double>(Translator<std::string, float>()(v));
252  break;
253 
254  case api::DOUBLE:
255  lastValues_[columnOffsets_[i]] = Translator<std::string, double>()(v);
256  break;
257 
258  case api::INTEGER:
259  case api::BITFIELD:
260  lastValues_[columnOffsets_[i]] = static_cast<double>(Translator<std::string, long>()(v));
261  break;
262 
263  default:
264  throw SeriousBug("Unexpected type in column", Here());
265  }
266  }
267  }
268 
269  return nCols;
270 }
271 
273 
// Asserts that i is a valid column index, then returns the element of
// lastValues_ located at column i's offset within the row buffer.
// NOTE(review): the signature line is absent from this listing.
275 {
276  ASSERT(i >= 0 && i < columns().size());
277  return lastValues_[columnOffsets_[i]];
278 }
279 
// Deletes the input stream and clears the pointer, but only when this object
// owns it (ownsF_) and it is non-null. Always returns 0.
// NOTE(review): the signature line is absent from this listing; presumably
// this is close(), which the destructor body calls - confirm.
281 {
282  //if (ownsF_ && f) { f->close(); delete f; f = 0; }
283 
284  if (ownsF_ && in_)
285  {
286  delete in_;
287  in_ = 0;
288  }
289 
290  return 0;
291 }
292 
293 } // namespace odc
294 
StringTools S
std::istream & stream()
Definition: TextReader.h:52
core::MetaData & columns()
static eckit::sql::BitfieldDef parseBitfields(const std::string &)
bool operator!=(const TextReaderIterator &other)
const double * data() const
MetaData & addBitfield(const std::string &name, const eckit::sql::BitfieldDef &)
Definition: MetaData.cc:297
MetaData & addColumn(const std::string &name, const std::string &type)
Definition: MetaData.cc:279
std::string trim(const std::string &str)
@ BITFIELD
Definition: ColumnType.h:27
Definition: ColumnInfo.h:23