IODA Bundle
Indexer.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 1996-2012 ECMWF.
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  * In applying this licence, ECMWF does not waive the privileges and immunities
7  * granted to it by virtue of its status as an intergovernmental organisation nor
8  * does it submit to any jurisdiction.
9  */
10 
11 #include "odc/Indexer.h"
12 
13 #include "eckit/eckit.h"
14 #include "eckit/io/Length.h"
15 #include "eckit/io/Offset.h"
16 #include "eckit/io/PartFileHandle.h"
17 
18 #include "odc/core/TablesReader.h"
19 #include "odc/core/MetaData.h"
20 #include "odc/Reader.h"
21 #include "odc/RowsCounter.h"
22 #include "odc/Select.h"
23 #include "odc/SelectIterator.h"
24 #include "odc/Writer.h"
25 
26 using namespace eckit;
27 using namespace odc::core;
28 
29 namespace odc {
30 
31 BlockOffsets Indexer::offsetsOfBlocks(const PathName &db)
32 {
33  BlockOffsets r;
34 
35  core::TablesReader reader(db);
36  auto it = reader.begin();
37  auto end = reader.end();
38  for (; it != end; ++it)
39  {
40  Offset offset = it->startPosition();
41  Length length = it->nextPosition() - it->startPosition();
42 
43  r.push_back(std::make_pair(offset,length));
44  }
45 
46  return r;
47 }
48 
49 std::vector<eckit::PathName> Indexer::createIndex(const std::vector<PathName> &dataFiles)
50 {
51  std::vector<eckit::PathName> indices;
52  for (size_t i(0); i < dataFiles.size(); ++i)
53  {
54  const PathName index (dataFiles[i] + ".idx");
55 
56  createIndex(dataFiles[i], index);
57  indices.push_back(index);
58  }
59  return indices;
60 }
61 
62 void Indexer::createIndex(const PathName &dataFile, const PathName& indexFile)
63 {
64  BlockOffsets offsets (offsetsOfBlocks(dataFile));
65 
66  MetaData metaData;
67  metaData
68  .addColumn("block_begin", "INTEGER")
69  .addColumn("block_length", "INTEGER")
70  .addColumn("seqno", "INTEGER")
71  .addColumn("n_rows", "INTEGER");
72 
73  odc::Writer<> write (indexFile);
75  writer->columns(metaData);
76  writer->writeHeader();
77 
78  for (size_t i(0); i < offsets.size(); ++i)
79  {
80  Offset blockBegin (offsets[i].first);
81  Length blockLength (offsets[i].second);
82 
83  PartFileHandle h(dataFile, blockBegin, blockLength);
84  h.openForRead();
85 
86  int prevSeqno (-1);
87  int nRows (0);
88 
89  odc::Select in("select seqno;", h);
90  for (odc::Select::iterator it (in.begin()), end (in.end());
91  it != end;
92  ++it)
93  {
94  int seqno ( (*it)[0] );
95  if (seqno == prevSeqno)
96  ++nRows;
97  else
98  {
99  if (nRows > 0)
100  {
101  (*writer)[0] = blockBegin;
102  (*writer)[1] = blockLength;
103  (*writer)[2] = prevSeqno;
104  (*writer)[3] = nRows;
105  ++writer;
106  nRows = 0;
107  }
108  prevSeqno = seqno;
109  ++nRows;
110  }
111  }
112  if (nRows > 0)
113  {
114  (*writer)[0] = blockBegin;
115  (*writer)[1] = blockLength;
116  (*writer)[2] = prevSeqno;
117  (*writer)[3] = nRows;
118  ++writer;
119  nRows = 0;
120  }
121  }
122 }
123 
124 } // namespace odc
125 
MetaData & addColumn(const std::string &name, const std::string &type)
Definition: MetaData.cc:279
eckit::Offset startPosition() const
Definition: Table.cc:37
Definition: ColumnInfo.h:23
std::vector< std::pair< eckit::Offset, eckit::Length > > BlockOffsets
Definition: Indexer.h:22