IODA Bundle
BufrParser.cpp
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2020 NOAA/NWS/NCEP/EMC
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #include "BufrParser.h"
9 
10 #include <map>
11 #include <ostream>
12 #include <iostream>
13 
14 #include "eckit/exception/Exceptions.h"
15 
16 #if __has_include("bufr_interface.h") // TODO(rmclaren): Remove this in future
17  #include "bufr_interface.h"
18 #else
19  #include "bufr.interface.h"
20 #endif
21 
23 #include "BufrMnemonicSet.h"
24 #include "DataContainer.h"
25 #include "Exports/Export.h"
26 #include "Exports/Splits/Split.h"
27 
28 
29 namespace Ingester
30 {
// Member initializers and body of BufrParser(const BufrDescription &description).
// The line carrying the signature is not present in this listing.
// Stores the given description and starts with all three Fortran file ids at 0.
32  description_(description),
33  fortranFileId_(0),
34  table1FileId_(0),
35  table2FileId_(0)
36  {
// reset(): start over from the beginning of the BUFR file.
37  reset();
38  }
39 
40  BufrParser::BufrParser(const eckit::Configuration& conf) :
41  description_(BufrDescription(conf)),
42  fortranFileId_(0),
43  table1FileId_(0),
44  table2FileId_(0)
45  {
46  reset();
47  }
48 
// NOTE(review): the signature line (49) is missing from this listing; given
// its position and body this is presumably the destructor - confirm.
50  {
// Closes the open BUFR file.
51  closeBufrFile();
52  }
53 
54  std::shared_ptr <DataContainer> BufrParser::parse(const size_t maxMsgsToParse)
55  {
56  const unsigned int SubsetStringLength = 25;
57 
58  if (fortranFileId_ <= 10)
59  {
60  throw eckit::BadValue("Fortran File ID is an invalid number (must be > 10).");
61  }
62 
63  auto collectors = BufrCollectors(fortranFileId_);
64  collectors.addMnemonicSets(description_.getMnemonicSets());
65 
66  char subset[SubsetStringLength];
67  int iddate;
68 
69  unsigned int messageNum = 0;
70  while (ireadmg_f(fortranFileId_, subset, &iddate, SubsetStringLength) == 0)
71  {
72  while (ireadsb_f(fortranFileId_) == 0)
73  {
74  collectors.collect();
75  }
76 
77  if (maxMsgsToParse > 0 && ++messageNum >= maxMsgsToParse) break;
78  }
79 
80  return exportData(collectors.finalize());
81  }
82 
83  std::shared_ptr<DataContainer> BufrParser::exportData(const BufrDataMap& srcData)
84  {
85  auto exportDescription = description_.getExport();
86 
87  auto filters = exportDescription.getFilters();
88  auto splitMap = exportDescription.getSplits();
89  auto varMap = exportDescription.getVariables();
90 
91  // Filter
92  BufrDataMap dataCopy = srcData; // make mutable copy
93  for (const auto& filter : filters)
94  {
95  filter->apply(dataCopy);
96  }
97 
98  // Split
99  CategoryMap catMap;
100  for (const auto& splitPair : splitMap)
101  {
102  std::ostringstream catName;
103  catName << "splits/" << splitPair.first;
104  catMap.insert({catName.str(), splitPair.second->subCategories(dataCopy)});
105  }
106 
107  BufrParser::CatDataMap splitDataMaps;
108  splitDataMaps.insert({std::vector<std::string>(), dataCopy});
109  for (const auto& splitPair : splitMap)
110  {
111  splitDataMaps = splitData(splitDataMaps, *splitPair.second);
112  }
113 
114  // Export
115  auto exportData = std::make_shared<DataContainer>(catMap);
116  for (const auto& dataPair : splitDataMaps)
117  {
118  for (const auto& varPair : varMap)
119  {
120  std::ostringstream pathStr;
121  pathStr << "variables/" << varPair.first;
122 
123  exportData->add(pathStr.str(),
124  varPair.second->exportData(dataPair.second),
125  dataPair.first);
126  }
127  }
128 
129  return exportData;
130  }
131 
// Body of BufrParser::splitData(CatDataMap &splitMaps, Split &split).
// The signature line (132) is not present in this listing.
// Applies `split` to the data of every entry in `splitMaps` and returns a new
// map whose keys are the old category vector with the name of the new
// sub-part appended.
133  {
134  CatDataMap splitDataMap;
135 
136  for (const auto& splitMapPair : splitMaps)
137  {
// Divide this category's data into named parts.
138  auto newData = split.split(splitMapPair.second);
139 
140  for (const auto& newDataPair : newData)
141  {
// Copy the existing category vector and extend it with the new part's name.
142  auto catVect = splitMapPair.first;
143  catVect.push_back(newDataPair.first);
144  splitDataMap.insert({catVect, newDataPair.second});
145  }
146  }
147 
148  return splitDataMap;
149  }
150 
// Opens a BUFR file using the Fortran BUFR interface.
//
// filepath    - path passed to open_f.
// isWmoFormat - when false the file is opened with openbf_f in "IN" mode;
//               when true it is opened in "SEC3" mode.
// tablepath   - only used when isWmoFormat is true and the string is not
//               empty; it is then passed to mtinfo_f.
151  void BufrParser::openBufrFile(const std::string& filepath,
152  bool isWmoFormat,
153  const std::string& tablepath)
154  {
155  fortranFileId_ = 11; // Fortran file id must be an integer > 10
156  open_f(fortranFileId_, filepath.c_str());
157 
158  if (!isWmoFormat)
159  {
160  openbf_f(fortranFileId_, "IN", fortranFileId_);
161  }
162  else
163  {
164  openbf_f(fortranFileId_, "SEC3", fortranFileId_);
165 
166  if (!tablepath.empty()) // else use the default tables
167  {
// NOTE(review): this listing jumps from line 167 to line 170, so statements
// may be missing here. As shown, table1FileId_ and table2FileId_ are passed
// with whatever values they currently hold - confirm against the real file.
170  mtinfo_f(tablepath.c_str(), table1FileId_, table2FileId_);
171  }
172  }
173  }
174 
// Body of BufrParser::closeBufrFile(): closes the open BUFR file.
// The signature line (175) is not present in this listing.
176  {
177  exitbufr_f();
178 
// Return all ids to 0, the value they are given at construction.
179  fortranFileId_ = 0;
180  table1FileId_ = 0;
181  table2FileId_ = 0;
182  }
183 
// Body of BufrParser::reset(): start over from the beginning of the BUFR file.
// The signature line (184) is not present in this listing.
185  {
// A non-zero id means a file is currently open, so close it first.
186  if (fortranFileId_ != 0)
187  {
188  closeBufrFile();
189  }
190 
// NOTE(review): lines 191-193 are missing from this listing; presumably the
// file is reopened here - confirm against the real file.
194  }
195 
// Body of BufrParser::printMap(const CatDataMap &map): convenience method that
// prints the categorical data map to stdout, one line per map entry.
// The signature line (196) is not present in this listing.
197  {
198  for (const auto &mp : map)
199  {
// The category strings of the key are written back to back, with no separator.
200  std::cout << " keys: ";
201  for (const auto &s : mp.first)
202  {
203  std::cout << s;
204  }
205 
// For each entry of the inner map, print its name followed by its row count.
206  std::cout << " subkeys: ";
207  for (const auto &m2p : mp.second)
208  {
209  std::cout << m2p.first << " " << m2p.second.rows() << " ";
210  }
211 
212  std::cout << std::endl;
213  }
214  }
215 } // namespace Ingester
Manager of collectors.
Description of the data to be read from a BUFR file and how to expose that data to the outside world.
std::vector< BufrMnemonicSet > getMnemonicSets() const
std::string tablepath() const
std::string filepath() const
void closeBufrFile()
Closes the open BUFR file.
Definition: BufrParser.cpp:175
unsigned int fortranFileId_
The Fortran file ID to an open BUFR file (0 when no file open)
Definition: BufrParser.h:52
void printMap(const CatDataMap &map)
Convenience method to print the Categorical data map to stdout.
Definition: BufrParser.cpp:196
BufrDescription description_
The description that defines what to parse from the BUFR file.
Definition: BufrParser.h:49
unsigned int table2FileId_
The Fortran file ID passed to mtinfo_f as its second table file ID (0 when no file open)
Definition: BufrParser.h:58
std::map< std::vector< std::string >, BufrDataMap > CatDataMap
Definition: BufrParser.h:46
CatDataMap splitData(CatDataMap &splitMaps, Split &split)
Function responsible for dividing the data into subcategories.
Definition: BufrParser.cpp:132
std::shared_ptr< DataContainer > exportData(const BufrDataMap &srcData)
Exports collected data into a DataContainer.
Definition: BufrParser.cpp:83
std::shared_ptr< DataContainer > parse(const size_t maxMsgsToParse=0) final
Uses the provided description to parse the BUFR file.
Definition: BufrParser.cpp:54
void reset() final
Start over from beginning of the BUFR file.
Definition: BufrParser.cpp:184
void openBufrFile(const std::string &filepath, bool isWmoFormat, const std::string &tablepath)
Opens a BUFR file using the Fortran BUFR interface.
Definition: BufrParser.cpp:151
unsigned int table1FileId_
The Fortran file ID passed to mtinfo_f as its first table file ID (0 when no file open)
Definition: BufrParser.h:55
BufrParser(const BufrDescription &description)
Definition: BufrParser.cpp:31
Filters getFilters() const
Definition: Export.h:38
Base class for all Split objects that split data into sub-parts.
Definition: Split.h:19
virtual std::map< std::string, BufrDataMap > split(const BufrDataMap &dataMap)=0
Split the data according to internal rules.
std::map< std::string, SubCategory > CategoryMap
Map of data set IDs to vectors of possible value strings.
Definition: DataContainer.h:27
IngesterArrayMap BufrDataMap
Definition: BufrTypes.h:21
def subset(infile, nlocsout, suffix, geofile, diagfile)
Definition: subset_files.py:12