UFO
HistoryCheck.cc
Go to the documentation of this file.
1 /*
2  * (C) 2021 Crown Copyright Met Office. All rights reserved.
3  * This software is licensed under the terms of the Apache Licence Version 2.0
4  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5  */
6 
8 
9 #include <algorithm>
10 #include <functional>
11 #include <map>
12 #include <set>
13 #include <string>
14 #include <tuple>
15 #include <utility>
16 #include <vector>
17 
18 #include <boost/none.hpp>
19 #include <boost/optional.hpp>
20 #include "eckit/config/Configuration.h"
21 #include "ioda/ObsDataVector.h"
22 #include "ioda/ObsSpace.h"
23 #include "oops/base/Variables.h"
24 #include "oops/mpi/mpi.h"
25 #include "oops/util/DateTime.h"
26 #include "oops/util/Duration.h"
27 #include "oops/util/Logger.h"
30 #include "ufo/filters/QCflags.h"
31 #include "ufo/filters/StuckCheck.h"
34 #include "ufo/utils/Constants.h"
36 
37 namespace ufo {
38 
39 HistoryCheck::HistoryCheck(ioda::ObsSpace &obsdb, const Parameters_ &parameters,
40  std::shared_ptr<ioda::ObsDataVector<int> > flags,
41  std::shared_ptr<ioda::ObsDataVector<float> > obserr)
42  : FilterBase(obsdb, parameters, flags, obserr), options_(parameters)
43 {
44  oops::Log::debug() << "HistoryCheck: config = " << options_ << "\n";
45 }
46 
47 HistoryCheck::HistoryCheck(ioda::ObsSpace &obsdb,
48  const ufo::HistoryCheck::Parameters_ &parameters,
49  std::shared_ptr<ioda::ObsDataVector<int> > flags,
50  std::shared_ptr<ioda::ObsDataVector<float> > obserr,
51  const eckit::LocalConfiguration &conf)
52  : HistoryCheck(obsdb, parameters, flags, obserr)
53 {
54  unitTestConfig_ = conf;
55 }
56 
57 /// This filter runs the ship track check and stuck check filters consecutively over an auxiliary
58 /// obs space (which is assumed to be a superset of \p obsdb_ with an earlier starting time),
59 /// before checking which observations have both been (1) flagged by either of the sub-filters
60 /// from the superset obs space and (2) are located within \p obsdb_
61 void HistoryCheck::applyFilter(const std::vector<bool> & apply,
62  const Variables & filtervars,
63  std::vector<std::vector<bool> > & flagged) const {
64  util::DateTime widerWindowStart = obsdb_.windowStart() - options_.timeBeforeStartOfWindow.value();
65  // In order to prevent the MPI from distributing the aux spaces's observations to different
66  // ranks from the distribution used for obsdb_, widerObsSpace uses the myself communicator
67  // for both time and spatial communicators, ensuring that all observations in widerObsSpace
68  // are saved to all ranks
69  ioda::ObsSpace widerObsSpace(options_.largerObsSpace, oops::mpi::myself(), widerWindowStart,
70  obsdb_.windowEnd(), oops::mpi::myself());
71  if (options_.resetLargerObsSpaceVariables) { // used for unit testing
72  if (unitTestConfig_.has("station_ids_wide")) {
73  const std::vector<int> stationIds = unitTestConfig_.getIntVector("station_ids_wide");
74  widerObsSpace.put_db("MetaData", "station_id", stationIds);
75  } else if (unitTestConfig_.has("station_ids_wide_string")) {
76  const std::vector<std::string> stationIds =
77  unitTestConfig_.getStringVector("station_ids_wide_string");
78  widerObsSpace.put_db("MetaData", "station_id", stationIds);
79  }
80  if (unitTestConfig_.has("air_temperatures_wide")) {
81  const std::vector<float> airTemperatures =
82  unitTestConfig_.getFloatVector("air_temperatures_wide");
83  widerObsSpace.put_db("ObsValue", "air_temperature", airTemperatures);
84  }
85  } // end of manual data entry section used for unit testing
86  std::shared_ptr<ioda::ObsDataVector<float>> obserrWide(
87  new ioda::ObsDataVector<float>(widerObsSpace, widerObsSpace.obsvariables(), "ObsError"));
88  std::shared_ptr<ioda::ObsDataVector<int>> qcflagsWide(
89  new ioda::ObsDataVector<int>(widerObsSpace, widerObsSpace.obsvariables()));
90  const oops::RequiredParameter<SurfaceObservationSubtype> &subtype =
92  const boost::optional<TrackCheckShipCoreParameters> &trackOptions =
94  // If the observation type is one which the track check ship filter should be run on and
95  // the necessary filter parameters were set within the configuration file
96  if (subtype != SurfaceObservationSubtype::LNDSYB &&
98  trackOptions) {
99  // Collecting parameters relevant for running the track check ship filter on the wider obs space
100  eckit::LocalConfiguration configTrackCheckShip =
101  trackOptions->toConfiguration();
102  subtype.serialize(configTrackCheckShip);
103  // Importing the base parameters
104  options_.TrackCheckUtilsParameters::serialize(configTrackCheckShip);
105  ufo::TrackCheckShipParameters trackParams;
106  trackParams.deserialize(configTrackCheckShip);
107  // Setting up and running the track check ship filter on the wider obs space
108  ufo::TrackCheckShip trackCheck(widerObsSpace, trackParams,
109  qcflagsWide, obserrWide);
110  trackCheck.preProcess();
111  }
112  const boost::optional<StuckCheckCoreParameters> &stuckOptions =
114  // If the stuck check filter parameters were set and if the stuck check filter has the potential
115  // to flag observations (number of observations is greater than the numberStuckTolerance value)
116  if (stuckOptions &&
117  widerObsSpace.index().size() >
118  (stuckOptions)->numberStuckTolerance) {
119  // If the observation subtype is one which the stuck check filter should be run on
120  if (subtype != SurfaceObservationSubtype::TEMP &&
124  // Collecting the relevant parameters for running the stuck check filter
125  eckit::LocalConfiguration configStuckCheck =
126  stuckOptions->toConfiguration();
127  options_.TrackCheckUtilsParameters::serialize(configStuckCheck);
128  ufo::StuckCheckParameters stuckParams;
129  stuckParams.deserialize(configStuckCheck);
130  // Setting up and running the stuck check filter on the wider obs space
131  ufo::StuckCheck stuckCheck(widerObsSpace, stuckParams,
132  qcflagsWide, obserrWide);
133  stuckCheck.preProcess();
134  }
135  }
136  // Creating obs accessors for both obs spaces, assuming the same variable is used for grouping
137  // into stations on both obs spaces
139  widerObsSpace);
140  ObsAccessor windowObsAccessor =
142 
143  std::vector<util::DateTime> wideDts = historicalObsAccessor.getDateTimeVariableFromObsSpace(
144  "MetaData", "datetime");
145  std::vector<float> wideLats = historicalObsAccessor.getFloatVariableFromObsSpace(
146  "MetaData", "latitude");
147  std::vector<float> wideLons = historicalObsAccessor.getFloatVariableFromObsSpace(
148  "MetaData", "longitude");
149 
150  std::map<std::string, int> stationIdMap;
151 
152  // Create a "central source of truth" map of integer index values for each
153  // string-valued station id across both obs spaces.
154  const boost::optional<Variable> &statIdVar = options_.stationIdVariable.value();
155  // If the station id variable is in use, and the underlying ids are string-formatted.
156  if (statIdVar != boost::none &&
157  obsdb_.dtype(statIdVar->group(), statIdVar->variable()) == ioda::ObsDtype::String) {
158  // retrieve the station ids from both obs spaces
159  std::vector<std::string> wideIds = historicalObsAccessor.getStringVariableFromObsSpace(
160  statIdVar->group(), statIdVar->variable());
161  std::vector<std::string> windowIds = windowObsAccessor.getStringVariableFromObsSpace(
162  statIdVar->group(), statIdVar->variable());
163  // append the assimilation window ids to the end of the historical obs space's ids
164  std::move(windowIds.begin(), windowIds.end(), std::back_inserter(wideIds));
165  // remove all non-unique station ids from the collection of ids
166  std::sort(wideIds.begin(), wideIds.end());
167  const auto endOfUnique = std::unique(wideIds.begin(), wideIds.end());
168  wideIds.erase(endOfUnique, wideIds.end());
169  // map all unique station ids across both obs spaces to an integer index
170  int index = 0;
171  for (const std::string& value : wideIds)
172  stationIdMap[value] = index++;
173  }
174 
175  std::vector<int> wideRecordIds = getStationIds(stationIdMap,
176  options_.stationIdVariable.value(),
177  widerObsSpace, historicalObsAccessor);
178 
179  // obsIdentifierData: all of the MetaData needed to uniquely identify each observation
180  // MetaData in use: time stamp, lat/lon coordinates, the station id
181  // (actual or integer equivalent), and an additional number for differentiating identical
182  // observations
183  typedef std::tuple<util::DateTime, float, float, int, size_t> obsIdentifierData;
184 
185  // Collect identifiers of each flagged observation from the wider obs space (which was used to
186  // run the stuck check and/or the track check ship filters. Increment "differentiator
187  // counter" until the full identifier to-be-added is unique within the set.
188  std::set<obsIdentifierData> wideFlaggedLocationIds;
189  // qc flags are the same across all variables for these filters
190  const std::vector<int> &wideFlags = (*qcflagsWide)[0];
191  for (size_t i = 0; i < wideFlags.size(); i++) {
192  if (wideFlags[i] == QCflags::track) {
193  obsIdentifierData obsLabel = {
194  wideDts.at(i), wideLats.at(i), wideLons.at(i), wideRecordIds.at(i), 0
195  };
196  while (wideFlaggedLocationIds.find(obsLabel) != wideFlaggedLocationIds.end()) {
197  (std::get<4>(obsLabel))++;
198  }
199  wideFlaggedLocationIds.insert(obsLabel);
200  }
201  }
202  // Retrieve relevant identifier information for all observations in assimilation window.
203  std::vector<util::DateTime> windowDts = windowObsAccessor.getDateTimeVariableFromObsSpace(
204  "MetaData", "datetime");
205  std::vector<float> windowLats = windowObsAccessor.getFloatVariableFromObsSpace(
206  "MetaData", "latitude");
207  std::vector<float> windowLons = windowObsAccessor.getFloatVariableFromObsSpace(
208  "MetaData", "longitude");
209  std::vector<int> windowStationIds = getStationIds(stationIdMap,
210  options_.stationIdVariable.value(),
211  obsdb_, windowObsAccessor);
212 
213  // Map obsIdentifierData for every assimilation window observation to its index within the
214  // window's full observation accessor.
215  std::map<obsIdentifierData, const size_t> locationIdToIndex;
216  for (size_t i = 0; i < windowObsAccessor.totalNumObservations(); i++) {
217  // Set up all observation labels with differentiator counter initially set to 0
218  obsIdentifierData obsLabel = {windowDts.at(i), windowLats.at(i), windowLons.at(i),
219  windowStationIds.at(i), 0};
220  // If the observation label has already been added to the map, increment the differentiator
221  // counter until the label is not present within the location id map.
222  while (locationIdToIndex.find(obsLabel) != locationIdToIndex.end()) {
223  (std::get<4>(obsLabel))++;
224  }
225  locationIdToIndex.insert(std::pair<obsIdentifierData, const size_t>(obsLabel, i));
226  }
227  // Comparator defined in order to use set intersection operation for map of indexed
228  // assimilated observation ids and set of flagged observations from wider observation spaces
229  struct setIntersectionComparator {
230  bool operator()(const obsIdentifierData &lhs,
231  const std::pair<const obsIdentifierData, const size_t> &rhs) {
232  return lhs < rhs.first;
233  }
234  bool operator()(const std::pair<const obsIdentifierData, const size_t> &lhs,
235  const obsIdentifierData &rhs) {
236  return lhs.first < rhs;
237  }
238  };
239 
240  // Iterate through flagged observations in the historical obs space,
241  // finding the observations which are also in the assimilation obs space, and
242  // marking the associated indices to flag using the ObsAccessor flagRejectedObservations method
243  std::vector<bool> globalObsToFlag(windowObsAccessor.totalNumObservations(), false);
244  for (const obsIdentifierData &id : wideFlaggedLocationIds) {
245  if (locationIdToIndex.find(id) != locationIdToIndex.end()) {
246  size_t locToFlag = locationIdToIndex.at(id);
247  globalObsToFlag.at(locToFlag) = true;
248  }
249  }
250  windowObsAccessor.flagRejectedObservations(globalObsToFlag, flagged);
251 }
252 
253 std::vector<int> HistoryCheck::getStationIds(const std::map<std::string, int> &stringMap,
254  const boost::optional<Variable> &stationIdVar,
255  const ioda::ObsSpace &obsdb,
256  const ObsAccessor &obsacc) const {
257  if (stationIdVar == boost::none) {
258  if (obsdb.obs_group_vars().empty()) {
259  // Observations were not grouped into records.
260  // Assume all observations were taken by the same station.
261  return std::vector<int>(obsacc.totalNumObservations(), 0);
262  } else {
263  const std::vector<size_t> &recordNumbers = obsacc.getRecordIds();
264  return std::vector<int>(recordNumbers.begin(), recordNumbers.end());
265  }
266  } else {
267  switch (obsdb.dtype(stationIdVar->group(), stationIdVar->variable())) {
268  case ioda::ObsDtype::Integer:
269  {
270  return obsacc.getIntVariableFromObsSpace(stationIdVar->group(),
271  stationIdVar->variable());
272  }
273 
274  case ioda::ObsDtype::String:
275  {
276  std::vector<std::string> stringIds =
277  obsacc.getStringVariableFromObsSpace(stationIdVar->group(),
278  stationIdVar->variable());
279  std::vector<int> ints;
280  ints.reserve(stringIds.size());
281  std::transform(stringIds.begin(), stringIds.end(), std::back_inserter(ints),
282  [&stringMap](const std::string& value) { return stringMap.at(value); });
283  return ints;
284  }
285 
286  default:
287  throw eckit::UserError("Only integer and string variables may be used as station IDs",
288  Here());
289  }
290  }
291 }
292 
293 void HistoryCheck::print(std::ostream & os) const {
294  os << "HistoryCheck: config = " << config_ << '\n';
295 }
296 } // namespace ufo
Base class for UFO QC filters.
Definition: FilterBase.h:45
const eckit::LocalConfiguration config_
Definition: FilterBase.h:59
std::vector< int > getStationIds(const std::map< std::string, int > &stringMap, const boost::optional< Variable > &stationIdVar, const ioda::ObsSpace &obsdb, const ObsAccessor &obsacc) const
Retrieve all station ids from the ObsAccessor. If string-labelled, ids will be converted to integers.
void applyFilter(const std::vector< bool > &, const Variables &, std::vector< std::vector< bool >> &) const override
Definition: HistoryCheck.cc:61
void print(std::ostream &) const override
eckit::LocalConfiguration unitTestConfig_
HistoryCheck(ioda::ObsSpace &obsdb, const Parameters_ &parameters, std::shared_ptr< ioda::ObsDataVector< int > > flags, std::shared_ptr< ioda::ObsDataVector< float > > obserr)
Definition: HistoryCheck.cc:39
Options controlling the operation of history check filter.
oops::OptionalParameter< StuckCheckCoreParameters > stuckCheckParameters
oops::Parameter< bool > resetLargerObsSpaceVariables
oops::RequiredParameter< ioda::ObsTopLevelParameters > largerObsSpace
oops::RequiredParameter< SurfaceObservationSubtype > surfaceObservationSubtype
oops::RequiredParameter< util::Duration > timeBeforeStartOfWindow
Amount of time before start of assimilation window to collect for the history check.
oops::OptionalParameter< TrackCheckShipCoreParameters > trackCheckShipParameters
This class provides access to observations that may be held on multiple MPI ranks.
Definition: ObsAccessor.h:56
std::vector< std::string > getStringVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:154
std::vector< int > getIntVariableFromObsSpace(const std::string &group, const std::string &variable) const
Return the values of the specified variable at successive observation locations.
Definition: ObsAccessor.cc:139
std::vector< size_t > getRecordIds() const
Return the vector of IDs of records successive observation locations belong to.
Definition: ObsAccessor.cc:164
std::vector< float > getFloatVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:144
std::vector< util::DateTime > getDateTimeVariableFromObsSpace(const std::string &group, const std::string &variable) const
Definition: ObsAccessor.cc:159
void flagRejectedObservations(const std::vector< bool > &isRejected, std::vector< std::vector< bool > > &flagged) const
Update flags of observations held on the current MPI rank.
Definition: ObsAccessor.cc:243
size_t totalNumObservations() const
Definition: ObsAccessor.cc:170
void preProcess() override
ioda::ObsSpace & obsdb_
Options controlling the operation of stuck check filter.
Checks tracks of ships and buoys, rejecting observations whose locations and timestamps make them inc...
Options controlling the operation of the ship track check filter.
oops::OptionalParameter< Variable > stationIdVariable
constexpr int track
Definition: QCflags.h:31
ObsAccessor createObsAccessor(const boost::optional< Variable > &stationIdVariable, const ioda::ObsSpace &obsdb)
Create an ObsAccessor object providing access to observations that need to be checked by the current ...
Definition: RunCRTM.h:27