UFO
RecursiveSplitter.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2019 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <numeric>
12 
13 #include "oops/util/Random.h"
14 
15 namespace ufo
16 {
17 
18 RecursiveSplitter::RecursiveSplitter(size_t numIds, bool opsCompatibilityMode) :
19  opsCompatibilityMode_(opsCompatibilityMode) {
20  orderedIds_.resize(numIds);
21  std::iota(orderedIds_.begin(), orderedIds_.end(), 0);
23 }
24 
26  const size_t numIds = orderedIds_.size();
27  encodedGroups_.resize(numIds);
28  if (numIds == 1) {
29  // There are no groups of 2 or more elements
30  encodedGroups_[0] = numIds;
31  } else if (numIds > 1) {
32  encodedGroups_[0] = 0;
33  encodedGroups_[1] = numIds - 1;
34  }
35 }
36 
37 template <typename T>
38 void RecursiveSplitter::groupByImpl(const std::vector<T> &categories) {
39  auto orderedCategory = [&](size_t index) { return categories[orderedIds_[index]]; };
40 
41  const auto numIds = orderedIds_.size();
42  ptrdiff_t lastIndexInGroup = -1;
43  ptrdiff_t lastIndexInLastGroup = -1;
44  while (lastIndexInGroup + 1 < numIds) {
45  const size_t firstIndexInGroup = encodedGroups_[lastIndexInGroup + 1];
46  if (firstIndexInGroup + 1 >= numIds)
47  break;
48  lastIndexInGroup = encodedGroups_[firstIndexInGroup + 1];
49 
51  metOfficeSort(orderedIds_.begin() + firstIndexInGroup,
52  orderedIds_.begin() + lastIndexInGroup + 1,
53  [&categories](size_t id) { return categories[id]; });
54  } else {
55  std::stable_sort(orderedIds_.begin() + firstIndexInGroup,
56  orderedIds_.begin() + lastIndexInGroup + 1,
57  [&categories](size_t idA, size_t idB)
58  { return categories[idA] < categories[idB]; });
59  }
60 
61  // Now update the group
62  size_t newFirstIndex = firstIndexInGroup;
63  for (size_t newLastIndex = firstIndexInGroup;
64  newLastIndex <= lastIndexInGroup;
65  ++newLastIndex) {
66  if (newLastIndex != lastIndexInGroup &&
67  orderedCategory(newLastIndex) == orderedCategory(newLastIndex + 1))
68  continue;
69 
70  if (newLastIndex > newFirstIndex) {
71  // We've found a new group of equivalent elements
72  encodedGroups_[lastIndexInLastGroup + 1] = newFirstIndex;
73  encodedGroups_[newFirstIndex + 1] = newLastIndex;
74  lastIndexInLastGroup = newLastIndex;
75  }
76  newFirstIndex = newLastIndex + 1;
77  }
78  }
79 
80  if (lastIndexInLastGroup + 1 < numIds)
81  encodedGroups_[lastIndexInLastGroup + 1] = numIds;
82 }
83 
85  for (Group group : multiElementGroups()) {
86  std::vector<size_t>::iterator nonConstGroupBegin =
87  orderedIds_.begin() + (group.begin() - orderedIds_.cbegin());
88  std::vector<size_t>::iterator nonConstGroupEnd =
89  orderedIds_.begin() + (group.end() - orderedIds_.cbegin());
90  util::shuffle(nonConstGroupBegin, nonConstGroupEnd);
91  }
92 }
93 
94 void RecursiveSplitter::setSeed(unsigned int seed, bool force) {
95  std::vector<size_t> dummy;
96  util::shuffle(dummy.begin(), dummy.end(), seed, force);
97 }
98 
99 template void RecursiveSplitter::groupByImpl(const std::vector<int> &);
100 template void RecursiveSplitter::groupByImpl(const std::vector<size_t> &);
101 template void RecursiveSplitter::groupByImpl(const std::vector<std::string> &);
102 
103 } // namespace ufo
A range of indices of all array elements belonging to a particular equivalence class.
void groupByImpl(const std::vector< T > &categories)
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
void shuffleGroups()
Randomly shuffle the elements of each equivalence class.
std::vector< size_t > orderedIds_
Indices of elements of the partitioned array ordered by equivalence class.
RecursiveSplitter(size_t numIds, bool opsCompatibilityMode=false)
Initialize partitioning of an array of numIds elements.
std::vector< size_t > encodedGroups_
Encoded locations of multi-element equivalence classes in orderedIds_.
void setSeed(unsigned int seed, bool force)
Initialise the random number generator used by shuffleGroups() with a seed.
Definition: RunCRTM.h:27
void metOfficeSort(RandomIt first, RandomIt last, const UnaryOperation &key)
Sort the range [first, last) in the order of ascending keys using the same algorithm as the Ops_Integ...