UFO
RecursiveSplitter.cc
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2019 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
9 
10 #include <algorithm>
11 #include <numeric>
12 
13 #include "oops/util/Random.h"
14 
15 namespace ufo
16 {
17 
19  orderedIds_.resize(numIds);
20  std::iota(orderedIds_.begin(), orderedIds_.end(), 0);
22 }
23 
25  const size_t numIds = orderedIds_.size();
26  encodedGroups_.resize(numIds);
27  if (numIds == 1) {
28  // There are no groups of 2 or more elements
29  encodedGroups_[0] = numIds;
30  } else if (numIds > 1) {
31  encodedGroups_[0] = 0;
32  encodedGroups_[1] = numIds - 1;
33  }
34 }
35 
36 template <typename T>
37 void RecursiveSplitter::groupByImpl(const std::vector<T> &categories) {
38  auto orderedCategory = [&](size_t index) { return categories[orderedIds_[index]]; };
39 
40  const auto numIds = orderedIds_.size();
41  ptrdiff_t lastIndexInGroup = -1;
42  ptrdiff_t lastIndexInLastGroup = -1;
43  while (lastIndexInGroup + 1 < numIds) {
44  const size_t firstIndexInGroup = encodedGroups_[lastIndexInGroup + 1];
45  if (firstIndexInGroup + 1 >= numIds)
46  break;
47  lastIndexInGroup = encodedGroups_[firstIndexInGroup + 1];
48 
49  std::stable_sort(orderedIds_.begin() + firstIndexInGroup,
50  orderedIds_.begin() + lastIndexInGroup + 1,
51  [&categories](size_t idA, size_t idB)
52  { return categories[idA] < categories[idB];});
53 
54  // Now update the group
55  size_t newFirstIndex = firstIndexInGroup;
56  for (size_t newLastIndex = firstIndexInGroup;
57  newLastIndex <= lastIndexInGroup;
58  ++newLastIndex) {
59  if (newLastIndex != lastIndexInGroup &&
60  orderedCategory(newLastIndex) == orderedCategory(newLastIndex + 1))
61  continue;
62 
63  if (newLastIndex > newFirstIndex) {
64  // We've found a new group of equivalent elements
65  encodedGroups_[lastIndexInLastGroup + 1] = newFirstIndex;
66  encodedGroups_[newFirstIndex + 1] = newLastIndex;
67  lastIndexInLastGroup = newLastIndex;
68  }
69  newFirstIndex = newLastIndex + 1;
70  }
71  }
72 
73  if (lastIndexInLastGroup + 1 < numIds)
74  encodedGroups_[lastIndexInLastGroup + 1] = numIds;
75 }
76 
77 void RecursiveSplitter::shuffleGroups(unsigned int seed) {
78  for (Group group : multiElementGroups()) {
79  std::vector<size_t>::iterator nonConstGroupBegin =
80  orderedIds_.begin() + (group.begin() - orderedIds_.cbegin());
81  std::vector<size_t>::iterator nonConstGroupEnd =
82  orderedIds_.begin() + (group.end() - orderedIds_.cbegin());
83  util::shuffle(nonConstGroupBegin, nonConstGroupEnd, seed);
84  }
85 }
86 
88  for (Group group : multiElementGroups()) {
89  std::vector<size_t>::iterator nonConstGroupBegin =
90  orderedIds_.begin() + (group.begin() - orderedIds_.cbegin());
91  std::vector<size_t>::iterator nonConstGroupEnd =
92  orderedIds_.begin() + (group.end() - orderedIds_.cbegin());
93  util::shuffle(nonConstGroupBegin, nonConstGroupEnd);
94  }
95 }
96 
97 template void RecursiveSplitter::groupByImpl(const std::vector<int> &);
98 template void RecursiveSplitter::groupByImpl(const std::vector<size_t> &);
99 template void RecursiveSplitter::groupByImpl(const std::vector<std::string> &);
100 
101 } // namespace ufo
ufo::RecursiveSplitter::Group
A range of indices of all array elements belonging to a particular equivalence class.
Definition: src/ufo/utils/RecursiveSplitter.h:67
ufo::RecursiveSplitter::shuffleGroups
void shuffleGroups()
Randomly shuffle the elements of each equivalence class.
Definition: RecursiveSplitter.cc:87
ufo::RecursiveSplitter::multiElementGroups
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
Definition: src/ufo/utils/RecursiveSplitter.h:293
ufo::RecursiveSplitter::RecursiveSplitter
RecursiveSplitter(size_t numIds)
Initialize partitioning of an array of numIds elements.
Definition: RecursiveSplitter.cc:18
ufo
Definition: RunCRTM.h:27
ufo::RecursiveSplitter::orderedIds_
std::vector< size_t > orderedIds_
Indices of elements of the partitioned array ordered by equivalence class.
Definition: src/ufo/utils/RecursiveSplitter.h:323
ufo::RecursiveSplitter::encodedGroups_
std::vector< size_t > encodedGroups_
Encoded locations of multi-element equivalence classes in orderedIds_.
Definition: src/ufo/utils/RecursiveSplitter.h:325
ufo::RecursiveSplitter::initializeEncodedGroups
void initializeEncodedGroups()
Definition: RecursiveSplitter.cc:24
ufo::RecursiveSplitter::groupByImpl
void groupByImpl(const std::vector< T > &categories)
Definition: RecursiveSplitter.cc:37
RecursiveSplitter.h