UFO
src/ufo/utils/RecursiveSplitter.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2019 Met Office UK
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef UFO_UTILS_RECURSIVESPLITTER_H_
9 #define UFO_UTILS_RECURSIVESPLITTER_H_
10 
11 #include <algorithm>
12 #include <cassert>
13 #include <cstddef> // for size_t
14 #include <string>
15 #include <vector>
16 
17 #include "ufo/utils/ArrowProxy.h"
18 
19 namespace ufo {
20 
21 /// \brief Partitions an array into groups of elements equivalent according to certain criteria.
22 ///
23 /// Example:
24 /// \code
25 /// RecursiveSplitter splitter(8);
26 /// // Tell the splitter that according to a certain criterion, some elements belong to category #1
27 /// // and others to category #2
28 /// splitter.groupBy(std::vector<int>({1, 2, 1, 2, 1, 2, 1, 2}));
29 /// std::cout << "After first split:" << std::endl;
30 /// for (const auto &group : splitter.groups()) {
31 /// std::cout << " Elements with these indices are equivalent: ";
32 /// for (const auto &index : group)
33 /// std::cout << index << ", ";
34 /// std::cout << std::endl;
35 /// }
36 /// // Tell the splitter that according to a different criterion, there is a different split between
37 /// // categories
38 /// splitter.groupBy(std::vector<int>({1, 1, 1, 1, 2, 2, 2, 2}));
39 /// std::cout << "After second split:" << std::endl;
40 /// for (const auto &group : splitter.groups()) {
41 /// std::cout << " Elements with these indices are equivalent: ";
42 /// for (const auto &index : group)
43 /// std::cout << index << ", ";
44 /// std::cout << std::endl;
45 /// }
46 /// \endcode
47 ///
48 /// Output:
49 /// \code{.unparsed}
50 /// After first split:
51 /// Elements with these indices are equivalent: 0, 2, 4, 6,
52 /// Elements with these indices are equivalent: 1, 3, 5, 7,
53 /// After second split:
54 /// Elements with these indices are equivalent: 0, 2,
55 /// Elements with these indices are equivalent: 4, 6,
56 /// Elements with these indices are equivalent: 1, 3,
57 /// Elements with these indices are equivalent: 5, 7,
58 /// \endcode
59 ///
60 /// \internal In the implementation, indices into the partitioned array are referred to as _ids_.
61 /// The term _index_ denotes an index into the vector \c orderedIds_.
63 {
64  public:
65  /// \brief A range of indices of all array elements belonging to a particular equivalence class.
66  class Group
67  {
68  public:
69  Group(const std::vector<size_t>::const_iterator &begin,
70  const std::vector<size_t>::const_iterator &end)
71  : begin_(begin), end_(end)
72  {}
73 
74  std::vector<size_t>::const_iterator begin() const
75  {
76  return begin_;
77  }
78 
79  std::vector<size_t>::const_iterator end() const
80  {
81  return end_;
82  }
83 
84  private:
85  std::vector<size_t>::const_iterator begin_;
86  std::vector<size_t>::const_iterator end_;
87  };
88 
89  /// \brief An iterator over equivalence classes consisting of more than one element.
91  {
92  public:
93  typedef ptrdiff_t difference_type;
94  typedef Group value_type;
96  typedef std::forward_iterator_tag iterator_category;
97 
98  struct BeginTag {};
99  struct EndTag {};
100 
102  : splitter_(splitter) {
103  if (splitter_.encodedGroups_.empty()) {
104  firstIndexInGroup_ = 0;
105  } else {
107  }
108  }
109 
111  : splitter_(splitter) {
113  }
114 
115  Group operator*() const {
116  assert(!isSentinel());
117  size_t lastIndexInGroup = splitter_.encodedGroups_[firstIndexInGroup_ + 1];
119  splitter_.orderedIds_.begin() + lastIndexInGroup + 1);
120  }
121 
123  return ArrowProxy<Group>(operator*());
124  }
125 
127  const size_t lastIndexInGroup = splitter_.encodedGroups_[firstIndexInGroup_ + 1];
128  if (lastIndexInGroup + 1 < splitter_.encodedGroups_.size()) {
129  firstIndexInGroup_ = splitter_.encodedGroups_[lastIndexInGroup + 1];
130  } else {
132  }
133  return *this;
134  }
135 
136  bool operator==(const MultiElementGroupIterator& other) const {
137  // In principle we should also check splitters for equality. We don't do it for efficiency.
138  return firstIndexInGroup_ == other.firstIndexInGroup_;
139  }
140 
141  bool operator!=(const MultiElementGroupIterator& other) const {
142  return !operator==(other);
143  }
144 
145  protected:
146  bool isSentinel() const {
148  }
149 
150  protected:
153  };
154 
155  /// \brief An iterator over all equivalence classes.
157  public:
162 
165 
166  explicit GroupIterator(const RecursiveSplitter &splitter, BeginTag)
168  {}
169 
170  explicit GroupIterator(const RecursiveSplitter &splitter, EndTag)
172  {}
173 
174  Group operator*() const {
175  assert(!isSentinel());
178  } else {
179  return Group(splitter_.orderedIds_.begin() + currentIndex_,
180  splitter_.orderedIds_.begin() + currentIndex_ + 1);
181  }
182  }
183 
185  return ArrowProxy<Group>(operator*());
186  }
187 
190  const size_t lastIndexInGroup = splitter_.encodedGroups_[firstIndexInGroup_ + 1];
191  if (lastIndexInGroup + 1 < splitter_.encodedGroups_.size()) {
192  firstIndexInGroup_ = splitter_.encodedGroups_[lastIndexInGroup + 1];
193  } else {
195  }
196  currentIndex_ = lastIndexInGroup + 1;
197  } else {
198  ++currentIndex_;
199  }
200  return *this;
201  }
202 
203  bool operator==(const GroupIterator& other) const {
204  // In principle we should also check splitters for equality. We don't do it for efficiency.
205  return firstIndexInGroup_ == other.firstIndexInGroup_ &&
206  currentIndex_ == other.currentIndex_;
207  }
208 
209  bool operator!=(const GroupIterator& other) const {
210  return !operator==(other);
211  }
212 
213  private:
214  bool isSentinel() const {
215  return currentIndex_ == splitter_.encodedGroups_.size();
216  }
217 
218  private:
220  };
221 
222  /// \brief A range of equivalence classes.
224  {
225  public:
226  explicit GroupRange(const RecursiveSplitter& splitter)
227  : splitter_(splitter)
228  {}
229 
232  }
233 
234  GroupIterator end() const {
236  }
237  private:
239  };
240 
241  /// \brief A range of equivalence classes consisting of more than one element.
243  {
244  public:
245  explicit MultiElementGroupRange(const RecursiveSplitter& splitter)
246  : splitter_(splitter)
247  {}
248 
251  }
252 
255  }
256  private:
258  };
259 
260  /// \brief Initialize partitioning of an array of \p numIds elements.
261  ///
262  /// Initially, all elements are assumed to belong to the same equivalence class.
263  explicit RecursiveSplitter(size_t numIds);
264 
265  /// \brief Split existing equivalence classes according to a new criterion.
266  ///
267  /// \param categories
268  /// A vector assigning each element of the partitioned array to a unique category.
269  ///
270  /// Each existing equivalence class E consisting of N_E elements with indices E_i (0 <= i < N_E)
271  /// is split into one or more classes according to the equivalence relation
272  ///
273  /// E_i'th element is equivalent to E_j'th element if and only if
274  /// categories[E_i] == categories[E_j].
275  void groupBy(const std::vector<size_t> &categories) {
276  return groupByImpl(categories);
277  }
278 
279  /// \overload
280  void groupBy(const std::vector<int> &categories) {
281  return groupByImpl(categories);
282  }
283 
284  /// \overload
285  void groupBy(const std::vector<std::string> &categories) {
286  return groupByImpl(categories);
287  }
288 
289  /// \brief Return the range of equivalence classes.
290  GroupRange groups() const { return GroupRange(*this); }
291 
292  /// \brief Return the range of equivalence classes consisting of more than one element.
294 
295  /// \brief Sort the elements in each equivalence class in ascending order.
296  ///
297  /// The elements are compared using the binary comparison function \p comp. This function needs
298  /// to satisfy the same requirements as the \c comp argument of std::sort().
299  template <typename Compare>
300  void sortGroupsBy(Compare comp);
301 
302  /// \brief Randomly shuffle the elements of each equivalence class.
303  ///
304  /// \param seed
305  /// Seed with which to initialise the random number generator used by the shuffling algorithm
306  /// if this hasn't been done before (in a previous call to shuffleGroups() or another function
307  /// calling util::shuffle()).
308  void shuffleGroups(unsigned int seed);
309 
310  /// \brief Randomly shuffle the elements of each equivalence class.
311  ///
312  /// This overload uses the defaul seed.
313  void shuffleGroups();
314 
315  private:
317 
318  private:
319  template <typename T>
320  void groupByImpl(const std::vector<T> &categories);
321 
322  /// Indices of elements of the partitioned array ordered by equivalence class.
323  std::vector<size_t> orderedIds_;
324  /// Encoded locations of multi-element equivalence classes in orderedIds_.
325  std::vector<size_t> encodedGroups_;
326 };
327 
328 template <typename Compare>
330  for (Group group : multiElementGroups()) {
331  std::vector<size_t>::iterator nonConstGroupBegin =
332  orderedIds_.begin() + (group.begin() - orderedIds_.cbegin());
333  std::vector<size_t>::iterator nonConstGroupEnd =
334  orderedIds_.begin() + (group.end() - orderedIds_.cbegin());
335  std::sort(nonConstGroupBegin, nonConstGroupEnd, comp);
336  }
337 }
338 
339 } // namespace ufo
340 
341 #endif // UFO_UTILS_RECURSIVESPLITTER_H_
ufo::RecursiveSplitter::Group::begin
std::vector< size_t >::const_iterator begin() const
Definition: src/ufo/utils/RecursiveSplitter.h:74
ufo::RecursiveSplitter::MultiElementGroupIterator::operator==
bool operator==(const MultiElementGroupIterator &other) const
Definition: src/ufo/utils/RecursiveSplitter.h:136
ufo::RecursiveSplitter::MultiElementGroupIterator::operator!=
bool operator!=(const MultiElementGroupIterator &other) const
Definition: src/ufo/utils/RecursiveSplitter.h:141
ufo::RecursiveSplitter::GroupIterator
An iterator over all equivalence classes.
Definition: src/ufo/utils/RecursiveSplitter.h:156
ufo::RecursiveSplitter::MultiElementGroupIterator::value_type
Group value_type
Definition: src/ufo/utils/RecursiveSplitter.h:94
ufo::RecursiveSplitter::Group
A range of indices of all array elements belonging to a particular equivalence class.
Definition: src/ufo/utils/RecursiveSplitter.h:67
ufo::RecursiveSplitter::sortGroupsBy
void sortGroupsBy(Compare comp)
Sort the elements in each equivalence class in ascending order.
Definition: src/ufo/utils/RecursiveSplitter.h:329
ufo::RecursiveSplitter::MultiElementGroupIterator
An iterator over equivalence classes consisting of more than one element.
Definition: src/ufo/utils/RecursiveSplitter.h:91
ufo::RecursiveSplitter::MultiElementGroupRange
A range of equivalence classes consisting of more than one element.
Definition: src/ufo/utils/RecursiveSplitter.h:243
ufo::RecursiveSplitter::MultiElementGroupIterator::operator++
MultiElementGroupIterator & operator++()
Definition: src/ufo/utils/RecursiveSplitter.h:126
ufo::RecursiveSplitter::shuffleGroups
void shuffleGroups()
Randomly shuffle the elements of each equivalence class.
Definition: RecursiveSplitter.cc:87
ufo::RecursiveSplitter::GroupIterator::reference
MultiElementGroupIterator::reference reference
Definition: src/ufo/utils/RecursiveSplitter.h:160
ufo::RecursiveSplitter::GroupIterator::currentIndex_
size_t currentIndex_
Definition: src/ufo/utils/RecursiveSplitter.h:219
ufo::RecursiveSplitter::MultiElementGroupIterator::isSentinel
bool isSentinel() const
Definition: src/ufo/utils/RecursiveSplitter.h:146
ufo::RecursiveSplitter::multiElementGroups
MultiElementGroupRange multiElementGroups() const
Return the range of equivalence classes consisting of more than one element.
Definition: src/ufo/utils/RecursiveSplitter.h:293
ufo::RecursiveSplitter::GroupRange::begin
GroupIterator begin() const
Definition: src/ufo/utils/RecursiveSplitter.h:230
ufo::RecursiveSplitter::Group::end_
std::vector< size_t >::const_iterator end_
Definition: src/ufo/utils/RecursiveSplitter.h:86
ufo::RecursiveSplitter::RecursiveSplitter
RecursiveSplitter(size_t numIds)
Initialize partitioning of an array of numIds elements.
Definition: RecursiveSplitter.cc:18
ufo::RecursiveSplitter::GroupIterator::operator++
GroupIterator & operator++()
Definition: src/ufo/utils/RecursiveSplitter.h:188
ufo::test::Group
std::set< size_t > Group
Definition: test/ufo/RecursiveSplitter.h:25
ufo::RecursiveSplitter::MultiElementGroupRange::splitter_
const RecursiveSplitter & splitter_
Definition: src/ufo/utils/RecursiveSplitter.h:257
ufo::RecursiveSplitter
Partitions an array into groups of elements equivalent according to certain criteria.
Definition: src/ufo/utils/RecursiveSplitter.h:63
ufo::RecursiveSplitter::Group::begin_
std::vector< size_t >::const_iterator begin_
Definition: src/ufo/utils/RecursiveSplitter.h:85
ufo::RecursiveSplitter::MultiElementGroupRange::end
MultiElementGroupIterator end() const
Definition: src/ufo/utils/RecursiveSplitter.h:253
ufo::RecursiveSplitter::Group::end
std::vector< size_t >::const_iterator end() const
Definition: src/ufo/utils/RecursiveSplitter.h:79
ufo::RecursiveSplitter::GroupIterator::EndTag
MultiElementGroupIterator::EndTag EndTag
Definition: src/ufo/utils/RecursiveSplitter.h:164
ufo::RecursiveSplitter::MultiElementGroupRange::MultiElementGroupRange
MultiElementGroupRange(const RecursiveSplitter &splitter)
Definition: src/ufo/utils/RecursiveSplitter.h:245
ufo::RecursiveSplitter::groupBy
void groupBy(const std::vector< int > &categories)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: src/ufo/utils/RecursiveSplitter.h:280
ufo
Definition: RunCRTM.h:27
ufo::RecursiveSplitter::GroupRange::splitter_
const RecursiveSplitter & splitter_
Definition: src/ufo/utils/RecursiveSplitter.h:238
ufo::RecursiveSplitter::groupBy
void groupBy(const std::vector< size_t > &categories)
Split existing equivalence classes according to a new criterion.
Definition: src/ufo/utils/RecursiveSplitter.h:275
ufo::RecursiveSplitter::MultiElementGroupIterator::difference_type
ptrdiff_t difference_type
Definition: src/ufo/utils/RecursiveSplitter.h:93
ufo::RecursiveSplitter::groups
GroupRange groups() const
Return the range of equivalence classes.
Definition: src/ufo/utils/RecursiveSplitter.h:290
ufo::RecursiveSplitter::GroupIterator::isSentinel
bool isSentinel() const
Definition: src/ufo/utils/RecursiveSplitter.h:214
ufo::RecursiveSplitter::MultiElementGroupIterator::splitter_
const RecursiveSplitter & splitter_
Definition: src/ufo/utils/RecursiveSplitter.h:151
ufo::RecursiveSplitter::orderedIds_
std::vector< size_t > orderedIds_
Indices of elements of the partitioned array ordered by equivalence class.
Definition: src/ufo/utils/RecursiveSplitter.h:323
ufo::RecursiveSplitter::GroupIterator::operator!=
bool operator!=(const GroupIterator &other) const
Definition: src/ufo/utils/RecursiveSplitter.h:209
ufo::RecursiveSplitter::MultiElementGroupIterator::MultiElementGroupIterator
MultiElementGroupIterator(const RecursiveSplitter &splitter, EndTag)
Definition: src/ufo/utils/RecursiveSplitter.h:110
ufo::RecursiveSplitter::GroupIterator::operator->
ArrowProxy< Group > operator->() const
Definition: src/ufo/utils/RecursiveSplitter.h:184
ufo::RecursiveSplitter::MultiElementGroupIterator::iterator_category
std::forward_iterator_tag iterator_category
Definition: src/ufo/utils/RecursiveSplitter.h:96
ufo::RecursiveSplitter::encodedGroups_
std::vector< size_t > encodedGroups_
Encoded locations of multi-element equivalence classes in orderedIds_.
Definition: src/ufo/utils/RecursiveSplitter.h:325
ufo::RecursiveSplitter::MultiElementGroupIterator::operator*
Group operator*() const
Definition: src/ufo/utils/RecursiveSplitter.h:115
ufo::RecursiveSplitter::Group::Group
Group(const std::vector< size_t >::const_iterator &begin, const std::vector< size_t >::const_iterator &end)
Definition: src/ufo/utils/RecursiveSplitter.h:69
ufo::RecursiveSplitter::groupBy
void groupBy(const std::vector< std::string > &categories)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: src/ufo/utils/RecursiveSplitter.h:285
ufo::RecursiveSplitter::MultiElementGroupIterator::EndTag
Definition: src/ufo/utils/RecursiveSplitter.h:99
ufo::RecursiveSplitter::MultiElementGroupIterator::MultiElementGroupIterator
MultiElementGroupIterator(const RecursiveSplitter &splitter, BeginTag)
Definition: src/ufo/utils/RecursiveSplitter.h:101
ufo::RecursiveSplitter::MultiElementGroupRange::begin
MultiElementGroupIterator begin() const
Definition: src/ufo/utils/RecursiveSplitter.h:249
ufo::RecursiveSplitter::GroupIterator::difference_type
MultiElementGroupIterator::difference_type difference_type
Definition: src/ufo/utils/RecursiveSplitter.h:158
ufo::RecursiveSplitter::MultiElementGroupIterator::operator->
ArrowProxy< Group > operator->() const
Definition: src/ufo/utils/RecursiveSplitter.h:122
ufo::RecursiveSplitter::GroupIterator::operator*
Group operator*() const
Definition: src/ufo/utils/RecursiveSplitter.h:174
ufo::RecursiveSplitter::MultiElementGroupIterator::firstIndexInGroup_
size_t firstIndexInGroup_
Definition: src/ufo/utils/RecursiveSplitter.h:152
ufo::RecursiveSplitter::initializeEncodedGroups
void initializeEncodedGroups()
Definition: RecursiveSplitter.cc:24
ufo::RecursiveSplitter::GroupIterator::value_type
MultiElementGroupIterator::value_type value_type
Definition: src/ufo/utils/RecursiveSplitter.h:159
ufo::RecursiveSplitter::MultiElementGroupIterator::BeginTag
Definition: src/ufo/utils/RecursiveSplitter.h:98
ufo::RecursiveSplitter::GroupIterator::iterator_category
MultiElementGroupIterator::iterator_category iterator_category
Definition: src/ufo/utils/RecursiveSplitter.h:161
ArrowProxy.h
ufo::RecursiveSplitter::groupByImpl
void groupByImpl(const std::vector< T > &categories)
Definition: RecursiveSplitter.cc:37
ufo::RecursiveSplitter::GroupRange
A range of equivalence classes.
Definition: src/ufo/utils/RecursiveSplitter.h:224
ufo::RecursiveSplitter::GroupIterator::BeginTag
MultiElementGroupIterator::BeginTag BeginTag
Definition: src/ufo/utils/RecursiveSplitter.h:163
ufo::RecursiveSplitter::GroupRange::GroupRange
GroupRange(const RecursiveSplitter &splitter)
Definition: src/ufo/utils/RecursiveSplitter.h:226
ufo::ArrowProxy
Utility class used in overloads of operator-> in forward iterators.
Definition: ArrowProxy.h:17
ufo::RecursiveSplitter::GroupIterator::GroupIterator
GroupIterator(const RecursiveSplitter &splitter, EndTag)
Definition: src/ufo/utils/RecursiveSplitter.h:170
ufo::RecursiveSplitter::GroupIterator::GroupIterator
GroupIterator(const RecursiveSplitter &splitter, BeginTag)
Definition: src/ufo/utils/RecursiveSplitter.h:166
ufo::RecursiveSplitter::GroupIterator::operator==
bool operator==(const GroupIterator &other) const
Definition: src/ufo/utils/RecursiveSplitter.h:203
ufo::RecursiveSplitter::MultiElementGroupIterator::reference
ArrowProxy< Group > reference
Definition: src/ufo/utils/RecursiveSplitter.h:95
ufo::RecursiveSplitter::GroupRange::end
GroupIterator end() const
Definition: src/ufo/utils/RecursiveSplitter.h:234