ClickHouse
205 строк · 5.0 Кб
1#include <algorithm>
2#include <type_traits>
3#include <utility>
4#include <iostream>
5
6#include "pcg_random.hpp"
7
8#include <Columns/ColumnVector.h>
9#include <Common/ArenaAllocator.h>
10#include <Common/RadixSort.h>
11#include <Columns/ColumnArray.h>
12
13
14using namespace DB;
15
16template <typename T>
17struct GroupArraySortedDataHeap
18{
19using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
20using Array = PODArray<T, 32, Allocator>;
21
22Array values;
23
24static bool compare(const T & lhs, const T & rhs)
25{
26return lhs < rhs;
27}
28
29struct Comparator
30{
31bool operator()(const T & lhs, const T & rhs)
32{
33return compare(lhs, rhs);
34}
35};
36
37ALWAYS_INLINE void replaceTop()
38{
39size_t size = values.size();
40if (size < 2)
41return;
42
43size_t child_index = 1;
44
45if (values.size() > 2 && compare(values[1], values[2]))
46++child_index;
47
48/// Check if we are in order
49if (compare(values[child_index], values[0]))
50return;
51
52size_t current_index = 0;
53auto current = values[current_index];
54
55do
56{
57/// We are not in heap-order, swap the parent with it's largest child.
58values[current_index] = values[child_index];
59current_index = child_index;
60
61// Recompute the child based off of the updated parent
62child_index = 2 * child_index + 1;
63
64if (child_index >= size)
65break;
66
67if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
68{
69/// Right child exists and is greater than left child.
70++child_index;
71}
72
73/// Check if we are in order.
74} while (!compare(values[child_index], current));
75
76values[current_index] = current;
77}
78
79ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
80{
81if (values.size() >= max_elements)
82{
83/// Element is greater or equal than current max element, it cannot be in k min elements
84if (!compare(element, values[0]))
85return;
86
87values[0] = element;
88replaceTop();
89return;
90}
91
92values.push_back(element, arena);
93std::push_heap(values.begin(), values.end(), Comparator());
94}
95
96ALWAYS_INLINE void dump()
97{
98while (!values.empty())
99{
100std::pop_heap(values.begin(), values.end(), Comparator());
101std::cerr << values.back() << ' ';
102values.pop_back();
103}
104
105std::cerr << '\n';
106}
107};
108
109template <typename T>
110struct GroupArraySortedDataSort
111{
112using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
113using Array = PODArray<T, 32, Allocator>;
114
115Array values;
116
117static bool compare(const T & lhs, const T & rhs)
118{
119return lhs < rhs;
120}
121
122struct Comparator
123{
124bool operator()(const T & lhs, const T & rhs)
125{
126return compare(lhs, rhs);
127}
128};
129
130ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
131{
132RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
133values.resize(max_elements, arena);
134}
135
136ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
137{
138if (values.size() < max_elements * 4)
139return;
140
141std::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
142values.resize(max_elements, arena);
143}
144
145ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
146{
147values.push_back(element, arena);
148partialSortAndLimitIfNeeded(max_elements, arena);
149}
150};
151
152template <typename SortedData>
153NO_INLINE void benchmark(size_t elements, size_t max_elements)
154{
155Stopwatch watch;
156watch.start();
157
158SortedData data;
159pcg64_fast rng;
160
161Arena arena;
162
163for (size_t i = 0; i < elements; ++i)
164{
165uint64_t value = rng();
166data.addElement(value, max_elements, &arena);
167}
168
169watch.stop();
170std::cerr << "Elapsed " << watch.elapsedMilliseconds() << " milliseconds" << '\n';
171}
172
173int main(int argc, char ** argv)
174{
175(void)(argc);
176(void)(argv);
177
178if (argc != 4)
179{
180std::cerr << "./group_array_sorted method elements max_elements" << '\n';
181return 1;
182}
183
184std::string method = std::string(argv[1]);
185uint64_t elements = std::atol(argv[2]); /// NOLINT
186uint64_t max_elements = std::atol(argv[3]); /// NOLINT
187
188std::cerr << "Method " << method << " elements " << elements << " max elements " << max_elements << '\n';
189
190if (method == "heap")
191{
192benchmark<GroupArraySortedDataHeap<UInt64>>(elements, max_elements);
193}
194else if (method == "sort")
195{
196benchmark<GroupArraySortedDataSort<UInt64>>(elements, max_elements);
197}
198else
199{
200std::cerr << "Invalid method " << method << '\n';
201return 1;
202}
203
204return 0;
205}
206