ClickHouse

Форк
0
/
group_array_sorted.cpp 
205 строк · 5.0 Кб
1
#include <algorithm>
2
#include <type_traits>
3
#include <utility>
4
#include <iostream>
5

6
#include "pcg_random.hpp"
7

8
#include <Columns/ColumnVector.h>
9
#include <Common/ArenaAllocator.h>
10
#include <Common/RadixSort.h>
11
#include <Columns/ColumnArray.h>
12

13

14
using namespace DB;
15

16
template <typename T>
17
struct GroupArraySortedDataHeap
18
{
19
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
20
    using Array = PODArray<T, 32, Allocator>;
21

22
    Array values;
23

24
    static bool compare(const T & lhs, const T & rhs)
25
    {
26
        return lhs < rhs;
27
    }
28

29
    struct Comparator
30
    {
31
        bool operator()(const T & lhs, const T & rhs)
32
        {
33
            return compare(lhs, rhs);
34
        }
35
    };
36

37
    ALWAYS_INLINE void replaceTop()
38
    {
39
        size_t size = values.size();
40
        if (size < 2)
41
            return;
42

43
        size_t child_index = 1;
44

45
        if (values.size() > 2 && compare(values[1], values[2]))
46
            ++child_index;
47

48
        /// Check if we are in order
49
        if (compare(values[child_index], values[0]))
50
            return;
51

52
        size_t current_index = 0;
53
        auto current = values[current_index];
54

55
        do
56
        {
57
            /// We are not in heap-order, swap the parent with it's largest child.
58
            values[current_index] = values[child_index];
59
            current_index = child_index;
60

61
            // Recompute the child based off of the updated parent
62
            child_index = 2 * child_index + 1;
63

64
            if (child_index >= size)
65
                break;
66

67
            if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
68
            {
69
                /// Right child exists and is greater than left child.
70
                ++child_index;
71
            }
72

73
            /// Check if we are in order.
74
        } while (!compare(values[child_index], current));
75

76
        values[current_index] = current;
77
    }
78

79
    ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
80
    {
81
        if (values.size() >= max_elements)
82
        {
83
            /// Element is greater or equal than current max element, it cannot be in k min elements
84
            if (!compare(element, values[0]))
85
                return;
86

87
            values[0] = element;
88
            replaceTop();
89
            return;
90
        }
91

92
        values.push_back(element, arena);
93
        std::push_heap(values.begin(), values.end(), Comparator());
94
    }
95

96
    ALWAYS_INLINE void dump()
97
    {
98
        while (!values.empty())
99
        {
100
            std::pop_heap(values.begin(), values.end(), Comparator());
101
            std::cerr << values.back() << ' ';
102
            values.pop_back();
103
        }
104

105
        std::cerr << '\n';
106
    }
107
};
108

109
template <typename T>
110
struct GroupArraySortedDataSort
111
{
112
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
113
    using Array = PODArray<T, 32, Allocator>;
114

115
    Array values;
116

117
    static bool compare(const T & lhs, const T & rhs)
118
    {
119
        return lhs < rhs;
120
    }
121

122
    struct Comparator
123
    {
124
        bool operator()(const T & lhs, const T & rhs)
125
        {
126
            return compare(lhs, rhs);
127
        }
128
    };
129

130
    ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
131
    {
132
        RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
133
        values.resize(max_elements, arena);
134
    }
135

136
    ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
137
    {
138
        if (values.size() < max_elements * 4)
139
            return;
140

141
        std::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
142
        values.resize(max_elements, arena);
143
    }
144

145
    ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
146
    {
147
        values.push_back(element, arena);
148
        partialSortAndLimitIfNeeded(max_elements, arena);
149
    }
150
};
151

152
template <typename SortedData>
153
NO_INLINE void benchmark(size_t elements, size_t max_elements)
154
{
155
    Stopwatch watch;
156
    watch.start();
157

158
    SortedData data;
159
    pcg64_fast rng;
160

161
    Arena arena;
162

163
    for (size_t i = 0; i < elements; ++i)
164
    {
165
        uint64_t value = rng();
166
        data.addElement(value, max_elements, &arena);
167
    }
168

169
    watch.stop();
170
    std::cerr << "Elapsed " << watch.elapsedMilliseconds() << " milliseconds" << '\n';
171
}
172

173
int main(int argc, char ** argv)
174
{
175
    (void)(argc);
176
    (void)(argv);
177

178
    if (argc != 4)
179
    {
180
        std::cerr << "./group_array_sorted method elements max_elements" << '\n';
181
        return 1;
182
    }
183

184
    std::string method = std::string(argv[1]);
185
    uint64_t elements = std::atol(argv[2]); /// NOLINT
186
    uint64_t max_elements = std::atol(argv[3]); /// NOLINT
187

188
    std::cerr << "Method " << method << " elements " << elements << " max elements " << max_elements << '\n';
189

190
    if (method == "heap")
191
    {
192
        benchmark<GroupArraySortedDataHeap<UInt64>>(elements, max_elements);
193
    }
194
    else if (method == "sort")
195
    {
196
        benchmark<GroupArraySortedDataSort<UInt64>>(elements, max_elements);
197
    }
198
    else
199
    {
200
        std::cerr << "Invalid method " << method << '\n';
201
        return 1;
202
    }
203

204
    return 0;
205
}
206

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.