ClickHouse

Форк
0
/
string_pool.cpp 
258 строк · 7.9 Кб
1
#include <iostream>
2
#include <fstream>
3
#include <iomanip>
4
#include <unordered_map>
5
#include <sparsehash/dense_hash_map>
6

7
#include <Common/Stopwatch.h>
8

9
#include <base/StringRef.h>
10
#include <Common/Arena.h>
11

12
#include <IO/ReadBufferFromFileDescriptor.h>
13
#include <IO/ReadHelpers.h>
14

15
//#define DBMS_HASH_MAP_COUNT_COLLISIONS
16
#include <Common/HashTable/HashMap.h>
17

18
int main(int argc, char ** argv)
19
{
20
    if (argc < 2)
21
    {
22
        std::cerr << "Usage: program n\n";
23
        return 1;
24
    }
25

26
    std::cerr << std::fixed << std::setprecision(3);
27
    std::ofstream devnull("/dev/null");
28

29
    DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
30
    size_t n = std::stol(argv[1]);
31
    size_t elems_show = 1;
32

33
    using Vec = std::vector<std::string>;
34
    using Set = std::unordered_map<std::string, int>;
35
    using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
36
    using DenseSet = ::google::dense_hash_map<std::string, int>;
37
    using RefsDenseSet = ::google::dense_hash_map<StringRef, int, StringRefHash>;
38
    using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
39
    Vec vec;
40

41
    vec.reserve(n);
42

43
    {
44
        Stopwatch watch;
45

46
        std::string s;
47
        for (size_t i = 0; i < n && !in.eof(); ++i)
48
        {
49
            DB::readEscapedString(s, in);
50
            DB::assertChar('\n', in);
51
            vec.push_back(s);
52
        }
53

54
        std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
55
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
56
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
57
            << std::endl;
58
    }
59

60
    {
61
        DB::Arena pool;
62
        Stopwatch watch;
63
        const char * res = nullptr;
64

65
        for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
66
        {
67
            const char * tmp = pool.insert(it->data(), it->size());
68
            if (it == vec.begin())
69
                res = tmp;
70
        }
71

72
        std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
73
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
74
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
75
            << std::endl;
76

77
        devnull.write(res, 100);
78
        devnull << std::endl;
79
    }
80

81
    {
82
        Set set;
83
        Stopwatch watch;
84

85
        for (const auto & elem : vec)
86
            set[elem] = 0;
87

88
        std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
89
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
90
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
91
            << std::endl;
92

93
        size_t i = 0;
94
        for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
95
        {
96
            devnull << it->first;
97
            devnull << std::endl;
98
        }
99
    }
100

101
    {
102
        RefsSet set;
103
        Stopwatch watch;
104

105
        for (const auto & elem : vec)
106
            set[StringRef(elem)] = 0;
107

108
        std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
109
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
110
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
111
            << std::endl;
112

113
        size_t i = 0;
114
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
115
        {
116
            devnull.write(it->first.data, it->first.size);
117
            devnull << std::endl;
118
        }
119
    }
120

121
    {
122
        DB::Arena pool;
123
        RefsSet set;
124
        Stopwatch watch;
125

126
        for (const auto & elem : vec)
127
            set[StringRef(pool.insert(elem.data(), elem.size()), elem.size())] = 0;
128

129
        std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
130
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
131
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
132
            << std::endl;
133

134
        size_t i = 0;
135
        for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
136
        {
137
            devnull.write(it->first.data, it->first.size);
138
            devnull << std::endl;
139
        }
140
    }
141

142
    {
143
        DenseSet set;
144
        set.set_empty_key(DenseSet::key_type());
145
        Stopwatch watch;
146

147
        for (const auto & elem : vec)
148
            set[elem] = 0;
149

150
        std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
151
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
152
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
153
            << std::endl;
154

155
        size_t i = 0;
156
        for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
157
        {
158
            devnull << it->first;
159
            devnull << std::endl;
160
        }
161
    }
162

163
    {
164
        RefsDenseSet set;
165
        set.set_empty_key(RefsDenseSet::key_type());
166
        Stopwatch watch;
167

168
        for (const auto & elem : vec)
169
            set[StringRef(elem.data(), elem.size())] = 0;
170

171
        std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
172
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
173
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
174
            << std::endl;
175

176
        size_t i = 0;
177
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
178
        {
179
            devnull.write(it->first.data, it->first.size);
180
            devnull << std::endl;
181
        }
182
    }
183

184
    {
185
        DB::Arena pool;
186
        RefsDenseSet set;
187
        set.set_empty_key(RefsDenseSet::key_type());
188
        Stopwatch watch;
189

190
        for (const auto & elem : vec)
191
            set[StringRef(pool.insert(elem.data(), elem.size()), elem.size())] = 0;
192

193
        std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
194
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
195
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
196
            << std::endl;
197

198
        size_t i = 0;
199
        for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
200
        {
201
            devnull.write(it->first.data, it->first.size);
202
            devnull << std::endl;
203
        }
204
    }
205

206
    {
207
        RefsHashMap set;
208
        Stopwatch watch;
209

210
        for (const auto & elem : vec)
211
        {
212
            RefsHashMap::LookupResult inserted_it;
213
            bool inserted;
214
            set.emplace(StringRef(elem), inserted_it, inserted);
215
        }
216

217
        std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
218
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
219
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
220
            << std::endl;
221

222
        size_t i = 0;
223
        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
224
        {
225
            devnull.write(it->getKey().data, it->getKey().size);
226
            devnull << std::endl;
227
        }
228

229
        //std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
230
    }
231

232
    {
233
        DB::Arena pool;
234
        RefsHashMap set;
235
        Stopwatch watch;
236

237
        for (const auto & elem : vec)
238
        {
239
            RefsHashMap::LookupResult inserted_it;
240
            bool inserted;
241
            set.emplace(StringRef(pool.insert(elem.data(), elem.size()), elem.size()), inserted_it, inserted);
242
        }
243

244
        std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
245
            << vec.size() / watch.elapsedSeconds() << " rows/sec., "
246
            << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
247
            << std::endl;
248

249
        size_t i = 0;
250
        for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
251
        {
252
            devnull.write(it->getKey().data, it->getKey().size);
253
            devnull << std::endl;
254
        }
255
    }
256

257
    return 0;
258
}
259

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.