ClickHouse
258 строк · 7.9 Кб
1#include <iostream>
2#include <fstream>
3#include <iomanip>
4#include <unordered_map>
5#include <sparsehash/dense_hash_map>
6
7#include <Common/Stopwatch.h>
8
9#include <base/StringRef.h>
10#include <Common/Arena.h>
11
12#include <IO/ReadBufferFromFileDescriptor.h>
13#include <IO/ReadHelpers.h>
14
15//#define DBMS_HASH_MAP_COUNT_COLLISIONS
16#include <Common/HashTable/HashMap.h>
17
18int main(int argc, char ** argv)
19{
20if (argc < 2)
21{
22std::cerr << "Usage: program n\n";
23return 1;
24}
25
26std::cerr << std::fixed << std::setprecision(3);
27std::ofstream devnull("/dev/null");
28
29DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
30size_t n = std::stol(argv[1]);
31size_t elems_show = 1;
32
33using Vec = std::vector<std::string>;
34using Set = std::unordered_map<std::string, int>;
35using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
36using DenseSet = ::google::dense_hash_map<std::string, int>;
37using RefsDenseSet = ::google::dense_hash_map<StringRef, int, StringRefHash>;
38using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
39Vec vec;
40
41vec.reserve(n);
42
43{
44Stopwatch watch;
45
46std::string s;
47for (size_t i = 0; i < n && !in.eof(); ++i)
48{
49DB::readEscapedString(s, in);
50DB::assertChar('\n', in);
51vec.push_back(s);
52}
53
54std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
55<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
56<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
57<< std::endl;
58}
59
60{
61DB::Arena pool;
62Stopwatch watch;
63const char * res = nullptr;
64
65for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
66{
67const char * tmp = pool.insert(it->data(), it->size());
68if (it == vec.begin())
69res = tmp;
70}
71
72std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
73<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
74<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
75<< std::endl;
76
77devnull.write(res, 100);
78devnull << std::endl;
79}
80
81{
82Set set;
83Stopwatch watch;
84
85for (const auto & elem : vec)
86set[elem] = 0;
87
88std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
89<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
90<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
91<< std::endl;
92
93size_t i = 0;
94for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
95{
96devnull << it->first;
97devnull << std::endl;
98}
99}
100
101{
102RefsSet set;
103Stopwatch watch;
104
105for (const auto & elem : vec)
106set[StringRef(elem)] = 0;
107
108std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
109<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
110<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
111<< std::endl;
112
113size_t i = 0;
114for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
115{
116devnull.write(it->first.data, it->first.size);
117devnull << std::endl;
118}
119}
120
121{
122DB::Arena pool;
123RefsSet set;
124Stopwatch watch;
125
126for (const auto & elem : vec)
127set[StringRef(pool.insert(elem.data(), elem.size()), elem.size())] = 0;
128
129std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
130<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
131<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
132<< std::endl;
133
134size_t i = 0;
135for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
136{
137devnull.write(it->first.data, it->first.size);
138devnull << std::endl;
139}
140}
141
142{
143DenseSet set;
144set.set_empty_key(DenseSet::key_type());
145Stopwatch watch;
146
147for (const auto & elem : vec)
148set[elem] = 0;
149
150std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
151<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
152<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
153<< std::endl;
154
155size_t i = 0;
156for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
157{
158devnull << it->first;
159devnull << std::endl;
160}
161}
162
163{
164RefsDenseSet set;
165set.set_empty_key(RefsDenseSet::key_type());
166Stopwatch watch;
167
168for (const auto & elem : vec)
169set[StringRef(elem.data(), elem.size())] = 0;
170
171std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
172<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
173<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
174<< std::endl;
175
176size_t i = 0;
177for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
178{
179devnull.write(it->first.data, it->first.size);
180devnull << std::endl;
181}
182}
183
184{
185DB::Arena pool;
186RefsDenseSet set;
187set.set_empty_key(RefsDenseSet::key_type());
188Stopwatch watch;
189
190for (const auto & elem : vec)
191set[StringRef(pool.insert(elem.data(), elem.size()), elem.size())] = 0;
192
193std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
194<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
195<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
196<< std::endl;
197
198size_t i = 0;
199for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
200{
201devnull.write(it->first.data, it->first.size);
202devnull << std::endl;
203}
204}
205
206{
207RefsHashMap set;
208Stopwatch watch;
209
210for (const auto & elem : vec)
211{
212RefsHashMap::LookupResult inserted_it;
213bool inserted;
214set.emplace(StringRef(elem), inserted_it, inserted);
215}
216
217std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
218<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
219<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
220<< std::endl;
221
222size_t i = 0;
223for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
224{
225devnull.write(it->getKey().data, it->getKey().size);
226devnull << std::endl;
227}
228
229//std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
230}
231
232{
233DB::Arena pool;
234RefsHashMap set;
235Stopwatch watch;
236
237for (const auto & elem : vec)
238{
239RefsHashMap::LookupResult inserted_it;
240bool inserted;
241set.emplace(StringRef(pool.insert(elem.data(), elem.size()), elem.size()), inserted_it, inserted);
242}
243
244std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
245<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
246<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
247<< std::endl;
248
249size_t i = 0;
250for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
251{
252devnull.write(it->getKey().data, it->getKey().size);
253devnull << std::endl;
254}
255}
256
257return 0;
258}
259