ClickHouse
73 строки · 2.0 Кб
1#include <Functions/FunctionsStringSearchToString.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/Regexps.h>
4#include <Common/OptimizedRegularExpression.h>
5
6
7namespace DB
8{
9namespace
10{
11
12struct ExtractImpl
13{
14static void vector(
15const ColumnString::Chars & data,
16const ColumnString::Offsets & offsets,
17const std::string & pattern,
18ColumnString::Chars & res_data,
19ColumnString::Offsets & res_offsets)
20{
21res_data.reserve(data.size() / 5);
22res_offsets.resize(offsets.size());
23
24const OptimizedRegularExpression regexp = Regexps::createRegexp<false, false, false>(pattern);
25
26unsigned capture = regexp.getNumberOfSubpatterns() > 0 ? 1 : 0;
27OptimizedRegularExpression::MatchVec matches;
28matches.reserve(capture + 1);
29size_t prev_offset = 0;
30size_t res_offset = 0;
31
32for (size_t i = 0; i < offsets.size(); ++i)
33{
34size_t cur_offset = offsets[i];
35
36unsigned count
37= regexp.match(reinterpret_cast<const char *>(&data[prev_offset]), cur_offset - prev_offset - 1, matches, capture + 1);
38if (count > capture && matches[capture].offset != std::string::npos)
39{
40const auto & match = matches[capture];
41res_data.resize(res_offset + match.length + 1);
42memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + match.offset], match.length);
43res_offset += match.length;
44}
45else
46{
47res_data.resize(res_offset + 1);
48}
49
50res_data[res_offset] = 0;
51++res_offset;
52res_offsets[i] = res_offset;
53
54prev_offset = cur_offset;
55}
56}
57};
58
59struct NameExtract
60{
61static constexpr auto name = "extract";
62};
63
64using FunctionExtract = FunctionsStringSearchToString<ExtractImpl, NameExtract>;
65
66}
67
68REGISTER_FUNCTION(Extract)
69{
70factory.registerFunction<FunctionExtract>();
71}
72
73}
74