ClickHouse
118 строк · 3.6 Кб
1#include <Columns/ColumnString.h>
2#include <DataTypes/DataTypeString.h>
3#include <Functions/FunctionFactory.h>
4#include <Functions/FunctionHelpers.h>
5#include <base/find_symbols.h>
6
7
8namespace DB
9{
/// Error codes thrown by this translation unit. They are only declared here
/// (`extern`); the definitions live elsewhere in the project.
namespace ErrorCodes
{
    /// Thrown when the argument column is not a ColumnString.
    extern const int ILLEGAL_COLUMN;
    /// Thrown when the argument data type is not String.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
15
16namespace
17{
18
19class FunctionRegexpQuoteMeta : public IFunction
20{
21public:
22static constexpr auto name = "regexpQuoteMeta";
23
24static FunctionPtr create(ContextPtr)
25{
26return std::make_shared<FunctionRegexpQuoteMeta>();
27}
28
29String getName() const override
30{
31return name;
32}
33
34size_t getNumberOfArguments() const override
35{
36return 1;
37}
38
39bool useDefaultImplementationForConstants() const override
40{
41return true;
42}
43
44bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
45
46DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
47{
48if (!WhichDataType(arguments[0].type).isString())
49throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1 argument of function {}. Must be String.",
50arguments[0].type->getName(), getName());
51
52return std::make_shared<DataTypeString>();
53}
54
55ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
56{
57const ColumnPtr & column_string = arguments[0].column;
58const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
59
60if (!input)
61throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
62arguments[0].column->getName(), getName());
63
64auto dst_column = ColumnString::create();
65auto & dst_data = dst_column->getChars();
66auto & dst_offsets = dst_column->getOffsets();
67
68dst_offsets.resize(input_rows_count);
69
70const ColumnString::Offsets & src_offsets = input->getOffsets();
71
72const auto * src_begin = reinterpret_cast<const char *>(input->getChars().data());
73const auto * src_pos = src_begin;
74
75for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
76{
77/// NOTE This implementation slightly differs from re2::RE2::QuoteMeta.
78/// It escapes zero byte as \0 instead of \x00
79/// and it escapes only required characters.
80/// This is Ok. Look at comments in re2.cc
81
82const char * src_end = src_begin + src_offsets[row_idx] - 1;
83
84while (true)
85{
86const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end);
87
88size_t bytes_to_copy = next_src_pos - src_pos;
89size_t old_dst_size = dst_data.size();
90dst_data.resize(old_dst_size + bytes_to_copy);
91memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy);
92src_pos = next_src_pos + 1;
93
94if (next_src_pos == src_end)
95{
96dst_data.emplace_back('\0');
97break;
98}
99
100dst_data.emplace_back('\\');
101dst_data.emplace_back(*next_src_pos);
102}
103
104dst_offsets[row_idx] = dst_data.size();
105}
106
107return dst_column;
108}
109};
110
111}
112
/// Registers FunctionRegexpQuoteMeta with the function factory.
/// NOTE(review): presumably this exposes it to queries under FunctionRegexpQuoteMeta::name
/// ("regexpQuoteMeta") - confirm against the factory's registration rules.
REGISTER_FUNCTION(RegexpQuoteMeta)
{
    factory.registerFunction<FunctionRegexpQuoteMeta>();
}
117
118}
119