ClickHouse
144 строки · 5.0 Кб
1#include <Columns/ColumnString.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/FunctionStringToString.h>
4#include <base/find_symbols.h>
5
6
7namespace DB
8{
/// Error codes referenced in this translation unit; only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
13
14namespace
15{
/// Name tag passed as a template argument to FunctionStringToString; carries the SQL-visible function name.
struct EncodeXMLComponentName
{
    static constexpr auto name = "encodeXMLComponent";
};
20
21class FunctionEncodeXMLComponentImpl
22{
23public:
24static void vector(
25const ColumnString::Chars & data,
26const ColumnString::Offsets & offsets,
27ColumnString::Chars & res_data,
28ColumnString::Offsets & res_offsets)
29{
30/// 6 is the maximum size amplification (the maximum length of encoded entity: ")
31res_data.resize(data.size() * 6);
32size_t size = offsets.size();
33res_offsets.resize(size);
34
35size_t prev_offset = 0;
36size_t res_offset = 0;
37
38for (size_t i = 0; i < size; ++i)
39{
40const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
41size_t src_size = offsets[i] - prev_offset;
42size_t dst_size = execute(src_data, src_size, reinterpret_cast<char *>(res_data.data() + res_offset));
43
44res_offset += dst_size;
45res_offsets[i] = res_offset;
46prev_offset = offsets[i];
47}
48
49res_data.resize(res_offset);
50}
51
52[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
53{
54throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function encodeXML cannot work with FixedString argument");
55}
56
57private:
58static size_t execute(const char * src, size_t src_size, char * dst)
59{
60const char * src_prev_pos = src;
61const char * src_curr_pos = src;
62const char * src_end = src + src_size;
63char * dst_pos = dst;
64
65while (true)
66{
67src_curr_pos = find_first_symbols<'<', '&', '>', '"', '\''>(src_curr_pos, src_end);
68
69if (src_curr_pos == src_end)
70{
71break;
72}
73else if (*src_curr_pos == '<')
74{
75size_t bytes_to_copy = src_curr_pos - src_prev_pos;
76memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
77dst_pos += bytes_to_copy;
78memcpy(dst_pos, "<", 4);
79dst_pos += 4;
80src_prev_pos = src_curr_pos + 1;
81++src_curr_pos;
82}
83else if (*src_curr_pos == '&')
84{
85size_t bytes_to_copy = src_curr_pos - src_prev_pos;
86memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
87dst_pos += bytes_to_copy;
88memcpy(dst_pos, "&", 5);
89dst_pos += 5;
90src_prev_pos = src_curr_pos + 1;
91++src_curr_pos;
92}
93else if (*src_curr_pos == '>')
94{
95size_t bytes_to_copy = src_curr_pos - src_prev_pos;
96memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
97dst_pos += bytes_to_copy;
98memcpy(dst_pos, ">", 4);
99dst_pos += 4;
100src_prev_pos = src_curr_pos + 1;
101++src_curr_pos;
102}
103else if (*src_curr_pos == '"')
104{
105size_t bytes_to_copy = src_curr_pos - src_prev_pos;
106memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
107dst_pos += bytes_to_copy;
108memcpy(dst_pos, """, 6);
109dst_pos += 6;
110src_prev_pos = src_curr_pos + 1;
111++src_curr_pos;
112}
113else if (*src_curr_pos == '\'')
114{
115size_t bytes_to_copy = src_curr_pos - src_prev_pos;
116memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
117dst_pos += bytes_to_copy;
118memcpy(dst_pos, "'", 6);
119dst_pos += 6;
120src_prev_pos = src_curr_pos + 1;
121++src_curr_pos;
122}
123}
124
125if (src_prev_pos < src_curr_pos)
126{
127size_t bytes_to_copy = src_curr_pos - src_prev_pos;
128memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
129dst_pos += bytes_to_copy;
130}
131
132return dst_pos - dst;
133}
134};
135
136using FunctionEncodeXMLComponent = FunctionStringToString<FunctionEncodeXMLComponentImpl, EncodeXMLComponentName>;
137
138}
139
140REGISTER_FUNCTION(EncodeXMLComponent)
141{
142factory.registerFunction<FunctionEncodeXMLComponent>();
143}
144}
145