ClickHouse
312 строк · 10.8 Кб
1#include <boost/convert.hpp>
2#include <boost/convert/strtol.hpp>
3
4#include <Columns/ColumnsNumber.h>
5#include <DataTypes/DataTypesNumber.h>
6#include <Functions/FunctionFactory.h>
7#include <Functions/IFunction.h>
8
9namespace DB
10{
11
12namespace ErrorCodes
13{
14extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
15extern const int ILLEGAL_TYPE_OF_ARGUMENT;
16extern const int BAD_ARGUMENTS;
17}
18
19namespace
20{
21const std::unordered_map<std::string_view, Float64> time_unit_to_float =
22{
23{"years", 365 * 24 * 3600},
24{"year", 365 * 24 * 3600},
25{"yr", 365 * 24 * 3600},
26{"y", 365 * 24 * 3600},
27
28{"months", 30.5 * 24 * 3600},
29{"month", 30.5 * 24 * 3600},
30{"mo", 30.5 * 24 * 3600},
31
32{"weeks", 7 * 24 * 3600},
33{"week", 7 * 24 * 3600},
34{"w", 7 * 24 * 3600},
35
36{"days", 24 * 3600},
37{"day", 24 * 3600},
38{"d", 24 * 3600},
39
40{"hours", 3600},
41{"hour", 3600},
42{"hr", 3600},
43{"h", 3600},
44
45{"minutes", 60},
46{"minute", 60},
47{"min", 60},
48{"m", 60},
49
50{"seconds", 1},
51{"second", 1},
52{"sec", 1},
53{"s", 1},
54
55{"milliseconds", 1e-3},
56{"millisecond", 1e-3},
57{"millisec", 1e-3},
58{"ms", 1e-3},
59
60{"microseconds", 1e-6},
61{"microsecond", 1e-6},
62{"microsec", 1e-6},
63{"μs", 1e-6},
64{"us", 1e-6},
65
66{"nanoseconds", 1e-9},
67{"nanosecond", 1e-9},
68{"nanosec", 1e-9},
69{"ns", 1e-9},
70};
71
72/** Prints amount of seconds in form of:
73* "1 year 2 months 4 weeks 12 days 3 hours 1 minute 33 seconds".
74* ' ', ';', '-', '+', ',', ':' can be used as separator, eg. "1yr-2mo", "2m:6s"
75*
76* valid expressions:
77* SELECT parseTimeDelta('1 min 35 sec');
78* SELECT parseTimeDelta('0m;11.23s.');
79* SELECT parseTimeDelta('11hr 25min 3.1s');
80* SELECT parseTimeDelta('0.00123 seconds');
81* SELECT parseTimeDelta('1yr2mo');
82* SELECT parseTimeDelta('11s+22min');
83* SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds');
84*
85* invalid expressions:
86* SELECT parseTimeDelta();
87* SELECT parseTimeDelta('1yr', 1);
88* SELECT parseTimeDelta(1);
89* SELECT parseTimeDelta(' ');
90* SELECT parseTimeDelta('-1yr');
91* SELECT parseTimeDelta('1yr-');
92* SELECT parseTimeDelta('yr2mo');
93* SELECT parseTimeDelta('1.yr2mo');
94* SELECT parseTimeDelta('1-yr');
95* SELECT parseTimeDelta('1 1yr');
96* SELECT parseTimeDelta('1yyr');
97* SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ;. 33 seconds');
98*
99* The length of years and months (and even days in presence of time adjustments) are rough:
100* year is just 365 days, month is 30.5 days, day is 86400 seconds, similarly to what formatReadableTimeDelta is doing.
101*/
102class FunctionParseTimeDelta : public IFunction
103{
104public:
105static constexpr auto name = "parseTimeDelta";
106static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionParseTimeDelta>(); }
107
108String getName() const override { return name; }
109
110bool isVariadic() const override { return true; }
111
112bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
113
114size_t getNumberOfArguments() const override { return 0; }
115
116DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
117{
118if (arguments.empty())
119throw Exception(
120ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
121"Number of arguments for function {} doesn't match: passed {}, should be 1.",
122getName(),
123arguments.size());
124
125if (arguments.size() > 1)
126throw Exception(
127ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
128"Number of arguments for function {} doesn't match: passed {}, should be 1.",
129getName(),
130arguments.size());
131
132const IDataType & type = *arguments[0];
133
134if (!isString(type))
135throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} as time string.", type.getName());
136
137return std::make_shared<DataTypeFloat64>();
138}
139
140bool useDefaultImplementationForConstants() const override { return true; }
141
142ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
143{
144auto col_to = ColumnFloat64::create();
145auto & res_data = col_to->getData();
146
147for (size_t i = 0; i < input_rows_count; ++i)
148{
149std::string_view str{arguments[0].column->getDataAt(i)};
150Int64 token_tail = 0;
151Int64 token_front = 0;
152Int64 last_pos = str.length() - 1;
153Float64 result = 0;
154
155/// ignore '.' and ' ' at the end of string
156while (last_pos >= 0 && (str[last_pos] == ' ' || str[last_pos] == '.'))
157--last_pos;
158
159/// no valid characters
160if (last_pos < 0)
161{
162throw Exception(
163ErrorCodes::BAD_ARGUMENTS,
164"Invalid expression for function {}, don't find valid characters, str: \"{}\".",
165getName(),
166String(str));
167}
168
169/// last pos character must be character and not be separator or number after ignoring '.' and ' '
170if (!isalpha(str[last_pos]))
171{
172throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, str: \"{}\".", getName(), String(str));
173}
174
175/// scan spaces at the beginning
176scanSpaces(str, token_tail, last_pos);
177token_front = token_tail;
178
179while (token_tail <= last_pos)
180{
181/// scan unsigned integer
182if (!scanUnsignedInteger(str, token_tail, last_pos))
183{
184throw Exception(
185ErrorCodes::BAD_ARGUMENTS,
186"Invalid expression for function {}, find number failed, str: \"{}\".",
187getName(),
188String(str));
189}
190
191/// if there is a '.', then scan another integer to get a float number
192if (token_tail <= last_pos && str[token_tail] == '.')
193{
194token_tail++;
195if (!scanUnsignedInteger(str, token_tail, last_pos))
196{
197throw Exception(
198ErrorCodes::BAD_ARGUMENTS,
199"Invalid expression for function {}, find number after '.' failed, str: \"{}\".",
200getName(),
201String(str));
202}
203}
204
205/// convert float/integer string to float
206Float64 base = 0;
207std::string_view base_str = str.substr(token_front, token_tail - token_front);
208auto value = boost::convert<Float64>(base_str, boost::cnv::strtol());
209if (!value.has_value())
210{
211throw Exception(
212ErrorCodes::BAD_ARGUMENTS,
213"Invalid expression for function {}, convert string to float64 failed: \"{}\".",
214getName(),
215String(base_str));
216}
217base = value.get();
218
219scanSpaces(str, token_tail, last_pos);
220token_front = token_tail;
221
222/// scan a unit
223if (!scanUnit(str, token_tail, last_pos))
224{
225throw Exception(
226ErrorCodes::BAD_ARGUMENTS,
227"Invalid expression for function {}, find unit failed, str: \"{}\".",
228getName(),
229String(str));
230}
231
232/// get unit number
233std::string_view unit = str.substr(token_front, token_tail - token_front);
234auto iter = time_unit_to_float.find(unit);
235if (iter == time_unit_to_float.end()) /// not find unit
236{
237throw Exception(
238ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, parse unit failed: \"{}\".", getName(), unit);
239}
240result += base * iter->second;
241
242/// scan separator between two tokens
243scanSeparator(str, token_tail, last_pos);
244token_front = token_tail;
245}
246
247res_data.emplace_back(result);
248}
249
250return col_to;
251}
252
253/// scan an unsigned integer number
254static bool scanUnsignedInteger(std::string_view & str, Int64 & index, Int64 last_pos)
255{
256int64_t begin_index = index;
257while (index <= last_pos && isdigit(str[index]))
258{
259index++;
260}
261return index != begin_index;
262}
263
264/// scan a unit
265static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos)
266{
267int64_t begin_index = index;
268while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index]))
269{
270index++;
271}
272return index != begin_index;
273}
274
275/// scan spaces
276static void scanSpaces(std::string_view & str, Int64 & index, Int64 last_pos)
277{
278while (index <= last_pos && (str[index] == ' '))
279{
280index++;
281}
282}
283
284/// scan for characters to ignore
285static void scanSeparator(std::string_view & str, Int64 & index, Int64 last_pos)
286{
287/// ignore spaces
288scanSpaces(str, index, last_pos);
289
290/// ignore separator
291if (index <= last_pos && isSeparator(str[index]))
292{
293index++;
294}
295
296scanSpaces(str, index, last_pos);
297}
298
299static bool isSeparator(char symbol)
300{
301return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' ';
302}
303};
304
305}
306
307REGISTER_FUNCTION(ParseTimeDelta)
308{
309factory.registerFunction<FunctionParseTimeDelta>();
310}
311
312}
313