ClickHouse

Форк
0
/
parseTimeDelta.cpp 
312 строк · 10.8 Кб
1
#include <cctype>
#include <string_view>
#include <unordered_map>

#include <boost/convert.hpp>
#include <boost/convert/strtol.hpp>

#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
8

9
namespace DB
10
{
11

12
/// Error codes thrown by this file. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; /// argument count is not exactly 1
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;         /// the argument is not a String
    extern const int BAD_ARGUMENTS;                    /// the string is not a valid time delta expression
}
18

19
namespace
20
{
21
    const std::unordered_map<std::string_view, Float64> time_unit_to_float =
22
    {
23
        {"years", 365 * 24 * 3600},
24
        {"year", 365 * 24 * 3600},
25
        {"yr", 365 * 24 * 3600},
26
        {"y", 365 * 24 * 3600},
27

28
        {"months", 30.5 * 24 * 3600},
29
        {"month", 30.5 * 24 * 3600},
30
        {"mo", 30.5 * 24 * 3600},
31

32
        {"weeks", 7 * 24 * 3600},
33
        {"week", 7 * 24 * 3600},
34
        {"w", 7 * 24 * 3600},
35

36
        {"days", 24 * 3600},
37
        {"day", 24 * 3600},
38
        {"d", 24 * 3600},
39

40
        {"hours", 3600},
41
        {"hour", 3600},
42
        {"hr", 3600},
43
        {"h", 3600},
44

45
        {"minutes", 60},
46
        {"minute", 60},
47
        {"min", 60},
48
        {"m", 60},
49

50
        {"seconds", 1},
51
        {"second", 1},
52
        {"sec", 1},
53
        {"s", 1},
54

55
        {"milliseconds", 1e-3},
56
        {"millisecond", 1e-3},
57
        {"millisec", 1e-3},
58
        {"ms", 1e-3},
59

60
        {"microseconds", 1e-6},
61
        {"microsecond", 1e-6},
62
        {"microsec", 1e-6},
63
        {"μs", 1e-6},
64
        {"us", 1e-6},
65

66
        {"nanoseconds", 1e-9},
67
        {"nanosecond", 1e-9},
68
        {"nanosec", 1e-9},
69
        {"ns", 1e-9},
70
    };
71

72
    /** Prints amount of seconds in form of:
73
     * "1 year 2 months 4 weeks 12 days 3 hours 1 minute 33 seconds".
74
     * ' ', ';', '-', '+', ',', ':' can be used as separator, eg. "1yr-2mo", "2m:6s"
75
     *
76
     * valid expressions:
77
     * SELECT parseTimeDelta('1 min 35 sec');
78
     * SELECT parseTimeDelta('0m;11.23s.');
79
     * SELECT parseTimeDelta('11hr 25min 3.1s');
80
     * SELECT parseTimeDelta('0.00123 seconds');
81
     * SELECT parseTimeDelta('1yr2mo');
82
     * SELECT parseTimeDelta('11s+22min');
83
     * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds');
84
     *
85
     * invalid expressions:
86
     * SELECT parseTimeDelta();
87
     * SELECT parseTimeDelta('1yr', 1);
88
     * SELECT parseTimeDelta(1);
89
     * SELECT parseTimeDelta(' ');
90
     * SELECT parseTimeDelta('-1yr');
91
     * SELECT parseTimeDelta('1yr-');
92
     * SELECT parseTimeDelta('yr2mo');
93
     * SELECT parseTimeDelta('1.yr2mo');
94
     * SELECT parseTimeDelta('1-yr');
95
     * SELECT parseTimeDelta('1 1yr');
96
     * SELECT parseTimeDelta('1yyr');
97
     * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ;. 33 seconds');
98
     *
99
     * The length of years and months (and even days in presence of time adjustments) are rough:
100
     * year is just 365 days, month is 30.5 days, day is 86400 seconds, similarly to what formatReadableTimeDelta is doing.
101
     */
102
    class FunctionParseTimeDelta : public IFunction
103
    {
104
    public:
105
        static constexpr auto name = "parseTimeDelta";
106
        static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionParseTimeDelta>(); }
107

108
        String getName() const override { return name; }
109

110
        bool isVariadic() const override { return true; }
111

112
        bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
113

114
        size_t getNumberOfArguments() const override { return 0; }
115

116
        DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
117
        {
118
            if (arguments.empty())
119
                throw Exception(
120
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
121
                    "Number of arguments for function {} doesn't match: passed {}, should be 1.",
122
                    getName(),
123
                    arguments.size());
124

125
            if (arguments.size() > 1)
126
                throw Exception(
127
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
128
                    "Number of arguments for function {} doesn't match: passed {}, should be 1.",
129
                    getName(),
130
                    arguments.size());
131

132
            const IDataType & type = *arguments[0];
133

134
            if (!isString(type))
135
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} as time string.", type.getName());
136

137
            return std::make_shared<DataTypeFloat64>();
138
        }
139

140
        bool useDefaultImplementationForConstants() const override { return true; }
141

142
        ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
143
        {
144
            auto col_to = ColumnFloat64::create();
145
            auto & res_data = col_to->getData();
146

147
            for (size_t i = 0; i < input_rows_count; ++i)
148
            {
149
                std::string_view str{arguments[0].column->getDataAt(i)};
150
                Int64 token_tail = 0;
151
                Int64 token_front = 0;
152
                Int64 last_pos = str.length() - 1;
153
                Float64 result = 0;
154

155
                /// ignore '.' and ' ' at the end of string
156
                while (last_pos >= 0 && (str[last_pos] == ' ' || str[last_pos] == '.'))
157
                    --last_pos;
158

159
                /// no valid characters
160
                if (last_pos < 0)
161
                {
162
                    throw Exception(
163
                        ErrorCodes::BAD_ARGUMENTS,
164
                        "Invalid expression for function {}, don't find valid characters, str: \"{}\".",
165
                        getName(),
166
                        String(str));
167
                }
168

169
                /// last pos character must be character and not be separator or number after ignoring '.' and ' '
170
                if (!isalpha(str[last_pos]))
171
                {
172
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, str: \"{}\".", getName(), String(str));
173
                }
174

175
                /// scan spaces at the beginning
176
                scanSpaces(str, token_tail, last_pos);
177
                token_front = token_tail;
178

179
                while (token_tail <= last_pos)
180
                {
181
                    /// scan unsigned integer
182
                    if (!scanUnsignedInteger(str, token_tail, last_pos))
183
                    {
184
                        throw Exception(
185
                            ErrorCodes::BAD_ARGUMENTS,
186
                            "Invalid expression for function {}, find number failed, str: \"{}\".",
187
                            getName(),
188
                            String(str));
189
                    }
190

191
                    /// if there is a '.', then scan another integer to get a float number
192
                    if (token_tail <= last_pos && str[token_tail] == '.')
193
                    {
194
                        token_tail++;
195
                        if (!scanUnsignedInteger(str, token_tail, last_pos))
196
                        {
197
                            throw Exception(
198
                                ErrorCodes::BAD_ARGUMENTS,
199
                                "Invalid expression for function {}, find number after '.' failed, str: \"{}\".",
200
                                getName(),
201
                                String(str));
202
                        }
203
                    }
204

205
                    /// convert float/integer string to float
206
                    Float64 base = 0;
207
                    std::string_view base_str = str.substr(token_front, token_tail - token_front);
208
                    auto value = boost::convert<Float64>(base_str, boost::cnv::strtol());
209
                    if (!value.has_value())
210
                    {
211
                        throw Exception(
212
                            ErrorCodes::BAD_ARGUMENTS,
213
                            "Invalid expression for function {}, convert string to float64 failed: \"{}\".",
214
                            getName(),
215
                            String(base_str));
216
                    }
217
                    base = value.get();
218

219
                    scanSpaces(str, token_tail, last_pos);
220
                    token_front = token_tail;
221

222
                    /// scan a unit
223
                    if (!scanUnit(str, token_tail, last_pos))
224
                    {
225
                        throw Exception(
226
                            ErrorCodes::BAD_ARGUMENTS,
227
                            "Invalid expression for function {}, find unit failed, str: \"{}\".",
228
                            getName(),
229
                            String(str));
230
                    }
231

232
                    /// get unit number
233
                    std::string_view unit = str.substr(token_front, token_tail - token_front);
234
                    auto iter = time_unit_to_float.find(unit);
235
                    if (iter == time_unit_to_float.end()) /// not find unit
236
                    {
237
                        throw Exception(
238
                            ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, parse unit failed: \"{}\".", getName(), unit);
239
                    }
240
                    result += base * iter->second;
241

242
                    /// scan separator between two tokens
243
                    scanSeparator(str, token_tail, last_pos);
244
                    token_front = token_tail;
245
                }
246

247
                res_data.emplace_back(result);
248
            }
249

250
            return col_to;
251
        }
252

253
        /// scan an unsigned integer number
254
        static bool scanUnsignedInteger(std::string_view & str, Int64 & index, Int64 last_pos)
255
        {
256
            int64_t begin_index = index;
257
            while (index <= last_pos && isdigit(str[index]))
258
            {
259
                index++;
260
            }
261
            return index != begin_index;
262
        }
263

264
        /// scan a unit
265
        static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos)
266
        {
267
            int64_t begin_index = index;
268
            while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index]))
269
            {
270
                index++;
271
            }
272
            return index != begin_index;
273
        }
274

275
        /// scan spaces
276
        static void scanSpaces(std::string_view & str, Int64 & index, Int64 last_pos)
277
        {
278
            while (index <= last_pos && (str[index] == ' '))
279
            {
280
                index++;
281
            }
282
        }
283

284
        /// scan for characters to ignore
285
        static void scanSeparator(std::string_view & str, Int64 & index, Int64 last_pos)
286
        {
287
            /// ignore spaces
288
            scanSpaces(str, index, last_pos);
289

290
            /// ignore separator
291
            if (index <= last_pos && isSeparator(str[index]))
292
            {
293
                index++;
294
            }
295

296
            scanSpaces(str, index, last_pos);
297
        }
298

299
        static bool isSeparator(char symbol)
300
        {
301
            return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' ';
302
        }
303
    };
304

305
}
306

307
/// Registers FunctionParseTimeDelta in the function factory.
REGISTER_FUNCTION(ParseTimeDelta)
{
    factory.registerFunction<FunctionParseTimeDelta>();
}
311

312
}
313

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.