ClickHouse

Форк
0
/
bitSlice.cpp 
424 строки · 16.8 Кб
1
#include <Columns/ColumnConst.h>
2
#include <Columns/ColumnFixedString.h>
3
#include <Columns/ColumnString.h>
4
#include <DataTypes/DataTypeString.h>
5
#include <Functions/FunctionFactory.h>
6
#include <Functions/FunctionHelpers.h>
7
#include <Functions/GatherUtils/Algorithms.h>
8
#include <Functions/GatherUtils/Sinks.h>
9
#include <Functions/GatherUtils/Slices.h>
10
#include <Functions/GatherUtils/Sources.h>
11
#include <Functions/IFunction.h>
12
#include <IO/WriteHelpers.h>
13

14
namespace DB
15
{
16
using namespace GatherUtils;
17

18
/// Error codes thrown by this file. They are only declared here (`extern`);
/// the definitions live outside this translation unit.
namespace ErrorCodes
{
    /// Wrong number of arguments passed to the function.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// An argument has a type the function does not accept.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    /// The first argument is stored in a column kind the function cannot read.
    extern const int ILLEGAL_COLUMN;
    /// A constant start position of 0 was given (positions are 1-based).
    extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
}
25

26
class FunctionBitSlice : public IFunction
27
{
28
    const UInt8 word_size = 8;
29

30
public:
31
    static constexpr auto name = "bitSlice";
32
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionBitSlice>(); }
33

34
    String getName() const override { return name; }
35

36
    bool isVariadic() const override { return true; }
37
    size_t getNumberOfArguments() const override { return 0; }
38

39
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
40

41
    bool useDefaultImplementationForConstants() const override { return true; }
42

43
    /// Validates the argument types and reports the result type.
    ///
    /// Accepted shapes:
    ///   (String or FixedString, native number)
    ///   (String or FixedString, native number, native number)
    ///
    /// Returns String, except that an only-NULL first argument type is returned as is.
    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the argument count is not 2 or 3,
    /// and ILLEGAL_TYPE_OF_ARGUMENT when any argument has an unsupported type.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        const size_t number_of_arguments = arguments.size();

        if (number_of_arguments < 2 || number_of_arguments > 3)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
                getName(), number_of_arguments);

        /// A single String-or-FixedString test is enough; the former extra isString() test was redundant.
        if (!isStringOrFixedString(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
                arguments[0]->getName(), getName());
        if (arguments[0]->onlyNull())
            return arguments[0];

        if (!isNativeNumber(arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}",
                arguments[1]->getName(), getName());

        /// The message must name the third argument: it is arguments[2] that failed the check.
        if (number_of_arguments == 3 && !isNativeNumber(arguments[2]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}",
                arguments[2]->getName(), getName());

        return std::make_shared<DataTypeString>();
    }
68

69
    /// Entry point for a block of rows.
    ///
    /// Reads the start (and optional length) columns, remembers their values when they are
    /// ColumnConst, and forwards to executeForSource with a source wrapper matching the
    /// concrete kind of the first argument (String, FixedString, or a constant of either).
    /// Throws ILLEGAL_COLUMN when the first argument is none of those.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        /// Value of a ColumnConst argument, or nothing when the column is not constant.
        const auto constant_value_of = [](const ColumnPtr & column) -> std::optional<Int64>
        {
            if (const auto * as_const = checkAndGetColumn<ColumnConst>(column.get()))
                return as_const->getInt(0);
            return std::nullopt;
        };

        ColumnPtr column_string = arguments[0].column;
        ColumnPtr column_start = arguments[1].column;
        ColumnPtr column_length;

        const std::optional<Int64> start_const = constant_value_of(column_start);
        std::optional<Int64> length_const;

        /// The length argument is optional; column_length stays null when it is absent.
        if (arguments.size() == 3)
        {
            column_length = arguments[2].column;
            length_const = constant_value_of(column_length);
        }

        const auto * raw_string = column_string.get();

        if (const ColumnString * plain = checkAndGetColumn<ColumnString>(raw_string))
            return executeForSource(column_start, column_length, start_const, length_const, StringSource(*plain), input_rows_count);

        if (const ColumnFixedString * fixed = checkAndGetColumn<ColumnFixedString>(raw_string))
            return executeForSource(
                column_start, column_length, start_const, length_const, FixedStringSource(*fixed), input_rows_count);

        if (const ColumnConst * const_plain = checkAndGetColumnConst<ColumnString>(raw_string))
            return executeForSource(
                column_start, column_length, start_const, length_const, ConstSource<StringSource>(*const_plain), input_rows_count);

        if (const ColumnConst * const_fixed = checkAndGetColumnConst<ColumnFixedString>(raw_string))
            return executeForSource(
                column_start, column_length, start_const, length_const, ConstSource<FixedStringSource>(*const_fixed), input_rows_count);

        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
            arguments[0].column->getName(), getName());
    }
108

109
    template <class Source>
110
    ColumnPtr executeForSource(
111
        const ColumnPtr & column_start,
112
        const ColumnPtr & column_length,
113
        std::optional<Int64> start_const,
114
        std::optional<Int64> length_const,
115
        Source && source,
116
        size_t input_rows_count) const
117
    {
118
        auto col_res = ColumnString::create();
119

120
        if (!column_length)
121
        {
122
            if (start_const)
123
            {
124
                Int64 start_value = start_const.value();
125
                if (start_value > 0)
126
                    bitSliceFromLeftConstantOffsetUnbounded(
127
                        source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1));
128
                else if (start_value < 0)
129
                    bitSliceFromRightConstantOffsetUnbounded(
130
                        source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value));
131
                else
132
                    throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");
133
            }
134
            else
135
                bitSliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start);
136
        }
137
        else
138
        {
139
            if (start_const && length_const)
140
            {
141
                Int64 start_value = start_const.value();
142
                Int64 length_value = length_const.value();
143
                if (start_value > 0)
144
                    bitSliceFromLeftConstantOffsetBounded(
145
                        source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1), length_value);
146
                else if (start_value < 0)
147
                    bitSliceFromRightConstantOffsetBounded(
148
                        source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value), length_value);
149
                else
150
                    throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");
151
            }
152
            else
153
                bitSliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length);
154
        }
155

156
        return col_res;
157
    }
158

159
    /// Appends `slice` to `sink`, with the whole byte sequence shifted left by `shift_bit` bits.
    ///
    /// Each output byte combines the low bits of one input byte with the high bits of the next.
    /// In addition, the lowest `shift_bit + abandon_last_bit` bits of the output are zeroed;
    /// when that sum reaches a full byte, the output is one byte shorter than the slice and
    /// only the remainder is masked in the new last byte.
    /// With both parameters zero the slice is copied unchanged via writeSlice.
    /// An empty slice writes nothing.
    void writeSliceWithLeftShift(const StringSource::Slice & slice, StringSink & sink, size_t shift_bit, size_t abandon_last_bit = 0) const
    {
        if (shift_bit == 0 && abandon_last_bit == 0)
        {
            writeSlice(slice, sink);
            return;
        }

        size_t out_size = slice.size;
        if (out_size == 0)
            return;

        const size_t cleared_bits = abandon_last_bit + shift_bit;
        const bool drop_last_byte = cleared_bits >= word_size; // shifting may consume the last byte entirely
        if (drop_last_byte)
            --out_size;

        sink.elements.resize(sink.current_offset + out_size);
        UInt8 * dst = &sink.elements[sink.current_offset];
        const UInt8 * bytes = slice.data;

        const size_t carry_shift = word_size - shift_bit;
        const size_t last = out_size - 1;

        /// All bytes but the last: own bits shifted up, topped up with the high bits of the next byte.
        for (size_t pos = 0; pos < last; ++pos)
            dst[pos] = (bytes[pos] << shift_bit) | (bytes[pos + 1] >> carry_shift);

        if (drop_last_byte)
        {
            /// bytes[out_size] is the final byte of the slice, which no longer gets an output byte of its own.
            dst[last] = (bytes[last] << shift_bit) | (bytes[out_size] >> carry_shift);
            dst[last] = dst[last] & (0xFF << (cleared_bits - word_size));
        }
        else
        {
            dst[last] = (bytes[last] << shift_bit) & (0xFF << cleared_bits);
        }

        sink.current_offset += out_size;
    }
193

194

195
    template <class Source>
196
    void bitSliceFromLeftConstantOffsetUnbounded(Source && src, StringSink && sink, size_t offset) const
197
    {
198
        size_t offset_byte = offset / word_size;
199
        size_t offset_bit = offset % word_size;
200
        while (!src.isEnd())
201
        {
202
            auto sl = src.getSliceFromLeft(offset_byte);
203
            if (sl.size)
204
                writeSliceWithLeftShift(sl, sink, offset_bit);
205

206
            sink.next();
207
            src.next();
208
        }
209
    }
210

211
    template <class Source>
212
    void bitSliceFromRightConstantOffsetUnbounded(Source && src, StringSink && sink, size_t offset) const
213
    {
214
        size_t offset_byte = offset / word_size;
215
        size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit
216
        if (offset_bit)
217
            offset_byte++;
218
        while (!src.isEnd())
219
        {
220
            auto slice = src.getSliceFromRight(offset_byte);
221
            size_t size = src.getElementSize();
222
            bool left_truncate = offset_byte > size;
223
            size_t shift_bit = left_truncate ? 0 : offset_bit;
224
            if (slice.size)
225
                writeSliceWithLeftShift(slice, sink, shift_bit);
226

227
            sink.next();
228
            src.next();
229
        }
230
    }
231

232
    template <class Source>
233
    void bitSliceDynamicOffsetUnbounded(Source && src, StringSink && sink, const IColumn & offset_column) const
234
    {
235
        while (!src.isEnd())
236
        {
237
            auto row_num = src.rowNum();
238
            Int64 start = offset_column.getInt(row_num);
239
            if (start != 0)
240
            {
241
                typename std::decay_t<Source>::Slice slice;
242
                size_t shift_bit;
243

244
                if (start > 0)
245
                {
246
                    UInt64 offset = start - 1;
247
                    size_t offset_byte = offset / word_size;
248
                    size_t offset_bit = offset % word_size;
249
                    shift_bit = offset_bit;
250
                    slice = src.getSliceFromLeft(offset_byte);
251
                }
252
                else
253
                {
254
                    UInt64 offset = -static_cast<UInt64>(start);
255
                    size_t offset_byte = offset / word_size;
256
                    size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit
257
                    if (offset_bit)
258
                        offset_byte++;
259
                    size_t size = src.getElementSize();
260
                    bool left_truncate = offset_byte > size;
261
                    shift_bit = left_truncate ? 0 : offset_bit;
262
                    slice = src.getSliceFromRight(offset_byte);
263
                }
264
                if (slice.size)
265
                    writeSliceWithLeftShift(slice, sink, shift_bit);
266
            }
267

268
            sink.next();
269
            src.next();
270
        }
271
    }
272

273
    template <class Source>
274
    void bitSliceFromLeftConstantOffsetBounded(Source && src, StringSink && sink, size_t offset, ssize_t length) const
275
    {
276
        size_t offset_byte = offset / word_size;
277
        size_t offset_bit = offset % word_size;
278
        size_t shift_bit = offset_bit;
279
        size_t length_byte = 0;
280
        size_t over_bit = 0;
281
        if (length > 0)
282
        {
283
            length_byte = (length + offset_bit) / word_size;
284
            over_bit = (length + offset_bit) % word_size;
285
            if (over_bit && (length_byte || over_bit > offset_bit)) // begin and end are not in same byte OR there are gaps
286
                length_byte++;
287
        }
288

289
        while (!src.isEnd())
290
        {
291
            ssize_t remain_byte = src.getElementSize() - offset_byte;
292
            if (length < 0)
293
            {
294
                length_byte = std::max(remain_byte + (length / word_size), 0z);
295
                over_bit = word_size + (length % word_size);
296
                if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
297
                    length_byte = 0;
298
            }
299
            bool right_truncate = static_cast<ssize_t>(length_byte) > remain_byte;
300
            size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;
301
            auto slice = src.getSliceFromLeft(offset_byte, length_byte);
302
            if (slice.size)
303
                writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);
304

305
            sink.next();
306
            src.next();
307
        }
308
    }
309

310

311
    template <class Source>
312
    void bitSliceFromRightConstantOffsetBounded(Source && src, StringSink && sink, size_t offset, ssize_t length) const
313
    {
314
        size_t offset_byte = offset / word_size;
315
        size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit
316
        if (offset_bit)
317
            offset_byte++;
318
        size_t length_byte = 0;
319
        size_t over_bit = 0;
320
        if (length > 0)
321
        {
322
            length_byte = (length + offset_bit) / word_size;
323
            over_bit = (length + offset_bit) % word_size;
324
            if (over_bit && (length_byte || over_bit > offset_bit)) // begin and end are not in same byte OR there are gaps
325
                length_byte++;
326
        }
327

328
        while (!src.isEnd())
329
        {
330
            size_t size = src.getElementSize();
331
            if (length < 0)
332
            {
333
                length_byte = std::max(static_cast<ssize_t>(offset_byte) + (length / word_size), 0z);
334
                over_bit = word_size + (length % word_size);
335
                if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
336
                    length_byte = 0;
337
            }
338
            bool left_truncate = offset_byte > size;
339
            bool right_truncate = length_byte > offset_byte;
340
            size_t shift_bit = left_truncate ? 0 : offset_bit;
341
            size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;
342
            auto slice = src.getSliceFromRight(offset_byte, length_byte);
343
            if (slice.size)
344
                writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);
345

346
            sink.next();
347
            src.next();
348
        }
349
    }
350

351
    template <class Source>
352
    void bitSliceDynamicOffsetBounded(Source && src, StringSink && sink, const IColumn & offset_column, const IColumn & length_column) const
353
    {
354
        while (!src.isEnd())
355
        {
356
            size_t row_num = src.rowNum();
357
            Int64 start = offset_column.getInt(row_num);
358
            Int64 length = length_column.getInt(row_num);
359

360
            if (start && length)
361
            {
362
                bool left_offset = start > 0;
363
                size_t offset = left_offset ? static_cast<size_t>(start - 1) : -static_cast<size_t>(start);
364
                size_t size = src.getElementSize();
365

366
                size_t offset_byte;
367
                size_t offset_bit;
368
                size_t shift_bit;
369
                if (left_offset)
370
                {
371
                    offset_byte = offset / word_size;
372
                    offset_bit = offset % word_size;
373
                    shift_bit = offset_bit;
374
                }
375
                else
376
                {
377
                    offset_byte = offset / word_size;
378
                    offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit
379
                    if (offset_bit)
380
                        offset_byte++;
381
                    bool left_truncate = offset_byte > size;
382
                    shift_bit = left_truncate ? 0 : offset_bit;
383
                }
384

385
                ssize_t remain_byte = left_offset ? size - offset_byte : offset_byte;
386

387
                size_t length_byte;
388
                size_t over_bit;
389
                if (length > 0)
390
                {
391
                    length_byte = (length + offset_bit) / word_size;
392
                    over_bit = (length + offset_bit) % word_size;
393
                    if (over_bit && (length_byte || (over_bit > offset_bit))) // begin and end are not in same byte OR there are gaps
394
                        length_byte++;
395
                }
396
                else
397
                {
398
                    length_byte = std::max(remain_byte + (static_cast<ssize_t>(length) / word_size), 0z);
399
                    over_bit = word_size + (length % word_size);
400
                    if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
401
                        length_byte = 0;
402
                }
403

404
                bool right_truncate = static_cast<ssize_t>(length_byte) > remain_byte;
405
                size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;
406
                auto slice = left_offset ? src.getSliceFromLeft(offset_byte, length_byte) : src.getSliceFromRight(offset_byte, length_byte);
407
                if (slice.size)
408
                    writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);
409
            }
410

411
            sink.next();
412
            src.next();
413
        }
414
    }
415
};
416

417

418
/// Adds FunctionBitSlice to the function factory.
REGISTER_FUNCTION(BitSlice)
{
    factory.registerFunction<FunctionBitSlice>();
}
422

423

424
}
425

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.