// ClickHouse
// 424 lines · 16.8 KB
#include <algorithm>
#include <memory>
#include <optional>
#include <type_traits>

#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/GatherUtils/Algorithms.h>
#include <Functions/GatherUtils/Sinks.h>
#include <Functions/GatherUtils/Slices.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/IFunction.h>
#include <IO/WriteHelpers.h>

14namespace DB15{
16using namespace GatherUtils;17
/// Error codes referenced by this file; they are only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
}
25
26class FunctionBitSlice : public IFunction27{
28const UInt8 word_size = 8;29
30public:31static constexpr auto name = "bitSlice";32static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionBitSlice>(); }33
34String getName() const override { return name; }35
36bool isVariadic() const override { return true; }37size_t getNumberOfArguments() const override { return 0; }38
39bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }40
41bool useDefaultImplementationForConstants() const override { return true; }42
43DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override44{45const size_t number_of_arguments = arguments.size();46
47if (number_of_arguments < 2 || number_of_arguments > 3)48throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,49"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",50getName(), number_of_arguments);51
52if (!isString(arguments[0]) && !isStringOrFixedString(arguments[0]))53throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",54arguments[0]->getName(), getName());55if (arguments[0]->onlyNull())56return arguments[0];57
58if (!isNativeNumber(arguments[1]))59throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}",60arguments[1]->getName(), getName());61
62if (number_of_arguments == 3 && !isNativeNumber(arguments[2]))63throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}",64arguments[2]->getName(), getName());65
66return std::make_shared<DataTypeString>();67}68
69ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override70{71size_t number_of_arguments = arguments.size();72
73ColumnPtr column_string = arguments[0].column;74ColumnPtr column_start = arguments[1].column;75ColumnPtr column_length;76
77std::optional<Int64> start_const;78std::optional<Int64> length_const;79
80if (const auto * column_start_const = checkAndGetColumn<ColumnConst>(column_start.get()))81{82start_const = column_start_const->getInt(0);83}84
85if (number_of_arguments == 3)86{87column_length = arguments[2].column;88if (const auto * column_length_const = checkAndGetColumn<ColumnConst>(column_length.get()))89length_const = column_length_const->getInt(0);90}91
92
93if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))94return executeForSource(column_start, column_length, start_const, length_const, StringSource(*col), input_rows_count);95else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))96return executeForSource(97column_start, column_length, start_const, length_const, FixedStringSource(*col_fixed), input_rows_count);98else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))99return executeForSource(100column_start, column_length, start_const, length_const, ConstSource<StringSource>(*col_const), input_rows_count);101else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))102return executeForSource(103column_start, column_length, start_const, length_const, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);104else105throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",106arguments[0].column->getName(), getName());107}108
109template <class Source>110ColumnPtr executeForSource(111const ColumnPtr & column_start,112const ColumnPtr & column_length,113std::optional<Int64> start_const,114std::optional<Int64> length_const,115Source && source,116size_t input_rows_count) const117{118auto col_res = ColumnString::create();119
120if (!column_length)121{122if (start_const)123{124Int64 start_value = start_const.value();125if (start_value > 0)126bitSliceFromLeftConstantOffsetUnbounded(127source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1));128else if (start_value < 0)129bitSliceFromRightConstantOffsetUnbounded(130source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value));131else132throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");133}134else135bitSliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start);136}137else138{139if (start_const && length_const)140{141Int64 start_value = start_const.value();142Int64 length_value = length_const.value();143if (start_value > 0)144bitSliceFromLeftConstantOffsetBounded(145source, StringSink(*col_res, input_rows_count), static_cast<size_t>(start_value - 1), length_value);146else if (start_value < 0)147bitSliceFromRightConstantOffsetBounded(148source, StringSink(*col_res, input_rows_count), -static_cast<size_t>(start_value), length_value);149else150throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based");151}152else153bitSliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length);154}155
156return col_res;157}158
159void writeSliceWithLeftShift(const StringSource::Slice & slice, StringSink & sink, size_t shift_bit, size_t abandon_last_bit = 0) const160{161if (!shift_bit && !abandon_last_bit)162{163writeSlice(slice, sink);164return;165}166size_t size = slice.size;167if (!size)168return;169bool abandon_last_byte = abandon_last_bit + shift_bit >= word_size;170if (abandon_last_byte) // shift may eliminate last byte171size--;172sink.elements.resize(sink.current_offset + size);173UInt8 * out = &sink.elements[sink.current_offset];174const UInt8 * input = slice.data;175
176for (size_t i = 0; i < size - 1; i++)177{178out[i] = (input[i] << shift_bit) | (input[i + 1] >> (word_size - shift_bit));179}180if (abandon_last_byte)181{182out[size - 1] = (input[size - 1] << shift_bit) | (input[size] >> (word_size - shift_bit));183out[size - 1] = out[size - 1] & (0xFF << (abandon_last_bit + shift_bit - word_size));184}185else186{187out[size - 1] = (input[size - 1] << shift_bit) & (0xFF << (abandon_last_bit + shift_bit));188}189
190
191sink.current_offset += size;192}193
194
195template <class Source>196void bitSliceFromLeftConstantOffsetUnbounded(Source && src, StringSink && sink, size_t offset) const197{198size_t offset_byte = offset / word_size;199size_t offset_bit = offset % word_size;200while (!src.isEnd())201{202auto sl = src.getSliceFromLeft(offset_byte);203if (sl.size)204writeSliceWithLeftShift(sl, sink, offset_bit);205
206sink.next();207src.next();208}209}210
211template <class Source>212void bitSliceFromRightConstantOffsetUnbounded(Source && src, StringSink && sink, size_t offset) const213{214size_t offset_byte = offset / word_size;215size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit216if (offset_bit)217offset_byte++;218while (!src.isEnd())219{220auto slice = src.getSliceFromRight(offset_byte);221size_t size = src.getElementSize();222bool left_truncate = offset_byte > size;223size_t shift_bit = left_truncate ? 0 : offset_bit;224if (slice.size)225writeSliceWithLeftShift(slice, sink, shift_bit);226
227sink.next();228src.next();229}230}231
232template <class Source>233void bitSliceDynamicOffsetUnbounded(Source && src, StringSink && sink, const IColumn & offset_column) const234{235while (!src.isEnd())236{237auto row_num = src.rowNum();238Int64 start = offset_column.getInt(row_num);239if (start != 0)240{241typename std::decay_t<Source>::Slice slice;242size_t shift_bit;243
244if (start > 0)245{246UInt64 offset = start - 1;247size_t offset_byte = offset / word_size;248size_t offset_bit = offset % word_size;249shift_bit = offset_bit;250slice = src.getSliceFromLeft(offset_byte);251}252else253{254UInt64 offset = -static_cast<UInt64>(start);255size_t offset_byte = offset / word_size;256size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit257if (offset_bit)258offset_byte++;259size_t size = src.getElementSize();260bool left_truncate = offset_byte > size;261shift_bit = left_truncate ? 0 : offset_bit;262slice = src.getSliceFromRight(offset_byte);263}264if (slice.size)265writeSliceWithLeftShift(slice, sink, shift_bit);266}267
268sink.next();269src.next();270}271}272
273template <class Source>274void bitSliceFromLeftConstantOffsetBounded(Source && src, StringSink && sink, size_t offset, ssize_t length) const275{276size_t offset_byte = offset / word_size;277size_t offset_bit = offset % word_size;278size_t shift_bit = offset_bit;279size_t length_byte = 0;280size_t over_bit = 0;281if (length > 0)282{283length_byte = (length + offset_bit) / word_size;284over_bit = (length + offset_bit) % word_size;285if (over_bit && (length_byte || over_bit > offset_bit)) // begin and end are not in same byte OR there are gaps286length_byte++;287}288
289while (!src.isEnd())290{291ssize_t remain_byte = src.getElementSize() - offset_byte;292if (length < 0)293{294length_byte = std::max(remain_byte + (length / word_size), 0z);295over_bit = word_size + (length % word_size);296if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps297length_byte = 0;298}299bool right_truncate = static_cast<ssize_t>(length_byte) > remain_byte;300size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;301auto slice = src.getSliceFromLeft(offset_byte, length_byte);302if (slice.size)303writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);304
305sink.next();306src.next();307}308}309
310
311template <class Source>312void bitSliceFromRightConstantOffsetBounded(Source && src, StringSink && sink, size_t offset, ssize_t length) const313{314size_t offset_byte = offset / word_size;315size_t offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit316if (offset_bit)317offset_byte++;318size_t length_byte = 0;319size_t over_bit = 0;320if (length > 0)321{322length_byte = (length + offset_bit) / word_size;323over_bit = (length + offset_bit) % word_size;324if (over_bit && (length_byte || over_bit > offset_bit)) // begin and end are not in same byte OR there are gaps325length_byte++;326}327
328while (!src.isEnd())329{330size_t size = src.getElementSize();331if (length < 0)332{333length_byte = std::max(static_cast<ssize_t>(offset_byte) + (length / word_size), 0z);334over_bit = word_size + (length % word_size);335if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps336length_byte = 0;337}338bool left_truncate = offset_byte > size;339bool right_truncate = length_byte > offset_byte;340size_t shift_bit = left_truncate ? 0 : offset_bit;341size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;342auto slice = src.getSliceFromRight(offset_byte, length_byte);343if (slice.size)344writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);345
346sink.next();347src.next();348}349}350
351template <class Source>352void bitSliceDynamicOffsetBounded(Source && src, StringSink && sink, const IColumn & offset_column, const IColumn & length_column) const353{354while (!src.isEnd())355{356size_t row_num = src.rowNum();357Int64 start = offset_column.getInt(row_num);358Int64 length = length_column.getInt(row_num);359
360if (start && length)361{362bool left_offset = start > 0;363size_t offset = left_offset ? static_cast<size_t>(start - 1) : -static_cast<size_t>(start);364size_t size = src.getElementSize();365
366size_t offset_byte;367size_t offset_bit;368size_t shift_bit;369if (left_offset)370{371offset_byte = offset / word_size;372offset_bit = offset % word_size;373shift_bit = offset_bit;374}375else376{377offset_byte = offset / word_size;378offset_bit = (word_size - (offset % word_size)) % word_size; // offset_bit always represent left offset bit379if (offset_bit)380offset_byte++;381bool left_truncate = offset_byte > size;382shift_bit = left_truncate ? 0 : offset_bit;383}384
385ssize_t remain_byte = left_offset ? size - offset_byte : offset_byte;386
387size_t length_byte;388size_t over_bit;389if (length > 0)390{391length_byte = (length + offset_bit) / word_size;392over_bit = (length + offset_bit) % word_size;393if (over_bit && (length_byte || (over_bit > offset_bit))) // begin and end are not in same byte OR there are gaps394length_byte++;395}396else397{398length_byte = std::max(remain_byte + (static_cast<ssize_t>(length) / word_size), 0z);399over_bit = word_size + (length % word_size);400if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps401length_byte = 0;402}403
404bool right_truncate = static_cast<ssize_t>(length_byte) > remain_byte;405size_t abandon_last_bit = (over_bit && !right_truncate) ? word_size - over_bit : 0;406auto slice = left_offset ? src.getSliceFromLeft(offset_byte, length_byte) : src.getSliceFromRight(offset_byte, length_byte);407if (slice.size)408writeSliceWithLeftShift(slice, sink, shift_bit, abandon_last_bit);409}410
411sink.next();412src.next();413}414}415};416
417
418REGISTER_FUNCTION(BitSlice)419{
420factory.registerFunction<FunctionBitSlice>();421}
422
423
424}
425