ClickHouse
182 строки · 6.5 Кб
1#include <Functions/FunctionHelpers.h>
2#include <Functions/FunctionFactory.h>
3
4#include <BridgeHelper/CatBoostLibraryBridgeHelper.h>
5#include <BridgeHelper/IBridgeHelper.h>
6#include <Columns/ColumnNullable.h>
7#include <Columns/ColumnString.h>
8#include <Columns/ColumnTuple.h>
9#include <Columns/ColumnsNumber.h>
10#include <Common/assert_cast.h>
11#include <DataTypes/DataTypeNullable.h>
12#include <DataTypes/DataTypeTuple.h>
13#include <DataTypes/DataTypesNumber.h>
14#include <Functions/IFunction.h>
15#include <Interpreters/Context.h>
16#include <Interpreters/Context_fwd.h>
17
18
19namespace DB
20{
21
/// Error codes used by the code in this file. These are declarations only
/// (`extern`, no initializer); the definitions live outside this file.
namespace ErrorCodes
{
    /// Thrown when the CatBoost library or the model file is not found on disk.
    extern const int FILE_DOESNT_EXIST;
    /// Thrown when the first argument is not a constant string column.
    extern const int ILLEGAL_COLUMN;
    /// Thrown when the first argument is not of a string type.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    /// Thrown when fewer than two arguments are passed.
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}
29
30/// Evaluate CatBoost model.
31/// - Arguments: float features first, then categorical features.
32/// - Result: Float64.
33class FunctionCatBoostEvaluate final : public IFunction, WithContext
34{
35private:
36mutable std::unique_ptr<CatBoostLibraryBridgeHelper> bridge_helper;
37
38public:
39static constexpr auto name = "catboostEvaluate";
40
41static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionCatBoostEvaluate>(context_); }
42
43explicit FunctionCatBoostEvaluate(ContextPtr context_) : WithContext(context_) {}
44String getName() const override { return name; }
45bool isVariadic() const override { return true; }
46bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
47bool isDeterministic() const override { return false; }
48bool useDefaultImplementationForNulls() const override { return false; }
49size_t getNumberOfArguments() const override { return 0; }
50
51void initBridge(const ColumnConst * name_col) const
52{
53String library_path = getContext()->getConfigRef().getString("catboost_lib_path");
54if (!std::filesystem::exists(library_path))
55throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load library {}: file doesn't exist", library_path);
56
57String model_path = name_col->getValue<String>();
58if (!std::filesystem::exists(model_path))
59throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can't load model {}: file doesn't exist", model_path);
60
61bridge_helper = std::make_unique<CatBoostLibraryBridgeHelper>(getContext(), model_path, library_path);
62}
63
64DataTypePtr getReturnTypeFromLibraryBridge() const
65{
66size_t tree_count = bridge_helper->getTreeCount();
67auto type = std::make_shared<DataTypeFloat64>();
68if (tree_count == 1)
69return type;
70
71DataTypes types(tree_count, type);
72
73return std::make_shared<DataTypeTuple>(types);
74}
75
76DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
77{
78if (arguments.size() < 2)
79throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 2 arguments", getName());
80
81if (!isString(arguments[0].type))
82throw Exception(
83ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
84"Illegal type {} of first argument of function {}, expected a string.", arguments[0].type->getName(), getName());
85
86const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
87if (!name_col)
88throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
89
90initBridge(name_col);
91
92auto type = getReturnTypeFromLibraryBridge();
93
94bool has_nullable = false;
95for (size_t i = 1; i < arguments.size(); ++i)
96has_nullable = has_nullable || arguments[i].type->isNullable();
97
98if (has_nullable)
99{
100if (const auto * tuple = typeid_cast<const DataTypeTuple *>(type.get()))
101{
102auto elements = tuple->getElements();
103for (auto & element : elements)
104element = makeNullable(element);
105
106type = std::make_shared<DataTypeTuple>(elements);
107}
108else
109type = makeNullable(type);
110}
111
112return type;
113}
114
115ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
116{
117const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
118if (!name_col)
119throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be a constant string", getName());
120
121ColumnRawPtrs column_ptrs;
122Columns materialized_columns;
123ColumnPtr null_map;
124
125ColumnsWithTypeAndName feature_arguments(arguments.begin() + 1, arguments.end());
126for (auto & arg : feature_arguments)
127{
128if (auto full_column = arg.column->convertToFullColumnIfConst())
129{
130materialized_columns.push_back(full_column);
131arg.column = full_column;
132}
133if (const auto * col_nullable = checkAndGetColumn<ColumnNullable>(&*arg.column))
134{
135if (!null_map)
136null_map = col_nullable->getNullMapColumnPtr();
137else
138{
139auto mut_null_map = IColumn::mutate(std::move(null_map));
140
141NullMap & result_null_map = assert_cast<ColumnUInt8 &>(*mut_null_map).getData();
142const NullMap & src_null_map = col_nullable->getNullMapColumn().getData();
143
144for (size_t i = 0, size = result_null_map.size(); i < size; ++i)
145if (src_null_map[i])
146result_null_map[i] = 1;
147
148null_map = std::move(mut_null_map);
149}
150
151arg.column = col_nullable->getNestedColumn().getPtr();
152arg.type = static_cast<const DataTypeNullable &>(*arg.type).getNestedType();
153}
154}
155
156auto res = bridge_helper->evaluate(feature_arguments);
157
158if (null_map)
159{
160if (const auto * tuple = typeid_cast<const ColumnTuple *>(res.get()))
161{
162auto nested = tuple->getColumns();
163for (auto & col : nested)
164col = ColumnNullable::create(col, null_map);
165
166res = ColumnTuple::create(nested);
167}
168else
169res = ColumnNullable::create(res, null_map);
170}
171
172return res;
173}
174};
175
176
177REGISTER_FUNCTION(CatBoostEvaluate)
178{
179factory.registerFunction<FunctionCatBoostEvaluate>();
180}
181
182}
183