// ClickHouse
// 1027 lines · 40.6 KB
1#include <Planner/PlannerActionsVisitor.h>
2
3#include <Analyzer/Utils.h>
4#include <Analyzer/SetUtils.h>
5#include <Analyzer/ConstantNode.h>
6#include <Analyzer/FunctionNode.h>
7#include <Analyzer/ColumnNode.h>
8#include <Analyzer/LambdaNode.h>
9#include <Analyzer/SortNode.h>
10#include <Analyzer/WindowNode.h>
11#include <Analyzer/QueryNode.h>
12
13#include <DataTypes/FieldToDataType.h>
14#include <DataTypes/DataTypeSet.h>
15
16#include <Common/FieldVisitorToString.h>
17#include <DataTypes/DataTypeTuple.h>
18
19#include <Columns/ColumnSet.h>
20#include <Columns/ColumnConst.h>
21
22#include <Functions/FunctionsMiscellaneous.h>
23#include <Functions/FunctionFactory.h>
24#include <Functions/indexHint.h>
25
26#include <Interpreters/ExpressionActions.h>
27#include <Interpreters/Context.h>
28
29#include <Planner/PlannerContext.h>
30#include <Planner/TableExpressionData.h>
31#include <Planner/Utils.h>
32
33
34namespace DB
35{
36
37namespace ErrorCodes
38{
39extern const int UNSUPPORTED_METHOD;
40extern const int LOGICAL_ERROR;
41extern const int BAD_ARGUMENTS;
42extern const int INCORRECT_QUERY;
43}
44
45namespace
46{
47
48/* Calculates Action node name for ConstantNode.
49*
50* If converting to AST will add a '_CAST' function call,
51* the result action name will also include it.
52*/
53String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node)
54{
55WriteBufferFromOwnString buffer;
56if (constant_node.requiresCastCall())
57buffer << "_CAST(";
58
59buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
60
61if (constant_node.requiresCastCall())
62{
63buffer << ", '" << constant_node.getResultType()->getName() << "'_String)";
64}
65
66return buffer.str();
67}
68
69class ActionNodeNameHelper
70{
71public:
72ActionNodeNameHelper(QueryTreeNodeToName & node_to_name_,
73const PlannerContext & planner_context_,
74bool use_column_identifier_as_action_node_name_)
75: node_to_name(node_to_name_)
76, planner_context(planner_context_)
77, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
78{
79}
80
81String calculateActionNodeName(const QueryTreeNodePtr & node)
82{
83auto it = node_to_name.find(node);
84if (it != node_to_name.end())
85return it->second;
86
87String result;
88auto node_type = node->getNodeType();
89
90switch (node_type)
91{
92case QueryTreeNodeType::COLUMN:
93{
94const ColumnIdentifier * column_identifier = nullptr;
95if (use_column_identifier_as_action_node_name)
96column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
97
98if (column_identifier)
99{
100result = *column_identifier;
101}
102else
103{
104const auto & column_node = node->as<ColumnNode &>();
105result = column_node.getColumnName();
106}
107
108break;
109}
110case QueryTreeNodeType::CONSTANT:
111{
112const auto & constant_node = node->as<ConstantNode &>();
113/* To ensure that headers match during distributed query we need to simulate action node naming on
114* secondary servers. If we don't do that headers will mismatch due to constant folding.
115*
116* +--------+
117* -----------------| Server |----------------
118* / +--------+ \
119* / \
120* v v
121* +-----------+ +-----------+
122* | Initiator | ------ | Secondary |------
123* +-----------+ / +-----------+ \
124* | / \
125* | / \
126* v / \
127* +---------------+ v v
128* | Wrap in _CAST | +----------------------------+ +----------------------+
129* | if needed | | Constant folded from _CAST | | Constant folded from |
130* +---------------+ +----------------------------+ | another expression |
131* | +----------------------+
132* v |
133* +----------------------------+ v
134* | Name ConstantNode the same | +--------------------------+
135* | as on initiator server | | Generate action name for |
136* | (wrap in _CAST if needed) | | original expression |
137* +----------------------------+ +--------------------------+
138*/
139if (planner_context.isASTLevelOptimizationAllowed())
140{
141result = calculateActionNodeNameWithCastIfNeeded(constant_node);
142}
143else
144{
145// Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed.
146if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY)
147{
148if (constant_node.receivedFromInitiatorServer())
149result = calculateActionNodeNameWithCastIfNeeded(constant_node);
150else
151result = calculateActionNodeName(constant_node.getSourceExpression());
152}
153else
154result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
155}
156break;
157}
158case QueryTreeNodeType::FUNCTION:
159{
160const auto & function_node = node->as<FunctionNode &>();
161if (function_node.getFunctionName() == "__actionName")
162{
163result = toString(function_node.getArguments().getNodes().at(1)->as<ConstantNode>()->getValue());
164break;
165}
166
167String in_function_second_argument_node_name;
168
169if (isNameOfInFunction(function_node.getFunctionName()))
170{
171const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0);
172const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
173in_function_second_argument_node_name = PlannerContext::createSetKey(in_first_argument_node->getResultType(), in_second_argument_node);
174}
175
176WriteBufferFromOwnString buffer;
177buffer << function_node.getFunctionName();
178
179const auto & function_parameters_nodes = function_node.getParameters().getNodes();
180
181if (!function_parameters_nodes.empty())
182{
183buffer << '(';
184
185size_t function_parameters_nodes_size = function_parameters_nodes.size();
186for (size_t i = 0; i < function_parameters_nodes_size; ++i)
187{
188const auto & function_parameter_node = function_parameters_nodes[i];
189buffer << calculateActionNodeName(function_parameter_node);
190
191if (i + 1 != function_parameters_nodes_size)
192buffer << ", ";
193}
194
195buffer << ')';
196}
197
198const auto & function_arguments_nodes = function_node.getArguments().getNodes();
199String function_argument_name;
200
201buffer << '(';
202
203size_t function_arguments_nodes_size = function_arguments_nodes.size();
204for (size_t i = 0; i < function_arguments_nodes_size; ++i)
205{
206if (i == 1 && !in_function_second_argument_node_name.empty())
207{
208function_argument_name = in_function_second_argument_node_name;
209}
210else
211{
212const auto & function_argument_node = function_arguments_nodes[i];
213function_argument_name = calculateActionNodeName(function_argument_node);
214}
215
216buffer << function_argument_name;
217
218if (i + 1 != function_arguments_nodes_size)
219buffer << ", ";
220}
221
222buffer << ')';
223
224if (function_node.isWindowFunction())
225{
226buffer << " OVER (";
227buffer << calculateWindowNodeActionName(function_node.getWindowNode());
228buffer << ')';
229}
230
231result = buffer.str();
232break;
233}
234case QueryTreeNodeType::LAMBDA:
235{
236auto lambda_hash = node->getTreeHash();
237result = "__lambda_" + toString(lambda_hash);
238break;
239}
240default:
241{
242throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
243}
244}
245
246node_to_name.emplace(node, result);
247
248return result;
249}
250
251static String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
252{
253auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
254return constant_name + "_" + constant_type->getName();
255}
256
257static String calculateConstantActionNodeName(const Field & constant_literal)
258{
259return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
260}
261
262String calculateWindowNodeActionName(const QueryTreeNodePtr & node)
263{
264auto & window_node = node->as<WindowNode &>();
265WriteBufferFromOwnString buffer;
266
267if (window_node.hasPartitionBy())
268{
269buffer << "PARTITION BY ";
270
271auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
272size_t partition_by_nodes_size = partition_by_nodes.size();
273
274for (size_t i = 0; i < partition_by_nodes_size; ++i)
275{
276auto & partition_by_node = partition_by_nodes[i];
277buffer << calculateActionNodeName(partition_by_node);
278if (i + 1 != partition_by_nodes_size)
279buffer << ", ";
280}
281}
282
283if (window_node.hasOrderBy())
284{
285if (window_node.hasPartitionBy())
286buffer << ' ';
287
288buffer << "ORDER BY ";
289
290auto & order_by_nodes = window_node.getOrderBy().getNodes();
291size_t order_by_nodes_size = order_by_nodes.size();
292
293for (size_t i = 0; i < order_by_nodes_size; ++i)
294{
295auto & sort_node = order_by_nodes[i]->as<SortNode &>();
296buffer << calculateActionNodeName(sort_node.getExpression());
297
298auto sort_direction = sort_node.getSortDirection();
299buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
300
301auto nulls_sort_direction = sort_node.getNullsSortDirection();
302
303if (nulls_sort_direction)
304buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
305
306if (auto collator = sort_node.getCollator())
307buffer << " COLLATE " << collator->getLocale();
308
309if (sort_node.withFill())
310{
311buffer << " WITH FILL";
312
313if (sort_node.hasFillFrom())
314buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom());
315
316if (sort_node.hasFillTo())
317buffer << " TO " << calculateActionNodeName(sort_node.getFillTo());
318
319if (sort_node.hasFillStep())
320buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep());
321}
322
323if (i + 1 != order_by_nodes_size)
324buffer << ", ";
325}
326}
327
328auto & window_frame = window_node.getWindowFrame();
329if (!window_frame.is_default)
330{
331if (window_node.hasPartitionBy() || window_node.hasOrderBy())
332buffer << ' ';
333
334buffer << window_frame.type << " BETWEEN ";
335if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
336{
337buffer << "CURRENT ROW";
338}
339else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
340{
341buffer << "UNBOUNDED";
342buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
343}
344else
345{
346buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode());
347buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
348}
349
350buffer << " AND ";
351
352if (window_frame.end_type == WindowFrame::BoundaryType::Current)
353{
354buffer << "CURRENT ROW";
355}
356else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
357{
358buffer << "UNBOUNDED";
359buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
360}
361else
362{
363buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode());
364buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
365}
366}
367
368return buffer.str();
369}
370private:
371std::unordered_map<QueryTreeNodePtr, std::string> & node_to_name;
372const PlannerContext & planner_context;
373bool use_column_identifier_as_action_node_name = true;
374};
375
376class ActionsScopeNode
377{
378public:
379explicit ActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_)
380: actions_dag(std::move(actions_dag_))
381, scope_node(std::move(scope_node_))
382{
383for (const auto & node : actions_dag->getNodes())
384node_name_to_node[node.result_name] = &node;
385}
386
387const QueryTreeNodePtr & getScopeNode() const
388{
389return scope_node;
390}
391
392[[maybe_unused]] bool containsNode(const std::string & node_name)
393{
394return node_name_to_node.find(node_name) != node_name_to_node.end();
395}
396
397[[maybe_unused]] bool containsInputNode(const std::string & node_name)
398{
399const auto * node = tryGetNode(node_name);
400if (node && node->type == ActionsDAG::ActionType::INPUT)
401return true;
402
403return false;
404}
405
406[[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name)
407{
408auto it = node_name_to_node.find(node_name);
409if (it == node_name_to_node.end())
410return {};
411
412return it->second;
413}
414
415const ActionsDAG::Node * getNodeOrThrow(const std::string & node_name)
416{
417auto it = node_name_to_node.find(node_name);
418if (it == node_name_to_node.end())
419throw Exception(ErrorCodes::LOGICAL_ERROR,
420"No node with name {}. There are only nodes {}",
421node_name,
422actions_dag->dumpNames());
423
424return it->second;
425}
426
427const ActionsDAG::Node * addInputColumnIfNecessary(const std::string & node_name, const DataTypePtr & column_type)
428{
429auto it = node_name_to_node.find(node_name);
430if (it != node_name_to_node.end())
431return it->second;
432
433const auto * node = &actions_dag->addInput(node_name, column_type);
434node_name_to_node[node->result_name] = node;
435
436return node;
437}
438
439const ActionsDAG::Node * addInputConstantColumnIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
440{
441auto it = node_name_to_node.find(node_name);
442if (it != node_name_to_node.end())
443return it->second;
444
445const auto * node = &actions_dag->addInput(column);
446node_name_to_node[node->result_name] = node;
447
448return node;
449}
450
451const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
452{
453auto it = node_name_to_node.find(node_name);
454if (it != node_name_to_node.end())
455return it->second;
456
457const auto * node = &actions_dag->addColumn(column);
458node_name_to_node[node->result_name] = node;
459
460return node;
461}
462
463template <typename FunctionOrOverloadResolver>
464const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, const FunctionOrOverloadResolver & function)
465{
466auto it = node_name_to_node.find(node_name);
467if (it != node_name_to_node.end())
468return it->second;
469
470const auto * node = &actions_dag->addFunction(function, children, node_name);
471node_name_to_node[node->result_name] = node;
472
473return node;
474}
475
476const ActionsDAG::Node * addArrayJoinIfNecessary(const std::string & node_name, const ActionsDAG::Node * child)
477{
478auto it = node_name_to_node.find(node_name);
479if (it != node_name_to_node.end())
480return it->second;
481
482const auto * node = &actions_dag->addArrayJoin(*child, node_name);
483node_name_to_node[node->result_name] = node;
484
485return node;
486}
487
488private:
489std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
490ActionsDAGPtr actions_dag;
491QueryTreeNodePtr scope_node;
492};
493
494class PlannerActionsVisitorImpl
495{
496public:
497PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
498const PlannerContextPtr & planner_context_,
499bool use_column_identifier_as_action_node_name_);
500
501ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
502
503private:
504
505class Levels
506{
507public:
508explicit Levels(size_t level) { set(level); }
509
510void set(size_t level)
511{
512check(level);
513if (level)
514mask |= (uint64_t(1) << (level - 1));
515}
516
517void reset(size_t level)
518{
519check(level);
520if (level)
521mask &= ~(uint64_t(1) << (level - 1));
522}
523
524void add(Levels levels) { mask |= levels.mask; }
525
526size_t max() const { return 64 - getLeadingZeroBits(mask); }
527
528private:
529uint64_t mask = 0;
530
531void check(size_t level)
532{
533if (level > 64)
534throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. Maximum 64.");
535}
536};
537
538using NodeNameAndNodeMinLevel = std::pair<std::string, Levels>;
539
540NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node);
541
542NodeNameAndNodeMinLevel visitColumn(const QueryTreeNodePtr & node);
543
544NodeNameAndNodeMinLevel visitConstant(const QueryTreeNodePtr & node);
545
546NodeNameAndNodeMinLevel visitLambda(const QueryTreeNodePtr & node);
547
548NodeNameAndNodeMinLevel makeSetForInFunction(const QueryTreeNodePtr & node);
549
550NodeNameAndNodeMinLevel visitIndexHintFunction(const QueryTreeNodePtr & node);
551
552NodeNameAndNodeMinLevel visitFunction(const QueryTreeNodePtr & node);
553
554std::vector<ActionsScopeNode> actions_stack;
555std::unordered_map<QueryTreeNodePtr, std::string> node_to_node_name;
556const PlannerContextPtr planner_context;
557ActionNodeNameHelper action_node_name_helper;
558bool use_column_identifier_as_action_node_name;
559};
560
561PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
562const PlannerContextPtr & planner_context_,
563bool use_column_identifier_as_action_node_name_)
564: planner_context(planner_context_)
565, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
566, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
567{
568actions_stack.emplace_back(std::move(actions_dag), nullptr);
569}
570
571ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node)
572{
573ActionsDAG::NodeRawConstPtrs result;
574
575if (auto * expression_list_node = expression_node->as<ListNode>())
576{
577for (auto & node : expression_list_node->getNodes())
578{
579auto [node_name, _] = visitImpl(node);
580result.push_back(actions_stack.front().getNodeOrThrow(node_name));
581}
582}
583else
584{
585auto [node_name, _] = visitImpl(expression_node);
586result.push_back(actions_stack.front().getNodeOrThrow(node_name));
587}
588
589return result;
590}
591
592PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitImpl(QueryTreeNodePtr node)
593{
594auto node_type = node->getNodeType();
595
596if (node_type == QueryTreeNodeType::COLUMN)
597return visitColumn(node);
598else if (node_type == QueryTreeNodeType::CONSTANT)
599return visitConstant(node);
600else if (node_type == QueryTreeNodeType::FUNCTION)
601return visitFunction(node);
602
603throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
604"Expected column, constant, function. Actual {} with type: {}",
605node->formatASTForErrorMessage(), node_type);
606}
607
608PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node)
609{
610auto column_node_name = action_node_name_helper.calculateActionNodeName(node);
611const auto & column_node = node->as<ColumnNode &>();
612if (column_node.hasExpression() && !use_column_identifier_as_action_node_name)
613return visitImpl(column_node.getExpression());
614Int64 actions_stack_size = static_cast<Int64>(actions_stack.size() - 1);
615for (Int64 i = actions_stack_size; i >= 0; --i)
616{
617actions_stack[i].addInputColumnIfNecessary(column_node_name, column_node.getColumnType());
618
619auto column_source = column_node.getColumnSourceOrNull();
620if (column_source &&
621column_source->getNodeType() == QueryTreeNodeType::LAMBDA &&
622actions_stack[i].getScopeNode().get() == column_source.get())
623{
624return {column_node_name, Levels(i)};
625}
626}
627
628return {column_node_name, Levels(0)};
629}
630
631PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node)
632{
633const auto & constant_node = node->as<ConstantNode &>();
634const auto & constant_literal = constant_node.getValue();
635const auto & constant_type = constant_node.getResultType();
636
637auto constant_node_name = [&]()
638{
639/* To ensure that headers match during distributed query we need to simulate action node naming on
640* secondary servers. If we don't do that headers will mismatch due to constant folding.
641*
642* +--------+
643* -----------------| Server |----------------
644* / +--------+ \
645* / \
646* v v
647* +-----------+ +-----------+
648* | Initiator | ------ | Secondary |------
649* +-----------+ / +-----------+ \
650* | / \
651* | / \
652* v / \
653* +---------------+ v v
654* | Wrap in _CAST | +----------------------------+ +----------------------+
655* | if needed | | Constant folded from _CAST | | Constant folded from |
656* +---------------+ +----------------------------+ | another expression |
657* | +----------------------+
658* v |
659* +----------------------------+ v
660* | Name ConstantNode the same | +--------------------------+
661* | as on initiator server | | Generate action name for |
662* | (wrap in _CAST if needed) | | original expression |
663* +----------------------------+ +--------------------------+
664*/
665if (planner_context->isASTLevelOptimizationAllowed())
666{
667return calculateActionNodeNameWithCastIfNeeded(constant_node);
668}
669else
670{
671// Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed.
672if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY)
673{
674if (constant_node.receivedFromInitiatorServer())
675return calculateActionNodeNameWithCastIfNeeded(constant_node);
676else
677return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression());
678}
679else
680return calculateConstantActionNodeName(constant_literal, constant_type);
681}
682}();
683
684ColumnWithTypeAndName column;
685column.name = constant_node_name;
686column.type = constant_type;
687column.column = column.type->createColumnConst(1, constant_literal);
688
689actions_stack[0].addConstantIfNecessary(constant_node_name, column);
690
691size_t actions_stack_size = actions_stack.size();
692for (size_t i = 1; i < actions_stack_size; ++i)
693{
694auto & actions_stack_node = actions_stack[i];
695actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column);
696}
697
698return {constant_node_name, Levels(0)};
699
700}
701
702PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
703{
704auto & lambda_node = node->as<LambdaNode &>();
705auto result_type = lambda_node.getResultType();
706if (!result_type)
707throw Exception(ErrorCodes::LOGICAL_ERROR,
708"Lambda {} is not resolved during query analysis",
709lambda_node.formatASTForErrorMessage());
710
711auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes();
712size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size();
713
714NamesAndTypesList lambda_arguments_names_and_types;
715
716for (size_t i = 0; i < lambda_arguments_nodes_size; ++i)
717{
718const auto & lambda_argument_name = lambda_node.getArgumentNames().at(i);
719auto lambda_argument_type = lambda_arguments_nodes[i]->getResultType();
720lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type));
721}
722
723auto lambda_actions_dag = std::make_shared<ActionsDAG>();
724actions_stack.emplace_back(lambda_actions_dag, node);
725
726auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression());
727lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name));
728lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name));
729
730auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes);
731auto lambda_actions = std::make_shared<ExpressionActions>(lambda_actions_dag, expression_actions_settings);
732
733Names captured_column_names;
734ActionsDAG::NodeRawConstPtrs lambda_children;
735Names required_column_names = lambda_actions->getRequiredColumns();
736
737actions_stack.pop_back();
738levels.reset(actions_stack.size());
739size_t level = levels.max();
740
741const auto & lambda_argument_names = lambda_node.getArgumentNames();
742
743for (const auto & required_column_name : required_column_names)
744{
745auto it = std::find(lambda_argument_names.begin(), lambda_argument_names.end(), required_column_name);
746
747if (it == lambda_argument_names.end())
748{
749lambda_children.push_back(actions_stack[level].getNodeOrThrow(required_column_name));
750captured_column_names.push_back(required_column_name);
751}
752}
753
754auto lambda_node_name = calculateActionNodeName(node, *planner_context);
755auto function_capture = std::make_shared<FunctionCaptureOverloadResolver>(
756lambda_actions, captured_column_names, lambda_arguments_names_and_types, lambda_node.getExpression()->getResultType(), lambda_expression_node_name);
757
758// TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver.
759const auto * actions_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture);
760
761if (!result_type->equals(*actions_node->result_type))
762throw Exception(ErrorCodes::LOGICAL_ERROR,
763"Lambda resolved type {} is not equal to type from actions DAG {}",
764result_type, actions_node->result_type);
765
766size_t actions_stack_size = actions_stack.size();
767for (size_t i = level + 1; i < actions_stack_size; ++i)
768{
769auto & actions_stack_node = actions_stack[i];
770actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type);
771}
772
773return {lambda_node_name, levels};
774}
775
776PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node)
777{
778const auto & function_node = node->as<FunctionNode &>();
779auto in_first_argument = function_node.getArguments().getNodes().at(0);
780auto in_second_argument = function_node.getArguments().getNodes().at(1);
781
782DataTypes set_element_types;
783
784auto in_second_argument_node_type = in_second_argument->getNodeType();
785
786bool subquery_or_table =
787in_second_argument_node_type == QueryTreeNodeType::QUERY ||
788in_second_argument_node_type == QueryTreeNodeType::UNION ||
789in_second_argument_node_type == QueryTreeNodeType::TABLE;
790
791FutureSetPtr set;
792auto set_key = in_second_argument->getTreeHash();
793
794if (!subquery_or_table)
795{
796set_element_types = {in_first_argument->getResultType()};
797const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(set_element_types.front().get());
798if (left_tuple_type && left_tuple_type->getElements().size() != 1)
799set_element_types = left_tuple_type->getElements();
800
801set_element_types = Set::getElementTypes(std::move(set_element_types), planner_context->getQueryContext()->getSettingsRef().transform_null_in);
802set = planner_context->getPreparedSets().findTuple(set_key, set_element_types);
803}
804else
805{
806set = planner_context->getPreparedSets().findSubquery(set_key);
807if (!set)
808set = planner_context->getPreparedSets().findStorage(set_key);
809}
810
811if (!set)
812throw Exception(ErrorCodes::LOGICAL_ERROR,
813"No set is registered for key {}",
814PreparedSets::toString(set_key, set_element_types));
815
816ColumnWithTypeAndName column;
817column.name = planner_context->createSetKey(in_first_argument->getResultType(), in_second_argument);
818column.type = std::make_shared<DataTypeSet>();
819
820bool set_is_created = set->get() != nullptr;
821auto column_set = ColumnSet::create(1, std::move(set));
822
823if (set_is_created)
824column.column = ColumnConst::create(std::move(column_set), 1);
825else
826column.column = std::move(column_set);
827
828actions_stack[0].addConstantIfNecessary(column.name, column);
829
830size_t actions_stack_size = actions_stack.size();
831for (size_t i = 1; i < actions_stack_size; ++i)
832{
833auto & actions_stack_node = actions_stack[i];
834actions_stack_node.addInputConstantColumnIfNecessary(column.name, column);
835}
836
837return {column.name, Levels(0)};
838}
839
840PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
841{
842const auto & function_node = node->as<FunctionNode &>();
843auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
844
845auto index_hint_actions_dag = std::make_shared<ActionsDAG>();
846auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs();
847std::unordered_set<std::string_view> index_hint_actions_dag_output_node_names;
848PlannerActionsVisitor actions_visitor(planner_context);
849
850for (const auto & argument : function_node.getArguments())
851{
852auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument);
853
854for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes)
855{
856if (index_hint_actions_dag_output_node_names.contains(expression_dag_node->result_name))
857continue;
858
859index_hint_actions_dag_output_node_names.insert(expression_dag_node->result_name);
860index_hint_actions_dag_outputs.push_back(expression_dag_node);
861}
862}
863
864auto index_hint_function = std::make_shared<FunctionIndexHint>();
865index_hint_function->setActions(std::move(index_hint_actions_dag));
866auto index_hint_function_overload_resolver = std::make_shared<FunctionToOverloadResolverAdaptor>(std::move(index_hint_function));
867
868size_t index_hint_function_level = actions_stack.size() - 1;
869actions_stack[index_hint_function_level].addFunctionIfNecessary(function_node_name, {}, index_hint_function_overload_resolver);
870
871return {function_node_name, Levels(index_hint_function_level)};
872}
873
874PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node)
875{
876const auto & function_node = node->as<FunctionNode &>();
877if (function_node.getFunctionName() == "indexHint")
878return visitIndexHintFunction(node);
879
880std::optional<NodeNameAndNodeMinLevel> in_function_second_argument_node_name_with_level;
881
882if (isNameOfInFunction(function_node.getFunctionName()))
883in_function_second_argument_node_name_with_level = makeSetForInFunction(node);
884
885auto function_node_name = action_node_name_helper.calculateActionNodeName(node);
886
887/* Aggregate functions, window functions, and GROUP BY expressions were already analyzed in the previous steps.
888* If we have already visited some expression, we don't need to revisit it or its arguments again.
889* For example, the expression from the aggregation step is also present in the projection:
890* SELECT foo(a, b, c) as x FROM table GROUP BY foo(a, b, c)
891* In this case we should not analyze `a`, `b`, `c` again.
892* Moreover, it can lead to an error if we have arrayJoin in the arguments because it will be calculated twice.
893*/
894bool is_input_node = function_node.isAggregateFunction() || function_node.isWindowFunction()
895|| actions_stack.front().containsInputNode(function_node_name);
896if (is_input_node)
897{
898size_t actions_stack_size = actions_stack.size();
899
900for (size_t i = 0; i < actions_stack_size; ++i)
901{
902auto & actions_stack_node = actions_stack[i];
903actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType());
904}
905
906return {function_node_name, Levels(0)};
907}
908
909const auto & function_arguments = function_node.getArguments().getNodes();
910size_t function_arguments_size = function_arguments.size();
911
912Names function_arguments_node_names;
913function_arguments_node_names.reserve(function_arguments_size);
914
915Levels levels(0);
916for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index)
917{
918if (in_function_second_argument_node_name_with_level && function_argument_index == 1)
919{
920auto & [node_name, node_levels] = *in_function_second_argument_node_name_with_level;
921function_arguments_node_names.push_back(std::move(node_name));
922levels.add(node_levels);
923continue;
924}
925
926const auto & argument = function_arguments[function_argument_index];
927
928if (argument->getNodeType() == QueryTreeNodeType::LAMBDA)
929{
930auto [node_name, node_levels] = visitLambda(argument);
931function_arguments_node_names.push_back(std::move(node_name));
932levels.add(node_levels);
933continue;
934}
935
936auto [node_name, node_levels] = visitImpl(argument);
937function_arguments_node_names.push_back(std::move(node_name));
938levels.add(node_levels);
939}
940
941ActionsDAG::NodeRawConstPtrs children;
942children.reserve(function_arguments_size);
943
944size_t level = levels.max();
945for (auto & function_argument_node_name : function_arguments_node_names)
946children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name));
947
948if (function_node.getFunctionName() == "arrayJoin")
949{
950if (level != 0)
951throw Exception(ErrorCodes::BAD_ARGUMENTS,
952"Expression in arrayJoin cannot depend on lambda argument: {} ",
953function_arguments_node_names.at(0));
954
955actions_stack[level].addArrayJoinIfNecessary(function_node_name, children.at(0));
956}
957else
958{
959actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node);
960}
961
962size_t actions_stack_size = actions_stack.size();
963for (size_t i = level + 1; i < actions_stack_size; ++i)
964{
965auto & actions_stack_node = actions_stack[i];
966actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType());
967}
968
969return {function_node_name, levels};
970}
971
972}
973
974PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
975: planner_context(planner_context_)
976, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
977{}
978
979ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node)
980{
981PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name);
982return actions_visitor_impl.visit(expression_node);
983}
984
985String calculateActionNodeName(const QueryTreeNodePtr & node,
986const PlannerContext & planner_context,
987QueryTreeNodeToName & node_to_name,
988bool use_column_identifier_as_action_node_name)
989{
990ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
991return helper.calculateActionNodeName(node);
992}
993
994String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
995{
996QueryTreeNodeToName empty_map;
997ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
998return helper.calculateActionNodeName(node);
999}
1000
1001String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
1002{
1003return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal, constant_type);
1004}
1005
1006String calculateConstantActionNodeName(const Field & constant_literal)
1007{
1008return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal);
1009}
1010
1011String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
1012const PlannerContext & planner_context,
1013QueryTreeNodeToName & node_to_name,
1014bool use_column_identifier_as_action_node_name)
1015{
1016ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
1017return helper.calculateWindowNodeActionName(node);
1018}
1019
1020String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
1021{
1022QueryTreeNodeToName empty_map;
1023ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
1024return helper.calculateWindowNodeActionName(node);
1025}
1026
1027}
1028