ClickHouse

Форк
0
195 строк · 7.6 Кб
1
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
2
#include <Processors/QueryPlan/ExpressionStep.h>
3
#include <Processors/QueryPlan/FilterStep.h>
4
#include <Processors/QueryPlan/SourceStepWithFilter.h>
5
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
6
#include <Storages/StorageDummy.h>
7
#include <Interpreters/ActionsDAG.h>
8
#include <Functions/FunctionsLogical.h>
9
#include <Functions/IFunctionAdaptors.h>
10

11
namespace DB
12
{
13

14
namespace QueryPlanOptimizations
15
{
16

17
/// Removes the output called `name` from the output list of `dag`.
///
/// The node is resolved through ActionsDAG::findInOutputs, then the first entry of
/// the output list that holds this exact node pointer is erased. Only the output list
/// is edited here; no other part of the DAG is modified by this function.
///
/// NOTE(review): what happens when no output has this name is decided by
/// findInOutputs (presumably it throws) — confirm against ActionsDAG.
static void removeFromOutput(ActionsDAG & dag, const std::string & name)
{
    const auto * node = &dag.findInOutputs(name);
    auto & outputs = dag.getOutputs();
    for (auto it = outputs.begin(); it != outputs.end(); ++it)
    {
        if (*it == node)
        {
            /// Erase only the first matching entry and stop: the iterator is invalid after erase.
            outputs.erase(it);
            return;
        }
    }
}
30

31
/// Tries to move conditions of a WHERE filter into the PREWHERE of the source step right below it.
///
/// The top of `stack` must be a SourceStepWithFilter and the frame below it a FilterStep;
/// the stack must hold at least 3 frames. The function returns without touching the plan when:
///   - the stack shape does not match,
///   - the storage cannot move conditions to PREWHERE,
///   - the source already has PREWHERE actions,
///   - the optimization is disabled by settings (taking FINAL into account),
///   - the storage reports no column sizes,
///   - MergeTreeWhereOptimizer selects no condition to move.
///
/// Otherwise the filter expression is split in two: the first part becomes the PREWHERE actions
/// of the source step, the second part replaces the filter node's step (a FilterStep when only
/// a part of the condition was moved, an ExpressionStep when everything was moved).
void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
{
    if (stack.size() < 3)
        return;

    auto & frame = stack.back();

    /** Assume that on stack there are at least 3 nodes:
      *
      * 1. SomeNode
      * 2. FilterNode
      * 3. SourceStepWithFilterNode
      */
    auto * source_step_with_filter = dynamic_cast<SourceStepWithFilter *>(frame.node->step.get());
    if (!source_step_with_filter)
        return;

    const auto & storage_snapshot = source_step_with_filter->getStorageSnapshot();
    const auto & storage = storage_snapshot->storage;
    if (!storage.canMoveConditionsToPrewhere())
        return;

    /// Do nothing if the source already carries PREWHERE actions.
    const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo();
    if (storage_prewhere_info && storage_prewhere_info->prewhere_actions)
        return;

    /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans.
    QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
    const auto * filter_step = typeid_cast<FilterStep *>(filter_node->step.get());
    if (!filter_step)
        return;

    const auto & context = source_step_with_filter->getContext();
    const auto & settings = context->getSettingsRef();

    /// With FINAL the move is allowed only if optimize_move_to_prewhere_if_final is also enabled.
    bool is_final = source_step_with_filter->isQueryWithFinal();
    bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final);
    if (!optimize_move_to_prewhere)
        return;

    const auto & storage_metadata = storage_snapshot->metadata;
    auto column_sizes = storage.getColumnSizes();
    if (column_sizes.empty())
        return;

    /// Extract column compressed sizes
    std::unordered_map<std::string, UInt64> column_compressed_sizes;
    for (const auto & [name, sizes] : column_sizes)
        column_compressed_sizes[name] = sizes.data_compressed;

    Names queried_columns = source_step_with_filter->requiredSourceColumns();

    MergeTreeWhereOptimizer where_optimizer{
        std::move(column_compressed_sizes),
        storage_metadata,
        storage.getConditionEstimatorByPredicate(source_step_with_filter->getQueryInfo(), storage_snapshot, context),
        queried_columns,
        storage.supportedPrewhereColumns(),
        getLogger("QueryPlanOptimizePrewhere")};

    /// Reuse the context fetched above instead of asking the source step for it again.
    auto optimize_result = where_optimizer.optimize(filter_step->getExpression(),
        filter_step->getFilterColumnName(),
        context,
        is_final);

    if (optimize_result.prewhere_nodes.empty())
        return;

    /// Keep the fields of an existing PrewhereInfo (one without prewhere_actions, see the check above).
    PrewhereInfoPtr prewhere_info;
    if (storage_prewhere_info)
        prewhere_info = storage_prewhere_info->clone();
    else
        prewhere_info = std::make_shared<PrewhereInfo>();

    /// The filter column may be dropped by PREWHERE only if the whole condition was moved
    /// and the original filter step was going to drop it as well.
    /// Computed once: nothing below changes either operand before the filter step is replaced.
    const bool remove_filter_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn();

    prewhere_info->need_filter = true;
    prewhere_info->remove_prewhere_column = remove_filter_column;

    auto filter_expression = filter_step->getExpression();
    const auto & filter_column_name = filter_step->getFilterColumnName();

    if (prewhere_info->remove_prewhere_column)
    {
        removeFromOutput(*filter_expression, filter_column_name);
        auto & outputs = filter_expression->getOutputs();
        size_t size = outputs.size();
        /// Temporarily expose the PREWHERE nodes as outputs while unused actions are removed
        /// (presumably so that they are not dropped together with the filter column),
        /// then restore the previous output list by cutting the appended tail off.
        outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end());
        filter_expression->removeUnusedActions(false);
        outputs.resize(size);
    }

    auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true);

    /// This is a leaky abstraction.
    /// Split actions may have inputs which are needed only for PREWHERE.
    /// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation.
    ///
    /// See 00950_default_prewhere for example.
    /// Table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType+1`
    /// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE
    /// and columns APIKey, SessionType are removed from inputs (cause only OperatingSystem is needed).
    /// However, column OperatingSystem is calculated after PREWHERE stage, based on SessionType value.
    /// If column SessionType is removed by PREWHERE actions, we use zero as default, and get a wrong result.
    ///
    /// So, here we restore removed inputs for PREWHERE actions
    {
        std::unordered_set<const ActionsDAG::Node *> first_outputs(
            split_result.first->getOutputs().begin(), split_result.first->getOutputs().end());
        for (const auto * input : split_result.first->getInputs())
        {
            if (!first_outputs.contains(input))
            {
                split_result.first->getOutputs().push_back(input);
                /// Add column to second actions as input.
                /// Do not add it to result, so it would be removed.
                split_result.second->addInput(input->result_name, input->result_type);
            }
        }
    }

    /// Translate the chosen condition nodes into the corresponding nodes of the split-off DAG.
    ActionsDAG::NodeRawConstPtrs conditions;
    conditions.reserve(split_result.split_nodes_mapping.size());
    for (const auto * condition : optimize_result.prewhere_nodes_list)
        conditions.push_back(split_result.split_nodes_mapping.at(condition));

    prewhere_info->prewhere_actions = std::move(split_result.first);

    if (conditions.size() == 1)
    {
        /// A single condition is used as the PREWHERE column directly.
        prewhere_info->prewhere_column_name = conditions.front()->result_name;
        if (prewhere_info->remove_prewhere_column)
            prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front());
    }
    else
    {
        /// Several conditions are combined with `and`; the combined column is created here,
        /// so it is always marked for removal.
        prewhere_info->remove_prewhere_column = true;

        FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
        const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {});
        prewhere_info->prewhere_column_name = node->result_name;
        prewhere_info->prewhere_actions->getOutputs().push_back(node);
    }

    source_step_with_filter->updatePrewhereInfo(prewhere_info);

    if (!optimize_result.fully_moved_to_prewhere)
    {
        /// A part of the condition stays in WHERE: keep a filter with the remaining actions.
        filter_node->step = std::make_unique<FilterStep>(
            source_step_with_filter->getOutputStream(),
            std::move(split_result.second),
            filter_step->getFilterColumnName(),
            filter_step->removesFilterColumn());
    }
    else
    {
        /// Have to keep this expression to change column names to column identifiers
        filter_node->step = std::make_unique<ExpressionStep>(
            source_step_with_filter->getOutputStream(),
            std::move(split_result.second));
    }
}
192

193
}
194

195
}
196

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.