Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/Core/Settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2110,6 +2110,11 @@ DECLARE(BoolAuto, query_plan_join_swap_table, Field("auto"), R"(
DECLARE(Bool, query_plan_join_shard_by_pk_ranges, false, R"(
Apply sharding for JOIN if join keys contain a prefix of PRIMARY KEY for both tables. Supported for hash, parallel_hash and full_sorting_merge algorithms. Usually does not speed up queries but may lower memory consumption.
)", 0) \
\
DECLARE(Bool, enable_alias_marker, true, R"(
Enable __aliasMarker injection for ALIAS column expressions when using the analyzer.
This stabilizes action node names across planner/analyzer stages without changing query semantics.
)", 0) \
\
DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"(
This setting adjusts the data block size for query processing and represents additional fine-tuning to the more rough 'max_block_size' setting. If the columns are large and with 'max_block_size' rows the block size is likely to be larger than the specified amount of bytes, its size will be lowered for better CPU cache locality.
Expand Down
1 change: 1 addition & 0 deletions src/Core/SettingsChangesHistory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
{"iceberg_timezone_for_timestamptz", "UTC", "UTC", "New setting."},
{"hybrid_table_auto_cast_columns", true, true, "New setting to automatically cast Hybrid table columns when segments disagree on types. Default enabled."},
{"allow_experimental_hybrid_table", false, false, "Added new setting to allow the Hybrid table engine."},
{"enable_alias_marker", true, true, "New setting."},
{"input_format_parquet_use_native_reader_v3", false, true, "Seems stable"},
{"input_format_parquet_verify_checksums", true, true, "New setting."},
{"output_format_parquet_write_checksums", false, true, "New setting."},
Expand Down
18 changes: 18 additions & 0 deletions src/Functions/identity.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <Functions/identity.h>
#include <Functions/FunctionFactory.h>
#include <Common/FunctionDocumentation.h>

namespace DB
{
Expand All @@ -19,4 +20,21 @@ REGISTER_FUNCTION(ActionName)
factory.registerFunction<FunctionActionName>();
}

REGISTER_FUNCTION(AliasMarker)
{
factory.registerFunction<FunctionAliasMarker>(FunctionDocumentation{
.description = R"(
Internal function that marks ALIAS column expressions for the analyzer. Not intended for direct use.
)",
.syntax = {"__aliasMarker(expr, alias_name)"},
.arguments = {
{"expr", "Expression to mark.", {"Any"}},
{"alias_name", "Alias name attached to the expression.", {"String"}},
},
.returned_value = {"Returns expr unchanged.", {"Any"}},
.introduced_in = {25, 8},
.category = FunctionDocumentation::Category::Other,
});
}

}
34 changes: 34 additions & 0 deletions src/Functions/identity.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,38 @@ class FunctionActionName : public FunctionIdentityBase<ActionNameName>
}
};

struct AliasMarkerName
{
static constexpr auto name = "__aliasMarker";
};

class FunctionAliasMarker : public IFunction
{
public:
static constexpr auto name = AliasMarkerName::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionAliasMarker>(); }

String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool isSuitableForConstantFolding() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments");

if (!WhichDataType(arguments[1]).isString())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");

return arguments.front();
}

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
return arguments.front().column;
}
};

}
3 changes: 3 additions & 0 deletions src/Interpreters/ActionsVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,9 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (node.name == "lambda")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Unexpected lambda expression");

if (node.name == "__aliasMarker")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and supported only with the analyzer");

/// Function arrayJoin.
if (node.name == "arrayJoin")
{
Expand Down
65 changes: 64 additions & 1 deletion src/Planner/PlannerActionsVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <Analyzer/WindowNode.h>

#include <DataTypes/DataTypeSet.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/FieldToDataType.h>

Expand Down Expand Up @@ -178,7 +179,25 @@ class ActionNodeNameHelper
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
if (function_node.getFunctionName() == "__actionName")
if (function_node.getFunctionName() == "__aliasMarker")
{
/// Perform sanity check, because user may call this function with unexpected arguments
const auto & function_argument_nodes = function_node.getArguments().getNodes();
if (function_argument_nodes.size() == 2)
{
if (const auto * second_argument = function_argument_nodes.at(1)->as<ConstantNode>())
{
if (isString(second_argument->getResultType()))
result = second_argument->getValue().safeGet<String>();
}
}

/// Empty node name is not allowed and leads to logical errors
if (result.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");
break;
}
else if (function_node.getFunctionName() == "__actionName")
{
/// Perform sanity check, because user may call this function with unexpected arguments
const auto & function_argument_nodes = function_node.getArguments().getNodes();
Expand Down Expand Up @@ -601,6 +620,18 @@ class ActionsScopeNode
return node;
}

const ActionsDAG::Node * addAliasIfNecessary(const std::string & node_name, const ActionsDAG::Node * child)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;

const auto * node = &actions_dag.addAlias(*child, node_name);
node_name_to_node[node->result_name] = node;

return node;
}

private:
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
ActionsDAG & actions_dag;
Expand Down Expand Up @@ -1062,6 +1093,38 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
{
const auto & function_node = node->as<FunctionNode &>();

if (function_node.getFunctionName() == "__aliasMarker")
{
const auto & function_arguments = function_node.getArguments().getNodes();
if (function_arguments.size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker expects 2 arguments");

const auto * alias_id_node = function_arguments.at(1)->as<ConstantNode>();
if (!alias_id_node || !isString(alias_id_node->getResultType()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");

const auto & alias_id = alias_id_node->getValue().safeGet<String>();
if (alias_id.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __aliasMarker is internal and should not be used directly");

auto [child_name, levels] = visitImpl(function_arguments.at(0));
if (alias_id == child_name)
return {child_name, levels};

size_t level = levels.max();
const auto * child_node = actions_stack[level].getNodeOrThrow(child_name);
actions_stack[level].addAliasIfNecessary(alias_id, child_node);

size_t actions_stack_size = actions_stack.size();
for (size_t i = level + 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputColumnIfNecessary(alias_id, function_node.getResultType());
}

return {alias_id, levels};
}

if (function_node.getFunctionName() == "indexHint")
return visitIndexHintFunction(node);
if (function_node.getFunctionName() == "exists")
Expand Down
5 changes: 3 additions & 2 deletions src/Planner/PlannerJoinTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,13 +654,14 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info)
}

std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const SharedHeader & current_header)
AliasColumnExpressions & alias_column_expressions, const SharedHeader & current_header)
{
ActionsDAG merged_alias_columns_actions_dag(current_header->getColumnsWithTypeAndName());
ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs();

for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions)
for (auto & alias_column_expression : alias_column_expressions)
{
auto & alias_column_actions_dag = alias_column_expression.second;
const auto & current_outputs = alias_column_actions_dag.getOutputs();
action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end());
merged_alias_columns_actions_dag.mergeNodes(std::move(alias_column_actions_dag));
Expand Down
22 changes: 19 additions & 3 deletions src/Planner/TableExpressionData.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ namespace ErrorCodes
using ColumnIdentifier = std::string;
using ColumnIdentifiers = std::vector<ColumnIdentifier>;
using ColumnIdentifierSet = std::unordered_set<ColumnIdentifier>;
using AliasColumnExpression = std::pair<std::string, ActionsDAG>;
using AliasColumnExpressions = std::vector<AliasColumnExpression>;

struct PrewhereInfo;
using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
Expand Down Expand Up @@ -77,7 +79,8 @@ class TableExpressionData
/// Add alias column
void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAG actions_dag, bool is_selected_column = true)
{
alias_column_expressions.emplace(column.name, std::move(actions_dag));
alias_column_expressions.emplace_back(column.name, std::move(actions_dag));
alias_column_names_set.emplace(column.name);
addColumnImpl(column, column_identifier, is_selected_column);
}

Expand All @@ -96,11 +99,21 @@ class TableExpressionData
}

/// Get ALIAS columns names mapped to expressions
std::unordered_map<std::string, ActionsDAG> & getAliasColumnExpressions()
AliasColumnExpressions & getAliasColumnExpressions()
{
return alias_column_expressions;
}

const AliasColumnExpressions & getAliasColumnExpressions() const
{
return alias_column_expressions;
}

bool hasAliasColumn(const std::string & column_name) const
{
return alias_column_names_set.contains(column_name);
}

/// Get column name to column map
const ColumnNameToColumn & getColumnNameToColumn() const
{
Expand Down Expand Up @@ -279,7 +292,10 @@ class TableExpressionData
NameSet selected_column_names_set;

/// Expression to calculate ALIAS columns
std::unordered_map<std::string, ActionsDAG> alias_column_expressions;
/// Keep alias name (String) + expression (ActionsDAG) pairs; vector preserves insertion order.
AliasColumnExpressions alias_column_expressions;
/// Fast membership checks for alias column names.
NameSet alias_column_names_set;

/// Valid for table, table function, array join, query, union nodes
ColumnNameToColumn column_name_to_column;
Expand Down
52 changes: 51 additions & 1 deletion src/Planner/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <Analyzer/ConstantNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/TableNode.h>
Expand Down Expand Up @@ -191,13 +192,62 @@ ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node, bool set_subq
return result_ast;
}

namespace
{
class NormalizeAliasMarkerVisitor : public InDepthQueryTreeVisitor<NormalizeAliasMarkerVisitor>
{
public:
void visitImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "__aliasMarker")
return;

auto & arguments = function_node->getArguments().getNodes();
if (arguments.size() != 2)
return;

while (true)
{
auto * inner_function = arguments.front()->as<FunctionNode>();
if (!inner_function || inner_function->getFunctionName() != "__aliasMarker")
break;

auto & inner_arguments = inner_function->getArguments().getNodes();
if (inner_arguments.size() != 2)
break;

arguments.front() = inner_arguments.front();
}
}

bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child)
{
auto * parent_function = parent->as<FunctionNode>();
if (parent_function && parent_function->getFunctionName() == "__aliasMarker")
return false;

auto child_node_type = child->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}
};

void normalizeAliasMarkersInQueryTree(QueryTreeNodePtr & node)
{
NormalizeAliasMarkerVisitor visitor;
visitor.visit(node);
}
}

ASTPtr queryNodeToDistributedSelectQuery(const QueryTreeNodePtr & query_node)
{
/// Remove CTEs information from distributed queries.
/// Now, if cte_name is set for subquery node, AST -> String serialization will only print cte name.
/// But CTE is defined only for top-level query part, so may not be sent.
/// Removing cte_name forces subquery to be always printed.
auto ast = queryNodeToSelectQuery(query_node, /*set_subquery_cte_name=*/false);
auto query_node_to_convert = query_node->clone();
normalizeAliasMarkersInQueryTree(query_node_to_convert);
auto ast = queryNodeToSelectQuery(query_node_to_convert, /*set_subquery_cte_name=*/false);
return ast;
}

Expand Down
3 changes: 1 addition & 2 deletions src/Storages/SelectQueryInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@ std::unordered_map<std::string, ColumnWithTypeAndName> SelectQueryInfo::buildNod
if (planner_context)
{
auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression);
const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName())
{
/// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG,
/// so they should not be added to the input nodes.
if (alias_column_expressions.contains(column_name))
if (table_expression_data.hasAliasColumn(column_name))
continue;
const auto & column = table_expression_data.getColumnOrThrow(column_name);
node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name));
Expand Down
Loading
Loading