mysql-server/sql/sql_resolver.cc
2025-03-05 14:31:37 +07:00

8320 lines
315 KiB
C++

/* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/**
@file
@brief
Implementation of name resolution stage
@defgroup Query_Resolver Query Resolver
@{
*/
#include "sql/sql_resolver.h"
#include <sys/types.h>
#include <algorithm>
#include <cassert>
#include <cstddef> // size_t
#include <cstdio> // snprintf
#include <cstring> // strcmp
#include <deque>
#include <functional>
#include <initializer_list>
#include <utility>
#include <vector>
#include "field_types.h"
#include "lex_string.h"
#include "map_helpers.h"
#include "mem_root_deque.h"
#include "my_alloc.h"
#include "my_bitmap.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_sqlcommand.h"
#include "my_sys.h"
#include "my_table_map.h"
#include "mysql/components/services/bits/psi_bits.h"
#include "mysql_com.h" // NAME_LEN
#include "mysqld_error.h"
#include "prealloced_array.h" // Prealloced_array
#include "sql/aggregate_check.h" // Group_check
#include "sql/auth/auth_acls.h"
#include "sql/auth/auth_common.h" // check_single_table_access
#include "sql/check_stack.h" // check_stack_overrun
#include "sql/current_thd.h" // current_thd
#include "sql/derror.h" // ER_THD
#include "sql/enum_query_type.h"
#include "sql/error_handler.h" // View_error_handler
#include "sql/field.h"
#include "sql/item.h"
#include "sql/item_cmpfunc.h"
#include "sql/item_func.h"
#include "sql/item_row.h"
#include "sql/item_subselect.h"
#include "sql/item_sum.h" // Item_sum
#include "sql/join_optimizer/bit_utils.h"
#include "sql/join_optimizer/join_optimizer.h"
#include "sql/mdl.h" // MDL_SHARED_READ
#include "sql/mem_root_array.h"
#include "sql/nested_join.h"
#include "sql/opt_hints.h"
#include "sql/opt_trace.h" // Opt_trace_object
#include "sql/opt_trace_context.h"
#include "sql/parse_tree_nodes.h" // PT_order_expr
#include "sql/parser_yystype.h"
#include "sql/query_options.h"
#include "sql/query_result.h" // Query_result
#include "sql/range_optimizer/partition_pruning.h"
#include "sql/range_optimizer/range_optimizer.h" // prune_partitions
#include "sql/sql_base.h" // setup_fields
#include "sql/sql_class.h"
#include "sql/sql_cmd.h" // Sql_cmd
#include "sql/sql_const.h"
#include "sql/sql_derived.h" //Condition_pushdown
#include "sql/sql_error.h"
#include "sql/sql_executor.h" // is_rollup_sum_wrapper, is_rollup_group_wrapper
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_optimizer.h" // build_bitmap_for_nested_joins
#include "sql/sql_select.h"
#include "sql/sql_test.h" // print_where
#include "sql/sql_union.h" // Query_result_union
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/thd_raii.h"
#include "sql/thr_malloc.h"
#include "sql/visible_fields.h"
#include "sql/window.h"
#include "template_utils.h"
#include "thr_lock.h" // TL_READ
using std::find;
using std::function;
static const enum_walk walk_options =
enum_walk::PREFIX | enum_walk::POSTFIX | enum_walk::SUBQUERY;
static bool simplify_const_condition(THD *thd, Item **cond,
bool remove_cond = true,
bool *ret_cond_value = nullptr);
static Item *create_rollup_switcher(THD *thd, Query_block *query_block,
Item_sum *item, int send_group_parts);
static bool fulltext_uses_rollup_column(const Query_block *query_block);
/**
Prepare query block for optimization.
Resolve table and column information.
Resolve all expressions (item trees), ie WHERE clause, join conditions,
GROUP BY clause, HAVING clause, ORDER BY clause, LIMIT clause.
Prepare all subqueries recursively as part of resolving the expressions.
Apply permanent transformations to the abstract syntax tree, such as
semi-join transformation, derived table transformation, elimination of
constant values and redundant clauses (e.g ORDER BY, GROUP BY).
@param thd thread handler
@param insert_field_list List of fields when used in INSERT, otherwise NULL
@returns false if success, true if error
@note on privilege checking for SELECT query that possibly contains view
or derived table references:
- When this function is called, it is assumed that the precheck() function
has been called. precheck() ensures that the user has some SELECT
privileges to the tables involved in the query. When resolving views
it has also been established that the user has some privileges for them.
To prepare a view for privilege checking, it is also needed to call
check_view_privileges() after views have been merged into the query.
This is not necessary for unnamed derived tables since it has already
been established that we have SELECT privileges for the underlying tables
by the precheck functions. (precheck() checks a query without resolved
views, ie. before tables are opened, so underlying tables of views
are not yet available).
- When a query block is resolved, always ensure that the user has SELECT
privileges to the columns referenced in the WHERE clause, the join
conditions, the GROUP BY clause, the HAVING clause and the ORDER BY clause.
- When resolving the outer-most query block, ensure that the user also has
SELECT privileges to the columns in the selected expressions.
- When setting up a derived table or view for materialization, ensure that
the user has SELECT privileges to the columns in the selected expressions
- Column privileges are normally checked by Item_field::fix_fields().
Exceptions are select list of derived tables/views which are checked
in Table_ref::setup_materialized_derived(), and natural/using join
conditions that are checked in mark_common_columns().
- As far as INSERT, UPDATE and DELETE statements have the same expressions
as a SELECT statement, this note applies to those statements as well.
*/
bool Query_block::prepare(THD *thd, mem_root_deque<Item *> *insert_field_list) {
DBUG_TRACE;
assert(this == thd->lex->current_query_block());
assert(join == nullptr);
assert(!thd->is_error());
// If this query block is a table value constructor, a lot of the preparation
// done in Query_block::prepare becomes irrelevant. Thus we call our own
// Query_block::prepare_values in this case.
if (is_table_value_constructor) return prepare_values(thd);
Query_expression *const unit = master_query_expression();
if (!m_table_nest.empty()) propagate_nullability(&m_table_nest, false);
/*
Determine whether it is suggested to merge immediate derived tables, based
on the placement of the query block:
- DTs belonging to outermost query block: always
- DTs belonging to first level subqueries: Yes if inside SELECT statement,
no otherwise (including UPDATE and DELETE).
This is required to support a workaround for allowing subqueries
containing the same table as is target for delete or update,
by forcing a materialization of the subquery.
- All other cases inherit status of parent query block.
*/
allow_merge_derived = outer_query_block() == nullptr ||
master_query_expression()->item == nullptr ||
(outer_query_block()->outer_query_block() == nullptr
? parent_lex->sql_command == SQLCOM_SELECT ||
parent_lex->sql_command == SQLCOM_SET_OPTION
: outer_query_block()->allow_merge_derived);
Opt_trace_context *const trace = &thd->opt_trace;
Opt_trace_object trace_wrapper_prepare(trace);
Opt_trace_object trace_prepare(trace, "join_preparation");
trace_prepare.add_select_number(select_number);
Opt_trace_array trace_steps(trace, "steps");
/*
Setup the expressions in the SELECT list.
For derived tables/views, wait with privilege checking of columns and
marking in read/write sets until we know how they are used (may be used in
UPDATE and INSERT). Exceptions:
- Always assume columns referenced in subqueries are selected.
- Always assume outer references are selected (marking is then done in
Item_outer_ref::fix_fields).
Expressions must be resolved here, before tables are set up, otherwise table
function's arguments are not resolved properly.
*/
const bool check_privs = !thd->derived_tables_processing ||
master_query_expression()->item != nullptr;
thd->mark_used_columns = check_privs ? MARK_COLUMNS_READ : MARK_COLUMNS_NONE;
Access_bitmask want_privilege_saved = thd->want_privilege;
thd->want_privilege = check_privs ? SELECT_ACL : 0;
/*
Expressions in lateral join can't refer to item list, thus item list lookup
shouldn't be allowed during table/table function setup.
*/
is_item_list_lookup = false;
/* Check that all tables, fields, conds and order are ok */
if (setup_tables(thd, get_table_list(), false)) return true;
if ((derived_table_count || table_func_count) &&
resolve_placeholder_tables(thd, true))
return true;
// Wait with privilege checking until all derived tables are resolved.
if (derived_table_count && !thd->derived_tables_processing &&
check_view_privileges(thd, SELECT_ACL, SELECT_ACL))
return true;
is_item_list_lookup = true;
// Precompute and store the row types of NATURAL/USING joins.
if (leaf_table_count >= 2 &&
setup_natural_join_row_types(thd, m_current_table_nest, &context))
return true;
Mem_root_array<Item_exists_subselect *> sj_candidates_local(thd->mem_root);
set_sj_candidates(&sj_candidates_local);
/*
Item and Item_field CTORs will both increment some counters
in current_query_block(), based on the current parsing context.
We are not parsing anymore: any new Items created now are due to
query rewriting, so stop incrementing counters.
*/
assert(parsing_place == CTX_NONE);
parsing_place = CTX_NONE;
resolve_place = RESOLVE_SELECT_LIST;
if (with_wild && setup_wild(thd)) return true;
if (setup_base_ref_items(thd)) return true; /* purecov: inspected */
if (setup_fields(thd, thd->want_privilege, /*allow_sum_func=*/true,
/*split_sum_funcs=*/true, /*column_update=*/false,
insert_field_list, &fields, base_ref_items))
return true;
resolve_place = RESOLVE_NONE;
const nesting_map save_allow_sum_func = thd->lex->allow_sum_func;
const nesting_map save_deny_window_func = thd->lex->m_deny_window_func;
// Do not allow local set functions for join conditions, WHERE and GROUP BY
thd->lex->allow_sum_func &= ~((nesting_map)1 << nest_level);
thd->mark_used_columns = MARK_COLUMNS_READ;
thd->want_privilege = SELECT_ACL;
// Set up join conditions and WHERE clause
if (setup_conds(thd)) return true;
// Set up the GROUP BY clause
int all_fields_count = fields.size();
if (group_list.elements && setup_group(thd)) return true;
hidden_group_field_count = fields.size() - all_fields_count;
// Allow local set functions in HAVING and ORDER BY
thd->lex->allow_sum_func |= (nesting_map)1 << nest_level;
// Windowing is not allowed with HAVING
thd->lex->m_deny_window_func |= (nesting_map)1 << nest_level;
if (is_non_primitive_grouped()) {
for (Item *item : fields) {
mark_item_as_maybe_null_if_non_primitive_grouped(item);
item->update_used_tables();
}
if (populate_grouping_sets(thd)) {
return true;
}
}
// Setup the HAVING clause
if (m_having_cond) {
assert(m_having_cond->is_bool_func());
thd->where = "having clause";
having_fix_field = true;
resolve_place = RESOLVE_HAVING;
if (!m_having_cond->fixed &&
(m_having_cond->fix_fields(thd, &m_having_cond) ||
m_having_cond->check_cols(1)))
return true;
assert(m_having_cond->data_type() != MYSQL_TYPE_INVALID);
/*
Rollup may alter nullability of HAVING condition, so wait with
simplification of this condition until after rollup is resolved.
*/
having_fix_field = false;
resolve_place = RESOLVE_NONE;
}
if (olap == ROLLUP_TYPE && resolve_rollup(thd))
return true; /* purecov: inspected */
thd->lex->m_deny_window_func = save_deny_window_func;
if (m_having_cond != nullptr) {
if (olap == ROLLUP_TYPE) {
m_having_cond = resolve_rollup_item(thd, m_having_cond);
if (m_having_cond == nullptr) {
return true;
}
}
/*
Simplify the having condition if it is a const item.
Leave a TRUE condition if HAVING is always true, so that query block
is still marked as having a HAVING condition.
*/
if (m_having_cond->const_item() && !thd->lex->is_view_context_analysis() &&
!m_having_cond->walk(&Item::is_non_const_over_literals,
enum_walk::POSTFIX, nullptr) &&
simplify_const_condition(thd, &m_having_cond, false))
return true;
}
if (m_qualify_cond != nullptr) {
assert(m_qualify_cond->is_bool_func());
thd->where = "qualify clause";
resolve_place = RESOLVE_QUALIFY;
if (!m_qualify_cond->fixed &&
(m_qualify_cond->fix_fields(thd, &m_qualify_cond) ||
m_qualify_cond->check_cols(1)))
return true;
assert(m_qualify_cond->data_type() != MYSQL_TYPE_INVALID);
resolve_place = RESOLVE_NONE;
/*
Simplify the QUALIFY condition if it is a const item.
Leave a TRUE condition if QUALIFY is always true, so that query block
is still marked as having a QUALIFY condition.
*/
if (m_qualify_cond->const_item() && !thd->lex->is_view_context_analysis() &&
!m_qualify_cond->walk(&Item::is_non_const_over_literals,
enum_walk::POSTFIX, nullptr) &&
simplify_const_condition(thd, &m_qualify_cond, false))
return true;
/*
The QUALIFY clause requires the inclusion of at least one window function
in the query block. The window function can be part of any one of the
following: a) SELECT column list. b) Filter predicate of the QUALIFY
clause.
Rejects the query if the QUALIFY clause is present but neither of the
above conditions are satisfied.
*/
if (!has_windows() && !m_qualify_cond->has_wf()) {
my_error(ER_QUALIFY_WITHOUT_WINDOW_FUNCTION, MYF(0));
return true;
}
}
// Set up the ORDER BY clause
all_fields_count = fields.size();
if (order_list.elements) {
if (setup_order(thd, base_ref_items, get_table_list(), &fields,
order_list.first))
return true;
}
hidden_order_field_count = fields.size() - all_fields_count;
if (fulltext_uses_rollup_column(this)) {
my_error(ER_FULLTEXT_WITH_ROLLUP, MYF(0));
return true;
}
// Resolve OFFSET and LIMIT clauses
if (resolve_limits(thd)) return true;
/*
Query block is completely resolved, except for windows (see below) which
handles its own, so restore set function allowance.
*/
thd->lex->allow_sum_func = save_allow_sum_func;
/*
Permanently remove redundant parts from the query if
1) This is a subquery
2) Not normalizing a view. Removal should take place when a
query involving a view is optimized, not when the view
is created
*/
if (unit->item && // 1)
!thd->lex->is_view_context_analysis()) // 2)
{
if (remove_redundant_subquery_clauses(thd)) return true;
}
/*
Set up windows after setup_order() (as the query's ORDER BY may contain
window functions), and before setup_order_final() (as such function needs
to know about implicit grouping which may be induced by an aggregate
function in the window's PARTITION or ORDER clause).
*/
const size_t fields_cnt = fields.size();
if (m_windows.elements != 0 &&
Window::setup_windows1(thd, this, base_ref_items, get_table_list(),
&fields, &m_windows))
return true;
bool added_new_sum_funcs = fields.size() > fields_cnt;
if (order_list.elements) {
if (setup_order_final(thd)) return true; /* purecov: inspected */
added_new_sum_funcs = true;
}
thd->want_privilege = want_privilege_saved;
if (is_distinct() && can_skip_distinct())
remove_base_options(SELECT_DISTINCT);
/*
Printing the expanded query should happen here and not elsewhere, because
when a view is merged (when the view is opened in open_tables()), the
parent query's query_block does not yet contain a correct WHERE clause (it
misses the view's merged WHERE clause). This is corrected only just above,
in Table_ref::prep_where(), called by
setup_without_group()->setup_conds().
We also have to wait for fix_fields() on HAVING, above.
At this stage, we also have properly set up Item_ref-s.
*/
{
Opt_trace_object trace_wrapper(trace);
opt_trace_print_expanded_query(thd, this, &trace_wrapper);
}
// Transform eligible scalar subqueries to derived tables.
//
// Don't transform if analyzing a view: the resulting query may not be
// compilable from sqldump, (due to group by check/visibility in HAVING).
//
// Don't transform if the switch subquery_to_derived is false.
//
// Note that the transformation must precede m_having_cond->split_sum_func2
// below since substitutions may be made in the HAVING clause which would not
// otherwise get done.
if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
(thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQUERY_TO_DERIVED) ||
(parent_lex->m_sql_cmd != nullptr &&
thd->secondary_engine_optimization() ==
Secondary_engine_optimization::SECONDARY)) &&
transform_scalar_subqueries_to_join_with_derived(thd))
return true; /* purecov: inspected */
/* Preserve the original table map for later reference. */
original_tables_map = all_tables_map();
/*
If GROUPING function is present in having condition -
1. Set that the evaluation of this condition depends on rollup
result.
2. Add a reference to the condition so that result is stored
after evaluation.
*/
if (m_having_cond && (m_having_cond->has_aggregation() ||
m_having_cond->has_grouping_func())) {
if (m_having_cond->split_sum_func2(thd, base_ref_items, &fields,
&m_having_cond, true)) {
return true;
}
added_new_sum_funcs = true;
}
// Move aggregation functions and window functions present in
// QUALIFY clause to the field list and replace them with references.
if (m_qualify_cond != nullptr &&
(m_qualify_cond->has_aggregation() || m_qualify_cond->has_wf())) {
if (m_qualify_cond->split_sum_func2(thd, base_ref_items, &fields,
&m_qualify_cond, true)) {
return true;
}
added_new_sum_funcs = true;
}
if (inner_sum_func_list) {
Item_sum *end = inner_sum_func_list;
Item_sum *item_sum = end;
do {
item_sum = item_sum->next_sum;
if (item_sum->split_sum_func2(thd, base_ref_items, &fields, nullptr,
false)) {
return true;
}
added_new_sum_funcs = true;
} while (item_sum != end);
}
if (added_new_sum_funcs && olap == ROLLUP_TYPE) {
uint send_group_parts = group_list_size();
for (auto it = fields.begin(); it != fields.end(); ++it) {
Item *item = *it;
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item()) {
Item_sum *item_sum = down_cast<Item_sum *>(item);
if (item_sum->aggr_query_block == this &&
!item_sum->is_rollup_sum_wrapper()) {
// split_sum_func2 created a new aggregate function item,
// so we need to update it for rollup.
Item *new_item =
create_rollup_switcher(thd, this, item_sum, send_group_parts);
if (new_item == nullptr) return true;
*it = new_item;
}
}
}
}
if (group_list.elements) {
/*
Because HEAP tables can't index BIT fields we need to use an
additional hidden field for grouping because later it will be
converted to a LONG field. Original field will remain of the
BIT type and will be returned to a client.
*/
for (ORDER *ord = group_list.first; ord; ord = ord->next) {
if ((*ord->item)->type() == Item::FIELD_ITEM &&
(*ord->item)->data_type() == MYSQL_TYPE_BIT) {
Item_field *field = new Item_field(thd, *(Item_field **)ord->item);
ord->item = add_hidden_item(field);
}
}
}
// Setup full-text functions after resolving HAVING
if (has_ft_funcs()) {
// The full-text search function cannot be called after aggregation, as it
// needs the underlying scan to be positioned on the correct row. Therefore,
// lift calls to the full-text search MATCH function to the SELECT list (as
// hidden items), so the results can be materialized before or during
// aggregation.
if (lift_fulltext_from_having_to_select_list(thd)) {
return true;
}
if (setup_ftfuncs(thd, this)) return true;
}
if (query_result() && query_result()->prepare(thd, fields, unit)) return true;
if (has_sj_candidates() && flatten_subqueries(thd)) return true;
set_sj_candidates(nullptr);
/*
When reaching the top-most query block, or the next-to-top query block for
the SQL command SET and for SP instructions (indicated with SQLCOM_END),
apply local transformations to this query block and all underlying query
blocks.
*/
if (!thd->lex->is_view_context_analysis() &&
(outer_query_block() == nullptr ||
((parent_lex->sql_command == SQLCOM_SET_OPTION ||
parent_lex->sql_command == SQLCOM_END ||
parent_lex->sql_command == SQLCOM_LOAD) &&
outer_query_block()->outer_query_block() == nullptr)) &&
!skip_local_transforms) {
/*
This code is invoked in the following cases:
- if this is not a create view statement as transformations are
not required when creating a view.
- if this is an outer-most query block of a SELECT or multi-table
UPDATE/DELETE statement. Notice that for a UNION, this applies to
all query blocks. It also applies to a fake_query_block object.
- if this is one of highest-level subqueries, if the statement is
something else; like subq-i in:
UPDATE t1 SET col1=(subq-1), col2=(subq-2);
- If this is a subquery in a SET command,
or scalar subqueries used in SP expressions like sp_instr_freturn
(undicated by SQLCOM_END).
@todo: Refactor SET so that this is not needed.
- If this is a subquery in a LOAD command.
- INSERT may in some cases alter the sequence of preparation calls, by
setting the skip_local_transforms flag before calling prepare().
Local transforms are applied after query block merging.
This means that we avoid unnecessary invocations, as local transforms
would otherwise have been performed first before query block merging and
then another time after query block merging.
Thus, apply_local_transforms() may run only after the top query
is finished with query block merging. That's why
apply_local_transforms() is initiated only by the top query, and then
recurses into subqueries.
*/
if (apply_local_transforms(thd, true)) return true;
}
// Eliminate unused window definitions, redundant sorts etc.
if (!m_windows.is_empty()) Window::eliminate_unused_objects(&m_windows);
// Replace group by field references inside window functions with references
// in the presence of ROLLUP.
if (olap == ROLLUP_TYPE && resolve_rollup_wfs(thd))
return true; /* purecov: inspected */
// If CUBE is present in the query, all expressions that include
// any GROUP BY expression need to be marked as dependent on
// grouping set.
if (olap == CUBE_TYPE) {
for (Item *item : fields) {
bool is_updated = false;
WalkItem(item, enum_walk::POSTFIX, [this, &is_updated](Item *inner_item) {
if (find_in_group_list(inner_item, /*rollup_level=*/nullptr) !=
nullptr) {
inner_item->set_group_by_modifier();
is_updated = true;
}
return false;
});
if (is_updated) item->update_used_tables();
}
}
assert(!thd->is_error());
return false;
}
/*
Push conditions if possible to all the materialized derived tables.
Keep pushing as far down as possible making the call to this function
recursively.
@param thd thread handler
@returns false if success, true if error
Since this is called at the end after applying local transformations,
call this function while traversing the query block hierarchy top-down.
*/
bool Query_block::push_conditions_to_derived_tables(THD *thd) {
if (materialized_derived_table_count > 0)
for (Table_ref *tl = leaf_tables; tl; tl = tl->next_leaf) {
if (tl->is_view_or_derived() && tl->uses_materialization() &&
where_cond() && tl->can_push_condition_to_derived(thd)) {
Item **where = where_cond_ref();
Opt_trace_context *const trace = &thd->opt_trace;
Condition_pushdown cp(*where, tl, thd, trace);
// Make condition for the derived table
if (cp.make_cond_for_derived()) return true;
// The remaining condition that could not be pushed stays in this
// WHERE clause.
*where = cp.get_remainder_cond();
}
}
/*
Push conditions if possible to derived tables which were not merged. By
running top-down, the resulting pushed down condition can be pushed down
even more, in the case where a derived table contains an inner derived
table.
*/
for (Query_expression *unit = first_inner_query_expression(); unit;
unit = unit->next_query_expression()) {
for (Query_block *sl = unit->first_query_block(); sl;
sl = sl->next_query_block()) {
if (sl->push_conditions_to_derived_tables(thd)) return true;
}
}
return false;
}
/**
Prepare a table value constructor query block for optimization.
In the case of a table value constructor Query_block, we return the result of
this function from Query_block::prepare, instead of doing the standard prepare
routine.
For a table value constructor block, most preparation of a standard
Query_block becomes irrelevant (in particular INTO, FROM, WHERE, GROUP, HAVING
and WINDOW). We therefore substitute the standard resolving routine with this
one, which is simply responsible for resolving the expressions contained in
VALUES, as well as the query result.
@param thd thread handler
@returns false if success, true if error
*/
bool Query_block::prepare_values(THD *thd) {
Query_expression *const unit = master_query_expression();
if (resolve_table_value_constructor_values(thd)) return true;
if (setup_tables(thd, get_table_list(), /*select_insert=*/false)) {
return true;
}
// Setup the HAVING clause, duplicating code from Query_block::prepare. This
// is strictly necessary in the case of PREPARE statements, where
// subquery transformations may rewrite its Query_block to use m_having_cond.
//
// For example, a query like `SELECT * FROM t WHERE (a, b) IN (VALUES ROW(1,
// 10))` may be rewritten such that the Query_block within the IN subquery has
// a HAVING clause with an Item_cond_and. This must be taken into account
// during the second preparation that is done when the prepared statement is
// _executed_; we now have to resolve m_having_cond properly.
//
// Note that this duplicated code should be removed in the future. TODO: for
// wl#9384, which refactors DML statement preparation to be done only once.
if (m_having_cond) {
assert(m_having_cond->is_bool_func());
thd->where = "having clause";
having_fix_field = true;
resolve_place = RESOLVE_HAVING;
if (!m_having_cond->fixed &&
(m_having_cond->fix_fields(thd, &m_having_cond) ||
m_having_cond->check_cols(1)))
return true; /* purecov: inspected */
assert(!m_having_cond->const_item());
having_fix_field = false;
resolve_place = RESOLVE_NONE;
}
assert(qualify_cond() == nullptr);
/*
A table value constructor may have a defined ordering, thus calling
setup_order() is needed, however calling setup_order_final() is
not necessary since this construct cannot be aggregated.
*/
const size_t all_fields_count = fields.size();
if (is_ordered() && setup_order(thd, base_ref_items, get_table_list(),
&fields, order_list.first)) {
return true;
}
hidden_order_field_count = fields.size() - all_fields_count;
if (query_result() && query_result()->prepare(thd, fields, unit))
return true; /* purecov: inspected */
if (resolve_limits(thd)) return true;
// If this is a subquery, remove redundant clauses (ORDER BY in particular).
if (unit->item != nullptr && !thd->lex->is_view_context_analysis()) {
if (remove_redundant_subquery_clauses(thd)) return true;
}
return false;
}
/**
Apply local transformations, such as join nest simplification. 'Local' means
that each transformation happens on one single query block.
Also perform partition pruning, which is most effective after transformations
have been done.
This function also does condition pushdown to derived tables after all
the local transformations are applied although condition pushdown is
strictly not a local transform.
@param thd thread handler
@param prune if true, then prune partitions based on const conditions
@returns false if success, true if error
Since this is called after flattening of query blocks, call this function
while traversing the query block hierarchy top-down.
*/
bool Query_block::apply_local_transforms(THD *thd, bool prune) {
DBUG_TRACE;
assert(first_execution);
/*
If query block contains one or more merged derived tables/views,
walk through lists of columns in select lists and remove unused columns.
*/
if (derived_table_count) delete_unused_merged_columns(&m_table_nest);
for (Query_expression *unit = first_inner_query_expression(); unit;
unit = unit->next_query_expression())
for (auto qt : unit->query_terms<>())
if (qt->query_block()->apply_local_transforms(thd, true)) return true;
// Convert all outer joins to inner joins if possible
if (simplify_joins(thd, &m_table_nest, true, false, &m_where_cond))
return true;
if (record_join_nest_info(&m_table_nest)) return true;
build_bitmap_for_nested_joins(&m_table_nest, 0);
/*
Here are the reasons why we do the following check here (i.e. late).
* setup_fields () may have done split_sum_func () on aggregate items of
the SELECT list, so for reliable comparison of the ORDER BY list with
the SELECT list, we need to wait until split_sum_func() is done with
the ORDER BY list.
* we get resolved expressions "most of the time", which is always a good
thing. Some outer references may not be resolved, though.
* we need nested_join::used_tables, and this member is set in
simplify_joins()
* simplify_joins() does outer-join-to-inner conversion, which increases
opportunities for functional dependencies (weak-to-strong, which is
unusable, becomes strong-to-strong).
* check_only_full_group_by() is dependent on processing done by
simplify_joins() (for example it uses the value of
Query_block::outer_join).
The drawback is that the checks are after subquery transformations, so can
meet strange "internally added" items.
Note that when we are creating a view, simplify_joins() doesn't run so
check_only_full_group_by() cannot run, any error will be raised only
when the view is later used (SELECTed...)
*/
if ((is_distinct() || is_grouped()) &&
(thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
check_only_full_group_by(thd))
return true;
/*
Prune partitions for all query blocks after query block merging, if
pruning is wanted.
*/
if (partitioned_table_count && prune) {
for (Table_ref *tbl = leaf_tables; tbl; tbl = tbl->next_leaf) {
/*
This will only prune constant conditions, which will be used for
lock pruning.
*/
if (prune_partitions(thd, tbl->table, this,
tbl->join_cond() ? tbl->join_cond() : m_where_cond))
return true; /* purecov: inspected */
if (tbl->table->all_partitions_pruned_away &&
!tbl->is_inner_table_of_outer_join())
set_empty_query();
}
}
/*
Pushing conditions down to derived tables must be done after validity
checks of grouped queries done above; indeed, by replacing columns
with expressions, inside equalities of WHERE, pushdown makes the checks
impossible.
The said validity checks must be done after simplify_joins() has been
done on all query blocks. While pushdown must be done on the outer
most query block first, then on subqueries.
These circular dependencies explain why:
- pushdown is done after all local transformations have been applied.
- a pushed-down condition cannot help to convert LEFT JOIN to inner join
inside a derived table's definition.
*/
if (outer_query_block() == nullptr && push_conditions_to_derived_tables(thd))
return true;
return false;
}
/**
Update used tables information for a JOIN expression
*/
static void update_used_tables_for_join(mem_root_deque<Table_ref *> *tables) {
for (Table_ref *table_ref : *tables) {
if (table_ref->join_cond() != nullptr)
table_ref->join_cond()->update_used_tables();
if (table_ref->nested_join != nullptr)
update_used_tables_for_join(&table_ref->nested_join->m_tables);
}
}
/**
Update used tables information for all local expressions.
*/
void Query_block::update_used_tables() {
for (Item *item : visible_fields()) {
item->update_used_tables();
}
if (m_current_table_nest != nullptr)
update_used_tables_for_join(m_current_table_nest);
if (where_cond() != nullptr) where_cond()->update_used_tables();
for (ORDER *group = group_list.first; group; group = group->next)
(*group->item)->update_used_tables();
if (having_cond() != nullptr) having_cond()->update_used_tables();
for (ORDER *order = order_list.first; order; order = order->next)
(*order->item)->update_used_tables();
List_iterator<Window> wi(m_windows);
Window *w;
while ((w = wi++)) {
for (ORDER *wp = w->first_partition_by(); wp != nullptr; wp = wp->next)
(*wp->item)->update_used_tables();
for (ORDER *wo = w->first_order_by(); wo != nullptr; wo = wo->next)
(*wo->item)->update_used_tables();
}
}
/**
Resolve OFFSET and LIMIT clauses for a query block.
@param thd Thread handler
@returns false if success, true if error
OFFSET and LIMIT clauses may be attached to query blocks that make up
a query expression. OFFSET and LIMIT clauses that apply to a whole
query expression are attached to the fake_query_block, hence we can use
this interface to resolve them as well.
OFFSET and LIMIT may be unsigned integer literal values or parameters.
If parameters, ensure that the type is unsigned integer.
*/
bool Query_block::resolve_limits(THD *thd) {
if (offset_limit != nullptr) {
if (offset_limit->fix_fields(thd, nullptr))
return true; /* purecov: inspected */
if (offset_limit->data_type() == MYSQL_TYPE_INVALID) {
if (offset_limit->propagate_type(
thd, Type_properties(MYSQL_TYPE_LONGLONG, true)))
return true;
offset_limit->pin_data_type();
}
}
if (select_limit != nullptr) {
if (select_limit->fix_fields(thd, nullptr))
return true; /* purecov: inspected */
if (select_limit->data_type() == MYSQL_TYPE_INVALID) {
if (select_limit->propagate_type(
thd, Type_properties(MYSQL_TYPE_LONGLONG, true)))
return true;
select_limit->pin_data_type();
}
}
return false;
}
/**
Try to replace a const condition with a simple constant.
A true condition is replaced with an empty item pointer if remove_cond
is true. Else it is replaced with the constant TRUE.
A false condition is replaced with the constant FALSE.
@param thd Thread handler
@param[in,out] cond Address of condition, may be substituted with a literal
@param remove_cond If true removes a "true" condition. Else replaces
it with a constant TRUE.
@param ret_cond_value Store the result of the evaluated const condition
@returns false if success, true if error
*/
static bool simplify_const_condition(THD *thd, Item **cond, bool remove_cond,
bool *ret_cond_value) {
assert((*cond)->const_item());
bool cond_value;
/* Push ignore / strict error handler */
Ignore_error_handler ignore_handler;
Strict_error_handler strict_handler;
if (thd->lex->is_ignore())
thd->push_internal_handler(&ignore_handler);
else if (thd->is_strict_mode())
thd->push_internal_handler(&strict_handler);
bool err = eval_const_cond(thd, *cond, &cond_value);
/* Pop ignore / strict error handler */
if (thd->lex->is_ignore() || thd->is_strict_mode())
thd->pop_internal_handler();
if (err) return true;
DBUG_EXECUTE("where",
print_where(thd, *cond, "simplify_const_cond", QT_ORDINARY););
if (cond_value) {
if (remove_cond)
*cond = nullptr;
else {
Prepared_stmt_arena_holder ps_arena_holder(thd);
*cond = new (thd->mem_root) Item_func_true();
if (*cond == nullptr) return true;
}
} else if ((*cond)->type() != Item::INT_ITEM) {
Prepared_stmt_arena_holder ps_arena_holder(thd);
*cond = new (thd->mem_root) Item_func_false();
if (*cond == nullptr) return true;
}
if (ret_cond_value) *ret_cond_value = cond_value;
return false;
}
/**
Check if the subquery predicate can be executed via materialization.
@param thd THD
@param query_block Query_block of the subquery
@param outer Parent Query_block (outer to subquery)
@return true if subquery allows materialization, false otherwise.
*/
bool Item_in_subselect::subquery_allows_materialization(
THD *thd, Query_block *query_block, const Query_block *outer) {
const uint elements = query_expr()->first_query_block()->num_visible_fields();
DBUG_TRACE;
assert(elements >= 1);
assert(left_expr->cols() == elements);
OPT_TRACE_TRANSFORM(&thd->opt_trace, trace_wrapper, trace_mat,
query_block->select_number, "IN (SELECT)",
"materialization");
const char *cause = nullptr;
if (subquery_type() != Item_subselect::IN_SUBQUERY) {
// Subq-mat cannot handle 'outer_expr > {ANY|ALL}(subq)'...
cause = "not an IN predicate";
} else if (m_subquery_used_tables & RAND_TABLE_BIT) {
// Subquery with a random function cannot be materalized.
// But random function in left expression is OK
cause = "non-deterministic";
} else if (!query_block->is_simple_query_block()) {
// Subquery must be a simple query specification clause (not a set operation
// or a parenthesized query expression).
cause = "in set operation or a parenthesized query expression";
} else if (!query_block->master_query_expression()
->first_query_block()
->leaf_tables) {
// Subquery has no tables, hence no point in materializing.
cause = "no inner tables";
} else if (!outer->join) {
/*
Maybe this is a subquery of a single table UPDATE/DELETE (TODO:
handle this by switching to multi-table UPDATE/DELETE).
*/
cause = "parent query has no JOIN";
} else if (!outer->leaf_tables) {
// The upper query is SELECT ... FROM DUAL. No gain in materializing.
cause = "no tables in outer query";
} else if (dependent_before_in2exists()) {
/*
Subquery should not be correlated; the correlation due to predicates
injected by IN->EXISTS does not count as we will remove them if we
choose materialization.
TODO:
This is an overly restrictive condition. It can be extended to:
(Subquery is non-correlated ||
Subquery is correlated to any query outer to IN predicate ||
(Subquery is correlated to the immediate outer query &&
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
aggregate functions}) && subquery predicate is not under "NOT IN"))
*/
cause = "correlated";
} else {
/*
Check that involved expression types allow materialization.
This is a temporary fix for BUG#36752; see bug report for
description of restrictions we need to put on the compared expressions.
*/
assert(left_expr->fixed);
// @see comment in Item_subselect::element_index()
bool has_nullables = left_expr->is_nullable();
uint i = 0;
for (Item *const inner_item :
query_expr()->first_query_block()->visible_fields()) {
Item *const outer_item = left_expr->element_index(i++);
if (!types_allow_materialization(outer_item, inner_item)) {
cause = "type mismatch";
break;
}
if (inner_item->is_blob_field()) // 6
{
cause = "inner blob";
break;
}
has_nullables |= inner_item->is_nullable();
}
if (!cause) {
trace_mat.add("has_nullable_expressions", has_nullables);
/*
Subquery materialization cannot handle NULLs partial matching
properly, yet. If the outer or inner values are NULL, the
subselect_hash_sj_engine may reply FALSE when it should reply UNKNOWN.
So, we must limit it to those three cases:
- when FALSE and UNKNOWN are equivalent answers. I.e. this is a a
top-level predicate (this implies it is not negated).
- when outer and inner values cannot be NULL.
- when there is a single inner column (because for this we have a
limited implementation of NULLs partial matching).
*/
trace_mat.add("treat_UNKNOWN_as_FALSE", abort_on_null);
if (!abort_on_null && has_nullables && (elements > 1))
cause = "cannot_handle_partial_matches";
else {
trace_mat.add("possible", true);
return true;
}
}
}
assert(cause != nullptr);
trace_mat.add("possible", false).add_alnum("cause", cause);
return false;
}
/**
Make list of leaf tables of join table tree
@param list pointer to pointer on list first element
Must be set to NULL before first (recursive) call
@param tables table list
@returns pointer on pointer to next_leaf of last element
*/
static Table_ref **make_leaf_tables(Table_ref **list, Table_ref *tables) {
for (Table_ref *table = tables; table; table = table->next_local) {
// A mergeable view is not allowed to have a table pointer.
assert(!(table->is_view() && table->is_merged() && table->table));
if (table->merge_underlying_list) {
assert(table->is_merged());
list = make_leaf_tables(list, table->merge_underlying_list);
} else {
*list = table;
list = &table->next_leaf;
}
}
return list;
}
/**
Check privileges for the view tables merged into a query block.
@param thd Thread context.
@param want_privilege_first Privileges requested for the first leaf.
@param want_privilege_next Privileges requested for the remaining leaves.
@note Beware that it can't properly check privileges in cases when
table being changed is not the first table in the list of leaf
tables (for example, for multi-UPDATE).
@note The inner loop is slightly inefficient. A view will have its privileges
checked once for every base table that it refers to.
@returns false if success, true if error.
*/
bool Query_block::check_view_privileges(THD *thd,
Access_bitmask want_privilege_first,
Access_bitmask want_privilege_next) {
Access_bitmask want_privilege = want_privilege_first;
Internal_error_handler_holder<View_error_handler, Table_ref> view_handler(
thd, true, leaf_tables);
for (Table_ref *tl = leaf_tables; tl; tl = tl->next_leaf) {
for (Table_ref *ref = tl; ref->referencing_view;
ref = ref->referencing_view) {
if (check_single_table_access(thd, want_privilege, ref, false))
return true;
}
want_privilege = want_privilege_next;
}
return false;
}
/**
Set up table leaves in the query block based on list of tables.
@param thd Thread handler
@param tables List of tables to handle
@param select_insert It is SELECT ... INSERT command
@note
Check also that the 'used keys' and 'ignored keys' exists and set up the
table structure accordingly.
Create a list of leaf tables.
This function has to be called for all tables that are used by items,
as otherwise table->map is not set and all Item_field will be regarded
as const items.
@returns False on success, true on error
*/
bool Query_block::setup_tables(THD *thd, Table_ref *tables,
bool select_insert) {
DBUG_TRACE;
assert((select_insert && !tables->next_name_resolution_table) || !tables ||
(context.table_list && context.first_name_resolution_table));
leaf_tables = nullptr;
(void)make_leaf_tables(&leaf_tables, tables);
Table_ref *first_query_block_table = nullptr;
if (select_insert) {
// "insert_table" is needed for remap_tables().
thd->lex->insert_table = leaf_tables->top_table();
// Get first table in SELECT part
first_query_block_table = thd->lex->insert_table->next_local;
// Then, find the first leaf table
if (first_query_block_table)
first_query_block_table = first_query_block_table->first_leaf_table();
}
uint tableno = 0;
leaf_table_count = 0;
partitioned_table_count = 0;
for (Table_ref *tr = leaf_tables; tr; tr = tr->next_leaf, tableno++) {
TABLE *const table = tr->table;
if (tr == first_query_block_table) {
/*
For INSERT ... SELECT command, restart numbering from zero for first
leaf table from SELECT part of query.
*/
first_query_block_table = nullptr;
tableno = 0;
}
if (tableno >= MAX_TABLES) {
my_error(ER_TOO_MANY_TABLES, MYF(0), static_cast<int>(MAX_TABLES));
return true;
}
tr->set_tableno(tableno);
leaf_table_count++; // Count the input tables of the query
if (opt_hints_qb && // QB hints initialized
!tr->opt_hints_table) // Table hints are not adjusted yet
{
tr->opt_hints_table = opt_hints_qb->adjust_table_hints(tr);
}
if (tr->has_tablesample() && tr->validate_tablesample_clause(thd)) {
return true;
}
if (table == nullptr) continue;
assert(table->pos_in_table_list == tr);
if (!tr->opt_hints_table ||
// Ignore old index hint processing if new style hints are specified.
!tr->opt_hints_table->update_index_hint_maps(thd, tr->table)) {
if (tr->process_index_hints(thd, table)) return true;
}
if (table->part_info) // Count number of partitioned tables
partitioned_table_count++;
}
/*
@todo - consider calling this from SELECT::prepare() instead.
It might save the test on select_insert to prevent check_unresolved()
from being called twice for INSERT ... SELECT.
*/
if (opt_hints_qb && !select_insert) opt_hints_qb->check_unresolved(thd);
return false;
}
/**
Re-map table numbers for all tables in a query block.
@param thd Thread handler
@note
This function needs to be called after setup_tables() has been called,
and after a query block for a subquery has been merged into a parent
quary block.
*/
void Query_block::remap_tables(THD *thd) {
LEX *const lex = thd->lex;
Table_ref *first_query_block_table = nullptr;
if (lex->insert_table && lex->insert_table == leaf_tables->top_table()) {
/*
For INSERT ... SELECT command, restart numbering from zero for first
leaf table from SELECT part of query.
*/
// Get first table in SELECT part
first_query_block_table = lex->insert_table->next_local;
// Then, recurse down to get first leaf table
if (first_query_block_table)
first_query_block_table = first_query_block_table->first_leaf_table();
}
uint tableno = 0;
for (Table_ref *tl = leaf_tables; tl; tl = tl->next_leaf) {
// Reset table number after having reached first table after insert table
if (first_query_block_table == tl) tableno = 0;
tl->set_tableno(tableno++);
}
}
/**
@brief Resolve derived table, view or table function references in query block
@param thd Pointer to THD.
@param apply_semijoin if true, apply semi-join transform when possible
@return false if success, true if error
*/
bool Query_block::resolve_placeholder_tables(THD *thd, bool apply_semijoin) {
DBUG_TRACE;
assert(derived_table_count > 0 || table_func_count > 0);
// Prepare derived tables and views that belong to this query block.
for (Table_ref *tl = get_table_list(); tl; tl = tl->next_local) {
if (!tl->is_view_or_derived() && !tl->is_table_function()) continue;
// scalar to derived: derived tables may have been merged already:
// WL#6570 transform_grouped_to_derived() calls setup_tables() and
// resolve_placeholder_tables().
if (tl->is_merged() || tl->uses_materialization()) {
continue;
}
assert(!tl->is_merged() && !tl->uses_materialization());
if (tl->resolve_derived(thd, apply_semijoin)) return true;
/*
Merge the derived tables that do not require materialization into
the current query block, if possible.
Merging is only done once and must not be repeated for prepared execs.
*/
if (!thd->lex->is_view_context_analysis()) {
if (tl->is_mergeable() && merge_derived(thd, tl))
return true; /* purecov: inspected */
}
if (tl->is_merged()) continue;
// Prepare remaining derived tables for materialization
if (tl->is_table_function()) {
if (tl->setup_table_function(thd)) {
return true;
}
} else if (tl->table == nullptr && tl->setup_materialized_derived(thd)) {
return true;
}
materialized_derived_table_count++;
}
return false;
}
/**
Check if the offset and limit are valid for a semijoin. A semijoin
can be used only if OFFSET is 0 and select LIMIT is not 0.
@retval false if OFFSET and LIMIT does not permit a semijoin,
@retval true otherwise.
*/
bool Query_block::is_row_count_valid_for_semi_join() {
if (offset_limit != nullptr &&
(!offset_limit->const_item() || offset_limit->val_int() != 0))
return false;
if (select_limit != nullptr &&
(!select_limit->const_item() || select_limit->val_int() == 0))
return false;
return true;
}
/**
Expand all '*' in list of expressions with the matching column references
Function should not be called with no wild cards in select list
@param thd thread handler
@returns false if OK, true if error
*/
bool Query_block::setup_wild(THD *thd) {
DBUG_TRACE;
assert(with_wild > 0);
// PS/SP uses arena so that changes are made permanently.
Prepared_stmt_arena_holder ps_arena_holder(thd);
for (auto it = fields.begin(); with_wild > 0 && it != fields.end(); ++it) {
Item *item = *it;
if (item->hidden) continue;
Item_field *item_field;
if (item->type() == Item::FIELD_ITEM &&
(item_field = down_cast<Item_field *>(item)) &&
item_field->is_asterisk()) {
assert(item_field->field == nullptr);
const bool any_privileges = item_field->any_privileges;
Item_subselect *subsel = master_query_expression()->item;
/*
In case of EXISTS(SELECT * ... HAVING ...), don't use this
transformation. The columns in HAVING will need to resolve to the
select list. Replacing * with 1 effectively eliminates this
possibility.
*/
if (subsel != nullptr &&
subsel->subquery_type() == Item_subselect::EXISTS_SUBQUERY &&
!having_cond()) {
/*
It is EXISTS(SELECT * ...) and we can replace * by any constant.
Item_int do not need fix_fields() because it is basic constant.
*/
*it = new Item_int(NAME_STRING("Not_used"), 1,
MY_INT64_NUM_DECIMAL_DIGITS);
} else {
assert(item_field->context == &this->context);
if (insert_fields(thd, this, item_field->db_name,
item_field->table_name, &fields, &it, any_privileges))
return true;
}
with_wild--;
}
}
return false;
}
/**
Resolve WHERE condition and join conditions
@param thd thread handler
@returns false if success, true if error
*/
bool Query_block::setup_conds(THD *thd) {
DBUG_TRACE;
/*
it_is_update set to true when tables of primary Query_block (Query_block
which belong to LEX, i.e. most up SELECT) will be updated by
INSERT/UPDATE/LOAD
NOTE: using this condition helps to prevent call of prepare_check_option()
from subquery of VIEW, because tables of subquery belongs to VIEW
(see condition before prepare_check_option() call)
*/
const bool it_is_update = (this == thd->lex->query_block) &&
thd->lex->which_check_option_applicable();
const bool save_is_item_list_lookup = is_item_list_lookup;
is_item_list_lookup = false;
DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
if (m_where_cond) {
assert(m_where_cond->is_bool_func());
resolve_place = Query_block::RESOLVE_CONDITION;
thd->where = "where clause";
if ((!m_where_cond->fixed &&
m_where_cond->fix_fields(thd, &m_where_cond)) ||
m_where_cond->check_cols(1))
return true;
assert(m_where_cond->data_type() != MYSQL_TYPE_INVALID);
// Simplify the where condition if it's a const item
if (m_where_cond->const_item() && !thd->lex->is_view_context_analysis() &&
!m_where_cond->walk(&Item::is_non_const_over_literals,
enum_walk::POSTFIX, nullptr) &&
simplify_const_condition(thd, &m_where_cond))
return true;
resolve_place = Query_block::RESOLVE_NONE;
}
// Resolve all join condition clauses
if (!m_table_nest.empty() &&
setup_join_cond(thd, &m_table_nest, it_is_update))
return true;
is_item_list_lookup = save_is_item_list_lookup;
assert(thd->lex->current_query_block() == this);
assert(!thd->is_error());
return false;
}
/**
Resolve join conditions for a join nest
@param thd thread handler
@param tables List of tables with join conditions
@param in_update True if used in update command that may have CHECK OPTION
@returns false if success, true if error
*/
bool Query_block::setup_join_cond(THD *thd, mem_root_deque<Table_ref *> *tables,
bool in_update) {
DBUG_TRACE;
for (Table_ref *tr : *tables) {
// Traverse join conditions recursively
if (tr->nested_join != nullptr &&
setup_join_cond(thd, &tr->nested_join->m_tables, in_update))
return true;
Item **ref = tr->join_cond_ref();
Item *join_cond = tr->join_cond();
bool remove_cond = false;
if (join_cond) {
assert(join_cond->is_bool_func());
resolve_place = Query_block::RESOLVE_JOIN_NEST;
resolve_nest = tr;
thd->where = "on clause";
if ((!join_cond->fixed && join_cond->fix_fields(thd, ref)) ||
join_cond->check_cols(1))
return true;
cond_count++;
assert(tr->join_cond()->data_type() != MYSQL_TYPE_INVALID);
if ((*ref)->const_item() && !thd->lex->is_view_context_analysis() &&
!(*ref)->walk(&Item::is_non_const_over_literals, enum_walk::POSTFIX,
nullptr) &&
simplify_const_condition(thd, ref, remove_cond))
return true;
resolve_place = Query_block::RESOLVE_NONE;
resolve_nest = nullptr;
}
if (in_update) {
// Process CHECK OPTION
Table_ref *view = tr->top_table();
if (view->is_view() && view->is_merged()) {
if (view->prepare_check_option(thd))
return true; /* purecov: inspected */
tr->check_option = view->check_option;
}
}
}
return false;
}
/**
Set NESTED_JOIN::counter=0 in all nested joins in passed list.
@param join_list Pass NULL. Non-NULL is reserved for recursive inner calls,
then it is a list of nested joins to process, and may also contain base
tables which will be ignored.
*/
void Query_block::reset_nj_counters(mem_root_deque<Table_ref *> *join_list) {
DBUG_TRACE;
if (join_list == nullptr) join_list = &m_table_nest;
for (Table_ref *table : *join_list) {
NESTED_JOIN *nested_join;
if ((nested_join = table->nested_join)) {
nested_join->nj_counter = 0;
reset_nj_counters(&nested_join->m_tables);
}
}
}
/**
Simplify joins replacing outer joins by inner joins whenever it's
possible.
The function, during a retrieval of join_list, eliminates those
outer joins that can be converted into inner join, possibly nested.
It also moves the join conditions for the converted outer joins
and from inner joins to conds.
The function also calculates some attributes for nested joins:
-# used_tables
-# not_null_tables
-# dep_tables.
-# join_cond_dep_tables
The first two attributes are used to test whether an outer join can
be substituted by an inner join. The third attribute represents the
relation 'to be dependent on' for tables. If table t2 is dependent
on table t1, then in any evaluated execution plan table access to
table t2 must precede access to table t2. This relation is used also
to check whether the query contains invalid cross-references.
The fourth attribute is an auxiliary one and is used to calculate
dep_tables.
As the attribute dep_tables qualifies possibles orders of tables in the
execution plan, the dependencies required by the straight join
modifiers are reflected in this attribute as well.
The function also removes all parentheses that can be removed from the join
expression without changing its meaning.
@note
An outer join can be replaced by an inner join if the where condition
or the join condition for an embedding nested join contains a conjunctive
predicate rejecting null values for some attribute of the inner tables.
E.g. in the query:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
@endcode
the predicate t2.b < 5 rejects nulls.
The query is converted first to:
@code
SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
@endcode
then to the equivalent form:
@code
SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a
@endcode
Similarly the following query:
@code
SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b
WHERE t2.c < 5
@endcode
is converted to:
@code
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b
@endcode
One conversion might trigger another:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
LEFT JOIN t3 ON t3.b=t2.b
WHERE t3 IS NOT NULL =>
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
WHERE t3 IS NOT NULL AND t3.b=t2.b =>
SELECT * FROM t1, t2, t3
WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
@endcode
The function removes all unnecessary parentheses from the expression
produced by the conversions.
E.g.
@code
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
@endcode
finally is converted to:
@code
SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
@endcode
It also will remove parentheses from the following queries:
@code
SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
@endcode
The benefit of this simplification procedure is that it might return
a query for which the optimizer can evaluate execution plans with more
join orders. With a left join operation the optimizer does not
consider any plan where one of the inner tables is before some of outer
tables.
IMPLEMENTATION
The function is implemented by a recursive procedure. On the recursive
ascent all attributes are calculated, all outer joins that can be
converted are replaced and then all unnecessary parentheses are removed.
As join list contains join tables in the reverse order sequential
elimination of outer joins does not require extra recursive calls.
SEMI-JOIN NOTES
Remove all semi-joins that have are within another semi-join (i.e. have
an "ancestor" semi-join nest)
EXAMPLES
Here is an example of a join query with invalid cross references:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
@endcode
@param thd thread handler
@param join_list list representation of the join to be converted
@param top true <=> cond is the where condition
@param in_sj true <=> processing semi-join nest's children
@param[in,out] cond In: condition to which the join condition for converted
outer joins is to be added;
Out: new condition
@param changelog Don't specify this parameter, it is reserved for
recursive calls inside this function
@returns true for error, false for success
*/
bool Query_block::simplify_joins(THD *thd,
mem_root_deque<Table_ref *> *join_list,
bool top, bool in_sj, Item **cond,
uint *changelog) {
/*
Each type of change done by this function, or its recursive calls, is
tracked in a bitmap:
*/
enum change {
NONE = 0,
OUTER_JOIN_TO_INNER = 1 << 0,
JOIN_COND_TO_WHERE = 1 << 1,
PAREN_REMOVAL = 1 << 2,
SEMIJOIN = 1 << 3
};
uint changes = 0; // To keep track of changes.
if (changelog == nullptr) // This is the top call.
changelog = &changes;
Table_ref *prev_table = nullptr;
const bool straight_join = active_options() & SELECT_STRAIGHT_JOIN;
DBUG_TRACE;
/*
Try to simplify join operations from join_list.
The most outer join operation is checked for conversion first.
join_list is a join nest, and 'cond' is a condition which acts as a filter
applied to the nest's operation (post-filter).
Thus, considering this example:
(A LEFT JOIN B ON JC) WHERE W ,
we'll "confront W with A LEFT JOIN B": this will, recursively,
- confront W with B,
- confront W with A.
Because W is external to the nest, if W would be false when B is
NULL-complemented we know we can change LEFT JOIN to JOIN.
We will not confront JC with B or A, it wouldn't make sense, as JC isn't a
post-filter for their join operation.
Another example:
(A LEFT JOIN (B LEFT JOIN C ON JC2) ON JC1) WHERE W ,
while confronting W with (B LEFT JOIN C), we will also, as first step,
confront JC1 with (B LEFT JOIN C), and thus recursively confront JC1
with C and then with B.
Another example:
(A LEFT JOIN (B SEMI JOIN C ON JC2) ON JC1) WHERE W ,
while confronting W with (B SEMI JOIN C), if W is known false we will
*/
for (Table_ref *table : *join_list) {
table_map used_tables;
table_map not_null_tables = table_map(0);
NESTED_JOIN *nested_join = table->nested_join;
if (nested_join != nullptr) {
/*
If the element of join_list is a nested join apply
the procedure to its nested join list first.
This confronts the join nest's condition with each member of the
nest.
*/
if (table->join_cond() != nullptr) {
Item *join_cond = table->join_cond();
/*
If a join condition JC is attached to the table,
check all null rejected predicates in this condition.
If such a predicate over an attribute belonging to
an inner table of an embedded outer join is found,
the outer join is converted to an inner join and
the corresponding join condition is added to JC.
*/
if (simplify_joins(
thd, &nested_join->m_tables,
false, // not 'top' as it's not WHERE.
// SJ nests can dissolve into upper SJ or anti SJ nests:
in_sj || table->is_sj_or_aj_nest(), &join_cond, changelog))
return true;
if (join_cond != table->join_cond()) {
assert(join_cond != nullptr);
table->set_join_cond(join_cond);
/*
For a semi-join or anti-join table nest, if the join condition
has been reduced to a constant value, it means that factored out
join condition operands can be removed.
*/
if (table->is_sj_or_aj_nest() && join_cond->const_item()) {
clear_sj_expressions(nested_join);
}
}
}
nested_join->used_tables = table_map(0);
nested_join->not_null_tables = table_map(0);
// This recursively confronts "cond" with each member of the nest
if (simplify_joins(thd, &nested_join->m_tables,
top, // if it was WHERE it still is
in_sj || table->is_sj_or_aj_nest(), cond, changelog))
return true;
used_tables = nested_join->used_tables;
not_null_tables = nested_join->not_null_tables;
} else {
used_tables = table->map();
if (*cond != nullptr) not_null_tables = (*cond)->not_null_tables();
}
if (table->embedding != nullptr) {
table->embedding->nested_join->used_tables |= used_tables;
table->embedding->nested_join->not_null_tables |= not_null_tables;
}
if (!table->outer_join || (used_tables & not_null_tables)) {
/*
For some of the inner tables there are conjunctive predicates
that reject nulls => the outer join can be replaced by an inner join.
*/
if (table->outer_join) {
*changelog |= OUTER_JOIN_TO_INNER;
table->outer_join = false;
}
if (table->join_cond() != nullptr) {
*changelog |= JOIN_COND_TO_WHERE;
/* Add join condition to the WHERE or upper-level join condition. */
if (*cond != nullptr) {
Item *i1 = *cond;
Item *i2 = table->join_cond();
/*
User supplied stored procedures in the query can violate row-level
filter enforced by a view. So make sure view's filter conditions
precede any other conditions.
*/
if (table->is_view() && i1->has_stored_program()) {
std::swap(i1, i2);
}
Item_cond_and *new_cond =
down_cast<Item_cond_and *>(and_conds(i1, i2));
if (new_cond == nullptr) return true;
new_cond->apply_is_true();
/*
It is always a new item as both the upper-level condition and a
join condition existed
*/
assert(!new_cond->fixed);
Item *cond_after_fix = new_cond;
if (new_cond->fix_fields(thd, &cond_after_fix)) return true;
if (new_cond == cond_after_fix) {
}
*cond = cond_after_fix;
} else {
*cond = table->join_cond();
}
table->set_join_cond(nullptr);
}
}
// A table is traversed when 'cond' is WHERE, and when 'cond' is the join
// condition of any nest containing the table. Some bitmaps can be set
// only after all traversals of this table i.e. when 'cond' is WHERE.
if (!top) continue;
/*
Only inner tables of non-convertible outer joins remain with
the join condition.
*/
if (table->join_cond() != nullptr) {
table->dep_tables |= table->join_cond()->used_tables();
// At this point the joined tables always have an embedding join nest:
assert(table->embedding != nullptr);
table->dep_tables &= ~table->embedding->nested_join->used_tables;
// Embedding table depends on tables used in embedded join conditions.
table->embedding->join_cond_dep_tables |=
table->join_cond()->used_tables();
}
if (prev_table != nullptr) {
/* The order of tables is reverse: prev_table follows table */
if (prev_table->straight || straight_join)
prev_table->dep_tables |= used_tables;
if (prev_table->join_cond() != nullptr) {
prev_table->dep_tables |= table->join_cond_dep_tables;
table_map prev_used_tables = prev_table->nested_join != nullptr
? prev_table->nested_join->used_tables
: prev_table->map();
/*
If join condition contains no reference to outer tables
we still make the inner tables dependent on the outer tables,
as the outer must go before the inner since the executor requires
that at least one outer table is before the inner tables.
It would be enough to set dependency only on one outer table
for them. Yet this is really a rare case.
Note:
PSEUDO_TABLE_BITS mask should not be counted as it
prevents update of inner table dependencies.
For example it might happen if RAND()/COUNT(*) function
is used in JOIN ON clause.
*/
if ((((prev_table->join_cond()->used_tables() & ~PSEUDO_TABLE_BITS) &
~prev_used_tables) &
used_tables) == 0) {
prev_table->dep_tables |= used_tables;
}
}
}
prev_table = table;
}
/*
Flatten nested joins that can be flattened.
no join condition and not a semi-join => can be flattened.
*/
for (auto li = join_list->begin(); li != join_list->end();) {
Table_ref *table = *li;
NESTED_JOIN *nested_join = table->nested_join;
if (table->is_sj_nest() && !in_sj) {
/*
If this is a semi-join that is not contained within another semi-join,
leave it intact.
Otherwise it is flattened, for example
A SJ (B SJ (C)) becomes the equivalent A SJ (B JOIN C),
A AJ (B SJ (C)) becomes the equivalent A AJ (B JOIN C),
While dissolving a SJ nest into an AJ nest is ok (for the AJ
this may lead to duplicates but AJ only cares for "at least
one match"), dissolving an AJ nest into a SJ is not ok:
A SJ (B AJ (C)) is not equivalent to A SJ (B JOIN C);
that is why the next if() block is guarded by !join_cond() which takes
care of that.
Note that when dissolving the SJ nest, its condition isn't lost as it
has previously been added to WHERE or outer nest's condition in
convert_subquery_to_semijoin().
*/
*changelog |= SEMIJOIN;
} else if (nested_join != nullptr && table->join_cond() == nullptr) {
*changelog |= PAREN_REMOVAL;
for (Table_ref *tbl : nested_join->m_tables) {
tbl->embedding = table->embedding;
tbl->join_list = table->join_list;
tbl->dep_tables |= table->dep_tables;
}
li = join_list->erase(li);
li = join_list->insert(li, nested_join->m_tables.begin(),
nested_join->m_tables.end());
// Don't advance li; we want to process the newly added tables.
continue;
}
++li;
}
if (changes) {
Opt_trace_context *trace = &thd->opt_trace;
if (unlikely(trace->is_started())) {
Opt_trace_object trace_wrapper(trace);
Opt_trace_object trace_object(trace, "transformations_to_nested_joins");
{
Opt_trace_array trace_changes(trace, "transformations");
if (changes & SEMIJOIN) trace_changes.add_alnum("semijoin");
if (changes & OUTER_JOIN_TO_INNER)
trace_changes.add_alnum("outer_join_to_inner_join");
if (changes & JOIN_COND_TO_WHERE)
trace_changes.add_alnum("JOIN_condition_to_WHERE");
if (changes & PAREN_REMOVAL)
trace_changes.add_alnum("parenthesis_removal");
}
// the newly transformed query is worth printing
opt_trace_print_expanded_query(thd, this, &trace_object);
}
}
return false;
}
/**
Record join nest info in the select block.
After simplification of inner join, outer join and semi-join structures:
- record the remaining semi-join structures in the enclosing query block.
- record transformed join conditions in Table_ref objects.
This function is called recursively for each join nest and/or table
in the query block.
@param tables List of tables and join nests
@return False if successful, True if failure
*/
bool Query_block::record_join_nest_info(mem_root_deque<Table_ref *> *tables) {
for (Table_ref *table : *tables) {
if (table->nested_join == nullptr) {
if (table->join_cond()) outer_join |= table->map();
continue;
}
if (record_join_nest_info(&table->nested_join->m_tables)) return true;
/*
sj_inner_tables is set properly later in pull_out_semijoin_tables().
This assignment is required in case pull_out_semijoin_tables()
is not called.
*/
if (table->is_sj_or_aj_nest())
table->sj_inner_tables = table->nested_join->used_tables;
if (table->is_sj_or_aj_nest()) {
sj_nests.push_back(table);
}
if (table->join_cond()) outer_join |= table->nested_join->used_tables;
}
return false;
}
/**
Update table reference information for conditions and expressions due to
query blocks having been merged in from derived tables/views and due to
semi-join transformation.
This is needed for two reasons:
1. Since table numbers are changed, we need to update used_tables
information for all conditions and expressions that are possibly touched.
2. For semi-join, some column references are changed from outer references
to local references.
The function needs to recursively walk down into join nests,
in order to cover all conditions and expressions.
For a semi-join, tables from the subquery are added last in the query block.
This means that conditions and expressions from the outer query block
are unaffected. But all conditions inside the semi-join nest, including
join conditions, must have their table numbers changed.
For a derived table/view, tables from the subquery are merged into the
outer query, and this function is called for every derived table that is
merged in. This algorithm only works when derived tables are merged in
the order of their original table numbers.
A hypothetical example with a triple self-join over a mergeable view:
CREATE VIEW v AS SELECT t1.a, t2.b FROM t1 JOIN t2 USING (a);
SELECT v1.a, v1.b, v2.b, v3.b
FROM v AS v1 JOIN v AS v2 ON ... JOIN v AS v3 ON ...;
The analysis starts with three tables v1, v2 and v3 having numbers 0, 1, 2.
First we merge in v1, so we get (t1, t2, v2, v3). v2 and v3 are shifted up.
Tables from v1 need to have their table numbers altered (actually they do not
since both old and new numbers are 0 and 1, but this is a special case).
v2 and v3 are not merged in yet, so we delay pullout on them until they
are merged. Conditions and expressions from the outer query are not resolved
yet, so regular resolving will take of them later.
Then we merge in v2, so we get (t1, t2, t1, t2, v3). The tables from this
view gets numbers 2 and 3, and v3 gets number 4.
Because v2 had a higher number than the tables from v1, the join nest
representing v1 is unaffected. And v3 is still not merged, so the only
join nest we need to consider is v2.
Finally we merge in v3, and then we have tables (t1, t2, t1, t2, t1, t2),
with numbers 0 through 5.
Again, since v3 has higher number than any of the already merged in views,
only this join nest needs the pullout.
@param parent_query_block Query block being merged into
@param removed_query_block Query block that is removed (subquery)
@param tr Table object this pullout is applied to
@param table_adjust Number of positions that a derived table nest is
adjusted, used to fix up semi-join related fields.
Tables are adjusted from position N to N+table_adjust
@param lateral_deps Lateral dependencies of the unit owning
removed_query_block
*/
static void fix_tables_after_pullout(Query_block *parent_query_block,
Query_block *removed_query_block,
Table_ref *tr, uint table_adjust,
table_map lateral_deps) {
if (tr->is_merged()) {
// Update select list of merged derived tables:
for (Field_translator *transl = tr->field_translation;
transl < tr->field_translation_end; transl++) {
assert(transl->item->fixed);
transl->item->fix_after_pullout(parent_query_block, removed_query_block);
}
// Update used table info for the WHERE clause of the derived table
assert(!tr->derived_where_cond || tr->derived_where_cond->fixed);
if (tr->derived_where_cond)
tr->derived_where_cond->fix_after_pullout(parent_query_block,
removed_query_block);
}
/*
If join_cond() is fixed, it contains a join condition from a subquery
that has already been resolved. Call fix_after_pullout() to update
used table information since table numbers may have changed.
If join_cond() is not fixed, it contains a condition that was generated
in the derived table merge operation, which will be fixed later.
This condition may also contain a fixed part, but this is saved as
derived_where_cond and is pulled out explicitly.
*/
if (tr->join_cond() && tr->join_cond()->fixed)
tr->join_cond()->fix_after_pullout(parent_query_block, removed_query_block);
if (tr->nested_join) {
// In case a derived table is merged-in, these fields need adjustment:
tr->nested_join->sj_corr_tables <<= table_adjust;
tr->nested_join->sj_depends_on <<= table_adjust;
// If the removed query block is from a LATERAL derived table, and
// contains a semi-join nest, this nest may depend on the lateral
// dependencies, and if then, these should now be recorded as
// local dependencies of the nest. But it's impossible to know if this is
// the case, as the members below don't mention outer references. Be
// conservative and add dependencies unconditionally. At least this will
// prevent materialization.
tr->nested_join->sj_corr_tables |= lateral_deps;
tr->nested_join->sj_depends_on |= lateral_deps;
for (Table_ref *child : tr->nested_join->m_tables) {
fix_tables_after_pullout(parent_query_block, removed_query_block, child,
table_adjust, lateral_deps);
}
}
if (tr->is_derived() && tr->table &&
tr->derived_query_expression()->uncacheable & UNCACHEABLE_DEPENDENT) {
/*
It's a materialized derived table which is being pulled up.
If it has an outer reference, and this ref belongs to parent_query_block,
then the derived table will need re-materialization as if it were
LATERAL, not just once per execution of parent_query_block.
We thus compute its used_tables in the new context, to decide.
*/
Query_expression *unit = tr->derived_query_expression();
unit->m_lateral_deps = OUTER_REF_TABLE_BIT;
unit->fix_after_pullout(parent_query_block, removed_query_block);
unit->m_lateral_deps &= ~PSEUDO_TABLE_BITS;
tr->dep_tables |= unit->m_lateral_deps;
/*
If m_lateral_deps!=0, some outer ref is now a neighbour in FROM: we have
made 'tr' LATERAL.
Note that 'tr' might be a common table expression: it means we now have a
"lateral CTE".
*/
}
}
/**
Fix used tables information for a subquery after query transformations.
This is for transformations where the subquery remains a subquery - it is
not merged, it merely moves up by effect of a transformation on a containing
query block.
Most actions here involve re-resolving information for conditions
and items belonging to the subquery.
If the subquery contains an outer reference into removed_query_block or
parent_query_block, the relevant information is updated by
Item_ident::fix_after_pullout().
*/
void Query_expression::fix_after_pullout(Query_block *parent_query_block,
Query_block *removed_query_block)
{
// Go through all query specification objects of the subquery and re-resolve
// all relevant expressions belonging to them.
for (Query_block *sel = first_query_block(); sel;
sel = sel->next_query_block()) {
sel->fix_after_pullout(parent_query_block, removed_query_block);
}
// @todo figure out if we need to do it for fake_query_block too.
}
/// @see Query_expression::fix_after_pullout
void Query_block::fix_after_pullout(Query_block *parent_query_block,
Query_block *removed_query_block) {
if (where_cond())
where_cond()->fix_after_pullout(parent_query_block, removed_query_block);
/*
Join conditions can contain an outer reference; and
derived table merging changes WHERE to a join condition, which thus can
have an outer reference. So we have to call fix_after_pullout() on join
conditions. The reference may also be located in a derived table used by
this subquery. fix_tables_after_pullout() will handle the two cases.
table_adjust and lateral_deps are 0 because we're not merging these tables
up.
*/
for (Table_ref *tr : m_table_nest) {
fix_tables_after_pullout(parent_query_block, removed_query_block, tr,
/*table_adjust=*/0, /*lateral_deps=*/0);
}
if (having_cond())
having_cond()->fix_after_pullout(parent_query_block, removed_query_block);
for (Item *item : visible_fields()) {
item->fix_after_pullout(parent_query_block, removed_query_block);
}
/* Re-resolve ORDER BY and GROUP BY fields */
for (ORDER *order = order_list.first; order; order = order->next)
(*order->item)->fix_after_pullout(parent_query_block, removed_query_block);
for (ORDER *group = group_list.first; group; group = group->next)
(*group->item)->fix_after_pullout(parent_query_block, removed_query_block);
}
/**
Remove SJ outer/inner expressions.
@param nested_join join nest
*/
void Query_block::clear_sj_expressions(NESTED_JOIN *nested_join) {
nested_join->sj_outer_exprs.clear();
nested_join->sj_inner_exprs.clear();
assert(sj_nests.empty());
}
/**
Build equality conditions using outer expressions and inner
expressions. If the equality condition is not constant, add
it to the semi-join condition. Otherwise, evaluate it and
remove the constant expressions from the
outer/inner expressions list if the result is true. If the
result is false, remove all the expressions in outer/inner
expression list and attach an always false condition
to semijoin condition.
@param thd Thread context
@param nested_join Join nest
@param subq_query_block Query block for the subquery
@param outer_tables_map Map of tables from original outer query block
@param[in,out] sj_cond Semi-join condition to be constructed
Contains non-equalities on input.
@param[out] simple_const true if the returned semi-join condition is
a simple true or false predicate, false otherwise.
@return false if success, true if error
*/
bool Query_block::build_sj_cond(THD *thd, NESTED_JOIN *nested_join,
Query_block *subq_query_block,
table_map outer_tables_map, Item **sj_cond,
bool *simple_const) {
*simple_const = false;
Item *new_cond = nullptr;
auto ii = nested_join->sj_inner_exprs.begin();
auto oi = nested_join->sj_outer_exprs.begin();
while (ii != nested_join->sj_inner_exprs.end() &&
oi != nested_join->sj_outer_exprs.end()) {
bool should_remove = false;
Item *inner = *ii;
Item *outer = *oi;
/*
Ensure that all involved expressions are pulled out after transformation.
(If they are already out, this is a no-op).
*/
outer->fix_after_pullout(this, subq_query_block);
inner->fix_after_pullout(this, subq_query_block);
Item_func_eq *item_eq = new Item_func_eq(outer, inner);
if (item_eq == nullptr) return true; /* purecov: inspected */
Item *predicate = item_eq;
if (!item_eq->fixed && item_eq->fix_fields(thd, &predicate)) return true;
// Evaluate if the condition is on const expressions
if (predicate->const_item() &&
!(predicate)->walk(&Item::is_non_const_over_literals,
enum_walk::POSTFIX, nullptr)) {
bool cond_value = true;
/* Push ignore / strict error handler */
Ignore_error_handler ignore_handler;
Strict_error_handler strict_handler;
if (thd->lex->is_ignore())
thd->push_internal_handler(&ignore_handler);
else if (thd->is_strict_mode())
thd->push_internal_handler(&strict_handler);
bool err = eval_const_cond(thd, predicate, &cond_value);
/* Pop ignore / strict error handler */
if (thd->lex->is_ignore() || thd->is_strict_mode())
thd->pop_internal_handler();
if (err) return true;
if (cond_value) {
/*
Remove the expression from inner/outer expression list if the
const condition evaluates to true as Item_cond::fix_fields will
remove the condition later.
*/
should_remove = true;
} else {
/*
Remove all the expressions in inner/outer expression list if
one of condition evaluates to always false. Add an always false
condition to semi-join condition.
*/
nested_join->sj_inner_exprs.clear();
nested_join->sj_outer_exprs.clear();
Item *new_item = new Item_func_false();
if (new_item == nullptr) return true;
(*sj_cond) = new_item;
*simple_const = true;
return false;
}
}
/*
If the selected expression has a reference to our query block, add it as
a non-trivially correlated reference (to avoid materialization).
The case of yet-more-outer references is handled like this:
- if this nest is part of a LATERAL derived table, which is later
merged, fix_tables_after_pullout will update sj_corr_tables (with its
lateral_deps argument).
- if this nest is part of a subquery which later becomes a
semi/anti-join nest, it will be dissolved into the new parent nest, so
the inner nest's sj_corr_tables will be unused, while the parent's
will be correct as it will be computed from the concatenated new WHERE
condition.
*/
nested_join->sj_corr_tables |= inner->used_tables() & outer_tables_map;
if (should_remove) {
ii = nested_join->sj_inner_exprs.erase(ii);
oi = nested_join->sj_outer_exprs.erase(oi);
} else {
new_cond = and_items(new_cond, predicate);
if (new_cond == nullptr) return true; /* purecov: inspected */
++ii, ++oi;
}
}
/*
Semijoin processing expects at least one inner/outer expression
in the list if there is a sj_nest present. This is required for semi-join
materialization and loose scan.
*/
if (nested_join->sj_inner_exprs.empty()) {
Item *const_item = new Item_int(1);
if (const_item == nullptr) return true;
nested_join->sj_inner_exprs.push_back(const_item);
nested_join->sj_outer_exprs.push_back(const_item);
new_cond = new Item_func_true();
if (new_cond == nullptr) return true;
*simple_const = true;
}
(*sj_cond) = and_items(*sj_cond, new_cond);
if (*sj_cond == nullptr) return true; /* purecov: inspected */
return false;
}
/// Context object used by semijoin equality decorrelation code.
class Semijoin_decorrelation {
mem_root_deque<Item *> *sj_outer_exprs, *sj_inner_exprs;
/// If nullptr: only a=b is decorrelated.
/// Otherwise, a OP b is decorrelated for OP in <>, >=, >, <=, <, and
/// for each decorrelated SJ outer/inner pair, located at position N
/// in sj_outer_exprs and sj_inner_exprs, we store, at the
/// same position in op_types, the operator's type code representing "outer OP
/// inner" (for example, LE_FUNC for outer<=inner as well as inner>=outer).
Mem_root_array<Item_func::Functype> *op_types;
public:
Semijoin_decorrelation(mem_root_deque<Item *> *sj_outer_exprs_arg,
mem_root_deque<Item *> *sj_inner_exprs_arg,
Mem_root_array<Item_func::Functype> *op_types_arg)
: sj_outer_exprs(sj_outer_exprs_arg),
sj_inner_exprs(sj_inner_exprs_arg),
op_types(op_types_arg) {}
void add_outer(Item *i) { sj_outer_exprs->push_back(i); }
void add_inner(Item *i) { sj_inner_exprs->push_back(i); }
bool decorrelate_only_eq() const { return op_types == nullptr; }
bool add_op_type(Item_func::Functype op_type) {
return (op_types != nullptr) ? op_types->push_back(op_type) : false;
}
Item_func::Functype op_type_at(int j) const {
return (op_types != nullptr) ? op_types->at(j) : Item_func::EQ_FUNC;
}
};
/**
Try to decorrelate an (in)equality node. The node can be decorrelated if one
argument contains only outer references and the other argument contains
references only to local tables.
Both arguments should be deterministic.
const-for-execution values are accepted in both arguments.
@note that a predicate like '(a,b) IN ((c,d))' is changed to two equalities
only during optimization, so at the present stage it isn't decorrelate-able.
@param sj_decor Object for recording the decorrelated expressions
@param func The query function node
@param[out] was_correlated = true if comparison is correlated and the
the expressions are added to sj_nest.
@returns false if success, true if error
*/
static bool decorrelate_equality(Semijoin_decorrelation &sj_decor,
Item_func *func, bool *was_correlated) {
*was_correlated = false;
Item_bool_func2 *bool_func = down_cast<Item_bool_func2 *>(func);
Item *const left = bool_func->arguments()[0];
Item *const right = bool_func->arguments()[1];
Item *inner = nullptr;
Item *outer = nullptr;
table_map left_used_tables = left->used_tables() & ~INNER_TABLE_BIT;
table_map right_used_tables = right->used_tables() & ~INNER_TABLE_BIT;
/*
Predicates that have non-deterministic elements are not decorrelated,
see explanation for Query_block::decorrelate_condition().
*/
if ((left_used_tables & RAND_TABLE_BIT) ||
(right_used_tables & RAND_TABLE_BIT))
return false;
if (left_used_tables == OUTER_REF_TABLE_BIT) {
outer = left;
} else if (!(left_used_tables & OUTER_REF_TABLE_BIT)) {
inner = left;
}
if (right_used_tables == OUTER_REF_TABLE_BIT) {
outer = right;
} else if (!(right_used_tables & OUTER_REF_TABLE_BIT)) {
inner = right;
}
if (inner == nullptr || outer == nullptr) return false;
// Equalities over row items cannot be decorrelated
if (outer->type() == Item::ROW_ITEM) return false;
sj_decor.add_outer(outer);
sj_decor.add_inner(inner);
if (sj_decor.add_op_type(
// use canonical form "outer OP inner":
(outer == left) ? bool_func->functype() : bool_func->rev_functype()))
return true;
*was_correlated = true;
return false;
}
static inline bool can_decorrelate_operator(Item_func *func, bool only_eq) {
auto op_type = func->functype();
switch (op_type) {
case Item_func::EQ_FUNC:
return true;
case Item_func::NE_FUNC:
case Item_func::LT_FUNC:
case Item_func::LE_FUNC:
case Item_func::GT_FUNC:
case Item_func::GE_FUNC:
return !only_eq;
default:
return false;
}
}
/**
Decorrelate the WHERE clause or a join condition of a subquery used in
an IN or EXISTS predicate.
Correlated predicates are removed from the condition and added to the
supplied semi-join nest.
The predicate must be either a simple (in)equality, or an AND condition that
contains one or more simple equalities, in order for decorrelation to be
possible.
@param sj_decor Object for recording the decorrelated expressions
@param join_nest Nest containing join condition to be decorrelated
=NULL: decorrelate the WHERE condition
@returns false if success, true if error
Decorrelation for subqueries containing non-deterministic components:
--------------------------------------------------------------------
There are two types of IN and EXISTS queries with non-deterministic
functions that may be meaningful (the EXISTS queries below are correlated
equivalents of the respective IN queries):
1. Non-deterministic function as substitute for expression from outer
query block:
A SELECT * FROM t1
WHERE RAND() IN (SELECT t2.x FROM t2)
B SELECT * FROM t1
WHERE EXISTS (SELECT * FROM t2 WHERE RAND() = t2.x);
Pick a set of random rows that matches against a fixed set (the subquery).
The intuitive interpretation of the IN subquery is that the random function
is evaluated per row of the outer query block, whereas in the EXISTS subquery,
it should be evaluated per row of the inner query block, and the subquery
is evaluated once per row of the outer query block.
2. Non-deterministic function as substitute for expression from inner
query block:
A SELECT * FROM t1
WHERE t1.x IN (SELECT RAND() FROM t2)
B SELECT * FROM t1
WHERE EXISTS (SELECT * FROM t2 WHERE RAND() = t1.x);
This is another way of picking a random row, but now the non-determinism
occurs in the inner query block.
The user will expect that only query 1A has the evaluation of
non-deterministic functions being performed in the outer query block.
Using decorrelation for query 1B would change the apparent semantics of
the query.
The purpose of decorrelation is to be able to use more execution strategies.
Without decorrelation, EXISTS is limited to FirstMatch and DupsWeedout
strategies. Decorrelation enables LooseScan and Materialization.
We can rule out LooseScan for case 2B, since it requires an indexed column
from the subquery, and for case 1B, since it requires that the outer table
is partitioned according to the distinct values of the index, and random
values do not fulfill that partitioning requirement.
The only strategy left is Materialization. With decorrelation, 1B would be
evaluated like 1A, which is not the intuitive way. 2B would also be
implemented like 2A, meaning that evaluation of non-deterministic functions
would move to the materialization function.
Thus, the intuitive interpretation is to avoid materialization for subqueries
with non-deterministic components in the inner query block, and hence
such predicates will not be decorrelated.
*/
bool Query_block::decorrelate_condition(Semijoin_decorrelation &sj_decor,
Table_ref *join_nest) {
Item *base_cond =
join_nest == nullptr ? where_cond() : join_nest->join_cond();
Item_cond *cond;
Item_func *func;
assert(base_cond != nullptr);
if (base_cond->type() == Item::FUNC_ITEM &&
(func = down_cast<Item_func *>(base_cond)) &&
can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
bool was_correlated;
if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
if (was_correlated) { // The simple equality has been decorrelated
if (join_nest == nullptr)
set_where_cond(nullptr);
else // Join conditions cannot be empty so install a TRUE value
join_nest->set_join_cond(new Item_func_true());
}
} else if (base_cond->type() == Item::COND_ITEM &&
(cond = down_cast<Item_cond *>(base_cond)) &&
cond->functype() == Item_func::COND_AND_FUNC) {
List<Item> *args = cond->argument_list();
List_iterator<Item> li(*args);
Item *item;
while ((item = li++)) {
if (item->type() == Item::FUNC_ITEM &&
(func = down_cast<Item_func *>(item)) &&
can_decorrelate_operator(func, sj_decor.decorrelate_only_eq())) {
bool was_correlated;
if (decorrelate_equality(sj_decor, func, &was_correlated)) return true;
if (was_correlated) li.remove();
}
}
if (args->is_empty()) { // All predicates have been decorrelated
if (join_nest == nullptr)
set_where_cond(nullptr);
else // Join conditions cannot be empty so install a TRUE value
join_nest->set_join_cond(new Item_func_true());
}
}
return false;
}
bool Query_block::allocate_grouping_sets(THD *thd) {
auto max_group_by_elements = GetMaximumNumGrpByColsSupported(olap);
if (group_list.elements > static_cast<uint>(max_group_by_elements)) {
/* The number of Grouping sets cannot be greater than INT_MAX as IsBitSet
* take integer as the input bit*/
my_error(ER_TOO_MANY_GROUP_BY_MODIFIER_BRANCHES, MYF(0),
GroupByModifierString(olap), max_group_by_elements);
return true;
}
m_num_grouping_sets = (olap == ROLLUP_TYPE)
? group_list.elements + 1
: pow(static_cast<double>(2),
static_cast<double>(group_list.elements));
assert(m_num_grouping_sets != 0);
/* Allocate bitmap for grouping sets. */
for (ORDER *grp = group_list.first; grp != nullptr; grp = grp->next) {
grp->grouping_set_info =
pointer_cast<MY_BITMAP *>(thd->alloc(sizeof(MY_BITMAP)));
if (grp->grouping_set_info == nullptr) {
return true;
}
my_bitmap_map *bitbuf = pointer_cast<my_bitmap_map *>(
thd->alloc(bitmap_buffer_size(m_num_grouping_sets)));
bitmap_init(grp->grouping_set_info, bitbuf, m_num_grouping_sets);
}
return false;
}
/**
Populate the grouping set bitvector if the query block has non-primitive
grouping. If the non-primitive grouping is ROLLUP or CUBE, the grouping sets
have to be computed. The representation of the grouping set is done using a
bitfield in the ORDER object.
case ROLLUP : Say the query has GROUP BY ROLLUP (a,b) then the grouping sets
will be (a,b) (a) () where () represents single group aggregate without any
grouping. Here there are 3 grouping sets ranging from 0 to 2 and 0 is the
single group aggregate. The bitfield associated with GROUP BY element 'a'
will be 3 (i,e. 2+1) The bitfield associated with Group by element 'b' will
be 2 as it is part of only set number 2.
case CUBE: Say the query has GROUP BY CUBE (a,b) then the grouping sets
will be (a,b) (a) (b) (). The number of grouping sets will be (2^n)
where n is the number of elements in the GROUP BY list. The bitfield
associated with Group by element 'a' will be 6 (i.e. 4+2).
The bitfield associated with Group by element 'b' will be 1.
*/
bool Query_block::populate_grouping_sets(THD *thd) {
assert(group_list.elements != 0 && olap != UNSPECIFIED_OLAP_TYPE);
if (allocate_grouping_sets(thd)) {
return true;
}
bool rollup = (olap == ROLLUP_TYPE);
int gby_idx = 0;
for (ORDER *grp = group_list.first; grp != nullptr;
grp = grp->next, gby_idx++) {
for (int gs = 1; gs < m_num_grouping_sets; gs++) {
if ((rollup && gby_idx < gs) || (!rollup && IsBitSet(gby_idx, gs))) {
bitmap_set_bit(grp->grouping_set_info, gs);
}
}
}
return false;
}
bool walk_join_list(mem_root_deque<Table_ref *> &list,
std::function<bool(Table_ref *)> action) {
for (Table_ref *tl : list) {
if (action(tl)) return true;
if (tl->nested_join != nullptr &&
walk_join_list(tl->nested_join->m_tables, action))
return true;
}
return false;
}
/**
Builds the list of SJ outer/inner expressions
@param thd Connection handle
@param[out] sj_outer_exprs Will add outer expressions here
@param[out] sj_inner_exprs Will add inner expressions here
@param subq_pred Item for the subquery
@param subq_query_block Single query block for the subquery
@returns true if error
*/
static bool build_sj_exprs(THD *thd, mem_root_deque<Item *> *sj_outer_exprs,
mem_root_deque<Item *> *sj_inner_exprs,
Item_exists_subselect *subq_pred,
Query_block *subq_query_block) {
Item_in_subselect *in_subq_pred = down_cast<Item_in_subselect *>(subq_pred);
assert(in_subq_pred->left_expr->fixed);
/*
We have a special case for IN predicates with a scalar subquery or a
row subquery in the predicand (left operand), such as this:
(SELECT 1,2 FROM t1) IN (SELECT x,y FROM t2)
We cannot make the join condition 1=x AND 2=y, since that might evaluate
to true even if t1 is empty. Instead make the join condition
(SELECT 1,2 FROM t1) = (x,y) in this case.
*/
Item_subselect *left_subquery =
(in_subq_pred->left_expr->type() == Item::SUBQUERY_ITEM)
? static_cast<Item_subselect *>(in_subq_pred->left_expr)
: nullptr;
if (left_subquery &&
(left_subquery->subquery_type() == Item_subselect::SCALAR_SUBQUERY)) {
mem_root_deque<Item *> ref_list(thd->mem_root);
Item *header = subq_query_block->base_ref_items[0];
for (uint i = 1; i < in_subq_pred->left_expr->cols(); i++) {
ref_list.push_back(subq_query_block->base_ref_items[i]);
}
Item_row *right_expr = new Item_row(header, ref_list);
if (!right_expr) return true; /* purecov: inspected */
sj_outer_exprs->push_back(in_subq_pred->left_expr);
sj_inner_exprs->push_back(right_expr);
} else {
for (uint i = 0; i < in_subq_pred->left_expr->cols(); i++) {
Item *const li = in_subq_pred->left_expr->element_index(i);
sj_outer_exprs->push_back(li);
sj_inner_exprs->push_back(subq_query_block->base_ref_items[i]);
}
}
return false;
}
/**
Convert a subquery predicate of this query block into a Table_ref
semi-join nest.
@param thd Thread handle
@param subq_pred Subquery predicate to be converted.
This is either an IN, =ANY or EXISTS predicate, possibly
negated.
@returns false if success, true if error
The following transformations are performed:
1. IN/=ANY predicates on the form:
@code
SELECT ...
FROM ot1 ... otN
WHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieM
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
are transformed into:
@code
SELECT ...
FROM (ot1 ... otN) SJ (it1 ... itK)
ON (oe1, ... oeM) = (ie1, ..., ieM)
[AND inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
Notice that the inner-cond may contain correlated and non-correlated
expressions. Further transformations will analyze and break up such
expressions.
2. EXISTS predicates on the form:
@code
SELECT ...
FROM ot1 ... otN
WHERE EXISTS (SELECT expressions
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
are transformed into:
@code
SELECT ...
FROM (ot1 ... otN) SJ (it1 ... itK)
[ON inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
3. Negated EXISTS predicates on the form:
@code
SELECT ...
FROM ot1 ... otN
WHERE NOT EXISTS (SELECT expressions
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
are transformed into:
@code
SELECT ...
FROM (ot1 ... otN) AJ (it1 ... itK)
[ON inner-cond]
[WHERE outer-cond AND is-null-cond(it1)]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
where AJ means "antijoin" and is like a LEFT JOIN; and is-null-cond is
false if the row of it1 is "found" and "not_null_compl" (i.e. matches
inner-cond).
4. Negated IN predicates on the form:
@code
SELECT ...
FROM ot1 ... otN
WHERE (oe1, ... oeM) NOT IN (SELECT ie1, ..., ieM
FROM it1 ... itK
[WHERE inner-cond])
[AND outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
are transformed into:
@code
SELECT ...
FROM (ot1 ... otN) AJ (it1 ... itK)
ON (oe1, ... oeM) = (ie1, ..., ieM)
[AND inner-cond]
[WHERE outer-cond]
[GROUP BY ...] [HAVING ...] [ORDER BY ...]
@endcode
5. The cases 1/2 (respectively 3/4) above also apply when the predicate is
decorated with IS TRUE or IS NOT FALSE (respectively IS NOT TRUE or IS
FALSE).
*/
bool Query_block::convert_subquery_to_semijoin(
THD *thd, Item_exists_subselect *subq_pred) {
Table_ref *emb_tbl_nest = nullptr;
mem_root_deque<Table_ref *> *emb_join_list = &m_table_nest;
DBUG_TRACE;
assert(subq_pred->subquery_type() == Item_subselect::IN_SUBQUERY ||
subq_pred->subquery_type() == Item_subselect::EXISTS_SUBQUERY);
Opt_trace_context *trace = &thd->opt_trace;
Opt_trace_object trace_object(trace, "transformation_to_semi_join");
if (unlikely(trace->is_started())) {
trace_object.add("subquery_predicate", subq_pred);
}
bool outer_join = false; // True if predicate is inner to an outer join
// Save the set of tables in the outer query block:
table_map outer_tables_map = all_tables_map();
const bool do_aj = subq_pred->can_do_aj;
/*
Find out where to insert the semi-join nest and the generated condition.
For t1 LEFT JOIN t2, embedding_join_nest will be t2.
Note that t2 may be a simple table or may itself be a join nest
(e.g. in the case t1 LEFT JOIN (t2 JOIN t3))
*/
if (subq_pred->embedding_join_nest != nullptr) {
// Is this on inner side of an outer join?
outer_join = subq_pred->embedding_join_nest->is_inner_table_of_outer_join();
if (subq_pred->embedding_join_nest->nested_join) {
/*
We're dealing with
... [LEFT] JOIN ( ... ) ON (subquery AND condition) ...
The sj-nest will be inserted into the brackets nest.
*/
emb_tbl_nest = subq_pred->embedding_join_nest;
emb_join_list = &emb_tbl_nest->nested_join->m_tables;
} else if (!subq_pred->embedding_join_nest->outer_join) {
/*
We're dealing with
... INNER JOIN tblX ON (subquery AND condition) ...
The sj-nest will be tblX's "sibling", i.e. another child of its
parent. This is ok because tblX is joined as an inner join.
*/
emb_tbl_nest = subq_pred->embedding_join_nest->embedding;
if (emb_tbl_nest) emb_join_list = &emb_tbl_nest->nested_join->m_tables;
} else {
Table_ref *outer_tbl = subq_pred->embedding_join_nest;
/*
We're dealing with
... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
tbl will be replaced with:
( tbl SJ (subq_tables) )
| |
|<----- wrap_nest ---->|
giving:
... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
Q: other subqueries may be pointing to this element. What to do?
A1: simple solution: copy *subq_pred->embedding_join_nest= *parent_nest.
But we'll need to fix other pointers.
A2: Another way: have Table_ref::next_ptr so the following
subqueries know the table has been nested.
A3: changes in the Table_ref::outer_join will make everything work
automatically.
*/
Table_ref *const wrap_nest = Table_ref::new_nested_join(
thd->mem_root, "(sj-wrap)", outer_tbl->embedding,
outer_tbl->join_list, this);
if (wrap_nest == nullptr) return true;
wrap_nest->nested_join->m_tables.push_back(outer_tbl);
outer_tbl->embedding = wrap_nest;
outer_tbl->join_list = &wrap_nest->nested_join->m_tables;
/*
wrap_nest will take place of outer_tbl, so move the outer join flag
and join condition.
*/
wrap_nest->outer_join = outer_tbl->outer_join;
outer_tbl->outer_join = false;
wrap_nest->set_join_cond(outer_tbl->join_cond());
outer_tbl->set_join_cond(nullptr);
for (auto li = wrap_nest->join_list->begin();
li != wrap_nest->join_list->end(); ++li) {
Table_ref *tbl = *li;
if (tbl == outer_tbl) {
*li = wrap_nest;
break;
}
}
/*
outer_tbl is replaced by wrap_nest. Any subquery which was attached to
outer_tbl must be attached to embedding_join_nest instead.
*/
for (Item_exists_subselect *subquery : (*sj_candidates)) {
if (subquery->embedding_join_nest == outer_tbl)
subquery->embedding_join_nest = wrap_nest;
}
/*
Ok now wrap_nest 'contains' outer_tbl and we're ready to add the
semi-join nest into it
*/
emb_join_list = &wrap_nest->nested_join->m_tables;
emb_tbl_nest = wrap_nest;
}
}
// else subquery is in WHERE.
if (do_aj) {
/*
A negated IN/EXISTS like:
NOT EXISTS(... FROM subq_tables WHERE subq_cond)
The above code has ensured that we have one of these 3 situations:
(a) FROM ... WHERE (subquery AND condition)
(emb_tbl_nest == nullptr, emb_join_list == FROM clause)
which has to be changed to
FROM (...) LEFT JOIN (subq_tables) ON subq_cond
^ aj-left-nest ^aj-nest
WHERE x IS NULL AND condition
or:
(b) ... [LEFT] JOIN ( ... ) ON (subquery AND condition) ...
^ emb_tbl_nest, emb_join_list
which has to be changed to
... [LEFT] JOIN ( (...) LEFT JOIN (subq_tables) ON subq_cond)
^aj-left-nest ^aj-nest
^ emb_tbl_nest, emb_join_list
ON x IS NULL AND condition ...
or:
(c) ... INNER JOIN tblX ON (subquery AND condition) ...
^ emb_tbl_nest, emb_join_list
(if no '()' above this INNER JOIN up to the root, emb_tbl_nest ==
nullptr and emb_join_list == FROM clause)
which has to be changed to
( ... INNER JOIN tblX ON condition) LEFT JOIN (subq_tables) ON subq_cond
^aj-left-nest ^aj-nest
so:
- move all tables of emb_join_list into a new aj-left-nest
- emb_join_list is now empty
- put subq_tables in a new aj-nest
- add the subq's subq_cond to aj-nest's ON
- add a LEFT JOIN operator between the aj-left-nest and aj-nest, with
ON condition subq_cond.
- insert aj-nest and aj-left-nest into emb_join_list
- for some reason, a LEFT JOIN must always be wrapped into a nest (call
nest_last_join() then)
- do not yet add 'x IS NULL to WHERE' (add it in optimization phase when
we have the QEP_TABs so we can set up the 'found'/'not_null_compl'
pointers in trig conds).
*/
Table_ref *const wrap_nest = Table_ref::new_nested_join(
thd->mem_root, "(aj-left-nest)", emb_tbl_nest, emb_join_list, this);
if (wrap_nest == nullptr) return true;
// Go through tables of emb_join_list, insert them in wrap_nest
for (Table_ref *outer_tbl : *emb_join_list) {
wrap_nest->nested_join->m_tables.push_back(outer_tbl);
outer_tbl->embedding = wrap_nest;
outer_tbl->join_list = &wrap_nest->nested_join->m_tables;
}
// FROM clause is now only the new left nest
emb_join_list->clear();
emb_join_list->push_back(wrap_nest);
outer_join = true;
}
if (unlikely(trace->is_started()))
trace_object.add_alnum("embedded in", emb_tbl_nest ? "JOIN" : "WHERE");
Table_ref *const sj_nest = Table_ref::new_nested_join(
thd->mem_root, do_aj ? "(aj-nest)" : "(sj-nest)", emb_tbl_nest,
emb_join_list, this);
if (sj_nest == nullptr) return true; /* purecov: inspected */
NESTED_JOIN *const nested_join = sj_nest->nested_join;
/* Nests do not participate in those 'chains', so: */
/* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
/*
Using push_front, as sj_nest may be right arg of LEFT JOIN if
antijoin, and right args of LEFT JOIN go before left arg.
*/
emb_join_list->push_front(sj_nest);
/*
Natural joins inside a semi-join nest were already processed when the
subquery went through initial preparation.
*/
sj_nest->nested_join->natural_join_processed = true;
/*
nested_join->used_tables and nested_join->not_null_tables are
initialized in simplify_joins().
*/
Query_block *const subq_query_block =
subq_pred->query_expr()->first_query_block();
nested_join->query_block_id = subq_query_block->select_number;
// Merge tables from underlying query block into this join nest
if (sj_nest->merge_underlying_tables(subq_query_block))
return true; /* purecov: inspected */
/*
Add tables from subquery at end of leaf table chain.
(This also means that table map for parent query block tables are unchanged)
*/
Table_ref *tl;
for (tl = leaf_tables; tl->next_leaf; tl = tl->next_leaf) {
}
tl->next_leaf = subq_query_block->leaf_tables;
// Add tables from subquery at end of next_local chain.
m_table_list.push_back(&subq_query_block->m_table_list);
// Note that subquery's tables are already in the next_global chain
// Remove the original subquery predicate from the WHERE/ON
// The subqueries were replaced with TRUE value earlier
// @todo also reset the 'with_subselect' there.
// Walk through child's tables and adjust table map
uint table_no = leaf_table_count;
for (tl = subq_query_block->leaf_tables; tl; tl = tl->next_leaf, table_no++) {
tl->dep_tables <<= leaf_table_count;
tl->set_tableno(table_no);
}
/*
If we leave this function in an error path before subq_query_block is
unlinked, make sure tables are not duplicated, or cleanup code could be
confused:
*/
subq_query_block->m_table_list.clear();
subq_query_block->leaf_tables = nullptr;
// Adjust table and expression counts in parent query block:
derived_table_count += subq_query_block->derived_table_count;
materialized_derived_table_count +=
subq_query_block->materialized_derived_table_count;
table_func_count += subq_query_block->table_func_count;
has_sj_nests |= subq_query_block->has_sj_nests;
has_aj_nests |= subq_query_block->has_aj_nests;
partitioned_table_count += subq_query_block->partitioned_table_count;
leaf_table_count += subq_query_block->leaf_table_count;
cond_count += subq_query_block->cond_count;
between_count += subq_query_block->between_count;
if (subq_query_block->active_options() & OPTION_SCHEMA_TABLE)
add_base_options(OPTION_SCHEMA_TABLE);
if (outer_join) propagate_nullability(&sj_nest->nested_join->m_tables, true);
nested_join->sj_outer_exprs.clear();
nested_join->sj_inner_exprs.clear();
if (subq_pred->subquery_type() == Item_subselect::IN_SUBQUERY) {
build_sj_exprs(thd, &nested_join->sj_outer_exprs,
&nested_join->sj_inner_exprs, subq_pred, subq_query_block);
} else { // this is EXISTS
// Expressions from the SELECT list will not be used; unlike in the case of
// IN, they are not part of sj_inner_exprs.
// @todo in WL#6570, move this to resolve_subquery().
for (Item *item : subq_query_block->visible_fields()) {
Item::Cleanup_after_removal_context ctx(this);
item->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
}
{
/*
The WHERE clause and the join conditions may contain equalities that may
be leveraged by semi-join strategies (e.g to set up key lookups in
semi-join materialization), decorrelate them (ie. add respective fields
and expressions to sj_inner_exprs and sj_outer_exprs).
*/
Semijoin_decorrelation sj_decor(&sj_nest->nested_join->sj_outer_exprs,
&sj_nest->nested_join->sj_inner_exprs,
// decorrelate only equalities
/*op_types=*/nullptr);
if (subq_query_block->where_cond() &&
subq_query_block->decorrelate_condition(sj_decor, nullptr))
return true;
if (walk_join_list(
subq_query_block->m_table_nest, [&](Table_ref *tr) -> bool {
return !tr->is_inner_table_of_outer_join() && tr->join_cond() &&
subq_query_block->decorrelate_condition(sj_decor, tr);
}))
return true;
}
// Unlink the subquery's query expression:
subq_query_block->master_query_expression()->exclude_level();
// Merge subquery's name resolution contexts into parent's
merge_contexts(subq_query_block);
repoint_contexts_of_join_nests(subq_query_block->m_table_nest);
// Update table map for semi-join nest's WHERE condition and join conditions
fix_tables_after_pullout(this, subq_query_block, sj_nest, 0, 0);
Item *sj_cond = subq_query_block->where_cond();
if (sj_cond != nullptr) sj_cond->fix_after_pullout(this, subq_query_block);
// Assign the set of non-trivially tables after decorrelation
nested_join->sj_corr_tables =
(sj_cond != nullptr ? sj_cond->used_tables() & outer_tables_map : 0);
walk_join_list(subq_query_block->m_table_nest, [&](Table_ref *tr) -> bool {
if (tr->join_cond())
nested_join->sj_corr_tables |=
tr->join_cond()->used_tables() & outer_tables_map;
if (tr->is_derived() && tr->uses_materialization())
nested_join->sj_corr_tables |=
tr->derived_query_expression()->m_lateral_deps;
return false;
});
// Build semijoin condition using the inner/outer expression list
bool simple_cond;
if (build_sj_cond(thd, nested_join, subq_query_block, outer_tables_map,
&sj_cond, &simple_cond))
return true;
// Processing requires a non-empty semi-join condition:
assert(sj_cond != nullptr);
// Fix the created equality and AND
if (!sj_cond->fixed) {
Opt_trace_array sj_on_trace(&thd->opt_trace,
"evaluating_constant_semijoin_conditions");
sj_cond->apply_is_true();
if (sj_cond->fix_fields(thd, &sj_cond))
return true; /* purecov: inspected */
}
sj_nest->set_sj_or_aj_nest();
assert(sj_nest->join_cond() == nullptr);
if (do_aj) {
sj_nest->outer_join = true;
sj_nest->set_join_cond(sj_cond);
this->outer_join |= sj_nest->nested_join->used_tables;
if (emb_tbl_nest == nullptr)
nest_last_join(thd); // as is done for a true LEFT JOIN
}
if (unlikely(trace->is_started())) {
trace_object.add("semi-join condition", sj_cond);
Opt_trace_array trace_dep(trace, "decorrelated_predicates");
auto ii = nested_join->sj_inner_exprs.begin();
auto oi = nested_join->sj_outer_exprs.begin();
while (ii != nested_join->sj_inner_exprs.end() &&
oi != nested_join->sj_outer_exprs.end()) {
Item *inner = *ii++, *outer = *oi++;
Opt_trace_object trace_predicate(trace);
trace_predicate.add("outer", outer);
trace_predicate.add("inner", inner);
}
}
/*
sj_depends_on contains the set of outer tables referred in the
subquery's WHERE clause as well as tables referred in the IN predicate's
left-hand side, and lateral dependencies from materialized derived tables
contained in the original subquery.
*/
nested_join->sj_depends_on =
nested_join->sj_corr_tables | (sj_cond->used_tables() & outer_tables_map);
assert((nested_join->sj_corr_tables & OUTER_REF_TABLE_BIT) == 0);
assert((nested_join->sj_depends_on & OUTER_REF_TABLE_BIT) == 0);
// TODO fix QT_
DBUG_EXECUTE("where", print_where(thd, sj_cond, "SJ-COND", QT_ORDINARY););
Item *cond = nullptr;
if (do_aj) {
// Condition remains attached to inner table, as for LEFT JOIN
cond = sj_cond;
} else if (emb_tbl_nest != nullptr) {
// Inject semi-join condition into parent's join condition
emb_tbl_nest->set_join_cond(and_items(emb_tbl_nest->join_cond(), sj_cond));
if (emb_tbl_nest->join_cond() == nullptr) return true;
emb_tbl_nest->join_cond()->apply_is_true();
if (!emb_tbl_nest->join_cond()->fixed &&
emb_tbl_nest->join_cond()->fix_fields(thd,
emb_tbl_nest->join_cond_ref()))
return true;
cond = emb_tbl_nest->join_cond();
} else {
// Inject semi-join condition into parent's WHERE condition
m_where_cond = and_items(m_where_cond, sj_cond);
if (m_where_cond == nullptr) return true;
m_where_cond->apply_is_true();
if (m_where_cond->fix_fields(thd, &m_where_cond)) return true;
cond = m_where_cond;
}
/*
If the current semi-join or anti-join condition is always TRUE or
always FALSE:
(a) there is no need to set up lookups (for loosescan or materialization).
(b) if some predicates were eliminated as part of const value optimization,
their expressions are still in the inner/outer expression list
and must be removed.
(If a "simple condition" was added in build_sj_cond(), this is not necessary
since the expressions were constant values and are safe to keep.)
*/
if (cond != nullptr && cond->const_item() && !simple_cond) {
clear_sj_expressions(nested_join);
}
if (subq_query_block->ftfunc_list->elements &&
add_ftfunc_list(subq_query_block->ftfunc_list))
return true; /* purecov: inspected */
if (do_aj)
has_aj_nests = true;
else
has_sj_nests = true; // This query block has semi-join nests
return false;
}
/**
Merge a derived table or view into a query block.
If some constraint prevents the derived table from being merged then do
nothing, which means the table will be prepared for materialization later.
After this call, check is_merged() to see if the table was really merged.
@param thd Thread handler
@param derived_table Derived table which is to be merged.
@return false if successful, true if error
*/
bool Query_block::merge_derived(THD *thd, Table_ref *derived_table) {
DBUG_TRACE;
if (!derived_table->is_view_or_derived() || derived_table->is_merged())
return false;
Query_expression *const derived_query_expression =
derived_table->derived_query_expression();
// A derived table must be prepared before we can merge it
assert(derived_query_expression->is_prepared());
LEX *const lex = parent_lex;
// Check whether the outer query allows merged views
if ((master_query_expression() == lex->unit && !lex->can_use_merged()) ||
lex->can_not_use_merged())
return false;
/*
@todo: The implementation of LEX::can_use_merged() currently avoids
merging of views that are contained in other views if
can_use_merged() returns false.
*/
/*
Check whether derived table is mergeable, and directives allow merging;
priority order is:
- ALGORITHM says MERGE or TEMPTABLE
- hint specifies MERGE or NO_MERGE (=materialization)
- optimizer_switch's derived_merge is ON and heuristic suggests merge
*/
if (derived_table->algorithm == VIEW_ALGORITHM_TEMPTABLE ||
!derived_query_expression->is_mergeable())
return false;
if (derived_table->algorithm == VIEW_ALGORITHM_UNDEFINED) {
const bool merge_heuristic =
(derived_table->is_view() || allow_merge_derived) &&
derived_query_expression->merge_heuristic(thd->lex);
if (!hint_table_state(thd, derived_table, DERIVED_MERGE_HINT_ENUM,
merge_heuristic ? OPTIMIZER_SWITCH_DERIVED_MERGE : 0))
return false;
}
Query_block *const derived_query_block =
derived_query_expression->first_query_block();
/*
If STRAIGHT_JOIN is specified, it is not valid to merge in a query block
that contains semi-join nests
*/
if ((active_options() & SELECT_STRAIGHT_JOIN) &&
(derived_query_block->has_sj_nests || derived_query_block->has_aj_nests))
return false;
// Check that we have room for the merged tables in the table map:
if (leaf_table_count + derived_query_block->leaf_table_count - 1 > MAX_TABLES)
return false;
derived_table->set_merged();
DBUG_PRINT("info", ("algorithm: MERGE"));
Opt_trace_context *const trace = &thd->opt_trace;
Opt_trace_object trace_wrapper(trace);
Opt_trace_object trace_derived(trace,
derived_table->is_view() ? "view" : "derived");
trace_derived.add_utf8_table(derived_table)
.add("select#", derived_query_block->select_number)
.add("merged", true);
Prepared_stmt_arena_holder ps_arena_holder(thd);
// Save offset for table number adjustment
uint table_adjust = derived_table->tableno();
// Set up permanent list of underlying tables of a merged view
derived_table->merge_underlying_list = derived_query_block->get_table_list();
/**
A view is updatable if any underlying table is updatable.
A view is insertable-into if all underlying tables are insertable.
A view is not updatable nor insertable if it contains an outer join
@see mysql_register_view()
*/
if (derived_table->is_view()) {
bool updatable = false;
bool insertable = true;
bool outer_joined = false;
for (Table_ref *tr = derived_table->merge_underlying_list; tr;
tr = tr->next_local) {
updatable |= tr->is_updatable();
insertable &= tr->is_insertable();
outer_joined |= tr->is_inner_table_of_outer_join();
}
updatable &= !outer_joined;
insertable &= !outer_joined;
if (updatable) derived_table->set_updatable();
if (insertable) derived_table->set_insertable();
}
// Add a nested join object to the derived table object
if (!(derived_table->nested_join = new (thd->mem_root) NESTED_JOIN))
return true;
// Merge tables from underlying query block into this join nest
if (derived_table->merge_underlying_tables(derived_query_block))
return true; /* purecov: inspected */
// Replace derived table in leaf table list with underlying tables:
for (Table_ref **tl = &leaf_tables; *tl; tl = &(*tl)->next_leaf) {
if (*tl == derived_table) {
for (Table_ref *leaf = derived_query_block->leaf_tables; leaf;
leaf = leaf->next_leaf) {
leaf->dep_tables <<= table_adjust;
if (leaf->next_leaf == nullptr) {
leaf->next_leaf = (*tl)->next_leaf;
break;
}
}
*tl = derived_query_block->leaf_tables;
break;
}
}
leaf_table_count += (derived_query_block->leaf_table_count - 1);
derived_table_count += derived_query_block->derived_table_count;
table_func_count += derived_query_block->table_func_count;
materialized_derived_table_count +=
derived_query_block->materialized_derived_table_count;
has_sj_nests |= derived_query_block->has_sj_nests;
has_aj_nests |= derived_query_block->has_aj_nests;
partitioned_table_count += derived_query_block->partitioned_table_count;
cond_count += derived_query_block->cond_count;
between_count += derived_query_block->between_count;
// Remove tables from old query block:
derived_query_block->leaf_tables = nullptr;
derived_query_block->leaf_table_count = 0;
derived_query_block->m_table_list.clear();
// Propagate schema table indication:
// @todo: Add to BASE options instead
if (derived_query_block->active_options() & OPTION_SCHEMA_TABLE)
add_base_options(OPTION_SCHEMA_TABLE);
// Propagate nullability for derived tables within outer joins:
if (derived_table->is_inner_table_of_outer_join())
propagate_nullability(&derived_table->nested_join->m_tables, true);
select_n_having_items += derived_query_block->select_n_having_items;
// Merge the WHERE clause into the outer query block
if (derived_table->merge_where(thd)) return true; /* purecov: inspected */
if (derived_table->create_field_translation(thd))
return true; /* purecov: inspected */
// Exclude the derived table query expression from query graph.
derived_query_expression->exclude_level();
// Don't try to access it:
derived_table->set_derived_query_expression((Query_expression *)1);
// Merge subquery's name resolution contexts into parent's
merge_contexts(derived_query_block);
repoint_contexts_of_join_nests(derived_query_block->m_table_nest);
// Leaf tables have been shuffled, so update table numbers for them
remap_tables(thd);
// Update table info of referenced expressions after query block is merged
fix_tables_after_pullout(this, derived_query_block, derived_table,
table_adjust,
derived_query_expression->m_lateral_deps);
if (derived_query_block->is_ordered()) {
/*
An ORDER BY clause is moved to an outer query block
- if the outer query block allows ordering, and
- that refers to this view/derived table only, and
- is not part of a set operation (UNION, EXCEPT, INTERSECT), and
- may have a WHERE clause but is not grouped or aggregated and is not
itself ordered.
Otherwise the ORDER BY clause is ignored.
Only SELECT statements and single-table UPDATE and DELETE statements
allow ordering.
Up to version 5.6 included, ORDER BY was unconditionally merged.
Currently we only merge in the simple case above, which ensures
backward compatibility for most reasonable use cases.
Note that table numbers in order_list do not need updating, since
the outer query contains only one table reference.
*/
// LIMIT currently blocks derived table merge
assert(!derived_query_block->has_limit());
if ((lex->sql_command == SQLCOM_SELECT ||
lex->sql_command == SQLCOM_UPDATE ||
lex->sql_command == SQLCOM_DELETE) &&
!(master_query_expression()->is_set_operation() || is_grouped() ||
is_distinct() || is_ordered() ||
get_table_list()->next_local != nullptr)) {
order_list.push_back(&derived_query_block->order_list);
for (ORDER *o = derived_query_block->order_list.first; o != nullptr;
o = o->next) {
/*
ORDER BY clause may contain expressions with outer references that
must be adjusted:
*/
o->item[0]->fix_after_pullout(this, derived_query_block);
/*
If at outer-most level (not within another derived table), ensure
the ordering columns are marked in read_set, since columns selected
from derived tables are not marked in initial resolving.
*/
if (!thd->derived_tables_processing) {
Mark_field mf(thd->mark_used_columns);
o->item[0]->walk(&Item::mark_field_in_map, enum_walk::POSTFIX,
pointer_cast<uchar *>(&mf));
}
}
} else {
if (derived_query_block->empty_order_list(this)) return true;
trace_derived.add_alnum("transformations_to_derived_table",
"removed_ordering");
}
}
// Add any full-text functions from derived table into outer query
if (derived_query_block->ftfunc_list->elements &&
add_ftfunc_list(derived_query_block->ftfunc_list))
return true; /* purecov: inspected */
/*
The "laterality" of this nest is not interesting anymore; it was
transferred to underlying tables.
*/
derived_query_expression->m_lateral_deps = 0;
return false;
}
/**
Destructively replaces a sub-condition inside a condition tree. The
parse tree is also altered.
@param thd thread handler
@param tree Must be the handle to the top level condition. This is needed
when the top-level condition changes.
@param old_cond The condition to be replaced.
@param new_cond The condition to be substituted.
@param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
the new condition.
@param[out] found_ptr Pointer to boolean; used only in recursive sub-calls;
top call must not specify this argument. Function deposits there if it
found the searched Item or not.
@return error status
@retval true If there was an error.
@retval false If successful.
*/
static bool replace_subcondition(THD *thd, Item **tree, Item *old_cond,
Item *new_cond, bool do_fix_fields,
bool *found_ptr = nullptr) {
if (*tree == old_cond) {
*tree = new_cond;
if (do_fix_fields && new_cond->fix_fields(thd, tree)) return true;
if (found_ptr != nullptr) *found_ptr = true; // inform upper call
return false;
}
if ((*tree)->type() == Item::COND_ITEM) {
Item_cond *cond = down_cast<Item_cond *>(*tree);
List_iterator<Item> li(*cond->argument_list());
Item *item;
bool found_local = false;
while ((item = li++)) {
if (replace_subcondition(thd, li.ref(), old_cond, new_cond, do_fix_fields,
&found_local))
return true;
if (found_local) {
if (found_ptr != nullptr) *found_ptr = true; // inform upper call
return false;
}
}
} else if ((*tree)->type() == Item::FUNC_ITEM) {
Item_func *func = down_cast<Item_func *>(*tree);
bool found_local = false;
for (uint i = 0; i < func->arg_count; i++) {
if (replace_subcondition(thd, func->arguments() + i, old_cond, new_cond,
do_fix_fields, &found_local))
return true;
if (found_local) {
if (found_ptr != nullptr) *found_ptr = true; // inform upper call
return false;
}
}
}
// item not found
// if it is the top call: error, else: no error.
assert(found_ptr != nullptr);
return (found_ptr == nullptr);
}
/**
Convert semi-join subquery predicates into semi-join join nests.
Convert candidate subquery predicates into semi-join join nests. This
transformation is performed once in query lifetime and is irreversible.
Conversion of one subquery predicate
------------------------------------
We start with a query block that has a semi-join subquery predicate:
@code
SELECT ...
FROM ot, ...
WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
@endcode
and convert the predicate and subquery into a semi-join nest:
@code
SELECT ...
FROM ot SEMI JOIN (it1 ... itN), ...
WHERE outer_where AND subq_where AND oe=ie
@endcode
that is, in order to do the conversion, we need to
* Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
query block's FROM structure.
* Add "AND subq_where AND oe=ie" into parent query block's WHERE (or ON if
the subquery predicate was in an ON condition)
* Remove the subquery predicate from the parent query block's WHERE
Considerations when converting many predicates
----------------------------------------------
A join may have at most MAX_TABLES tables. This may prevent us from
flattening all subqueries when the total number of tables in parent and
child selects exceeds MAX_TABLES. In addition, one slot is reserved per
semi-join nest, in case the subquery needs to be materialized in a
temporary table.
We deal with this problem by flattening children's subqueries first and
then using a heuristic rule to determine each subquery predicate's
priority, which is calculated in this order:
1. Prefer dependent subqueries over non-dependent ones
2. Prefer subqueries with many tables over those with fewer tables
3. Prefer early subqueries over later ones (to make sort deterministic)
@returns false if success, true if error
*/
bool Query_block::flatten_subqueries(THD *thd) {
DBUG_TRACE;
assert(has_sj_candidates());
Item_exists_subselect **subq, **subq_begin = sj_candidates->begin(),
**subq_end = sj_candidates->end();
Opt_trace_context *const trace = &thd->opt_trace;
/*
Semijoin flattening is bottom-up. Indeed, we have this execution flow,
for SELECT#1 WHERE X IN (SELECT #2 WHERE Y IN (SELECT#3)) :
Query_block::prepare() (select#1)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#2)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#3)
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #3 in #2
<- flatten_subqueries
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #2 in #1
Note that flattening of #(N) is done by its parent JOIN#(N-1), because
there are cases where flattening is not possible and only the parent can
know.
*/
uint subq_no;
for (subq = subq_begin, subq_no = 0; subq < subq_end; subq++, subq_no++) {
Item_exists_subselect *item = *subq;
/*
Some subqueries may have been deleted, remove them fully before sorting
sj_candidates and subsequent processing:
*/
if (item->strategy == Subquery_strategy::DELETED) {
sj_candidates->erase_value(item);
subq--; // So that the next iteration will handle the next subquery.
subq_end = sj_candidates->end(); // array's end moved.
continue;
}
// Transformation of IN and EXISTS subqueries is supported
assert(item->subquery_type() == Item_subselect::IN_SUBQUERY ||
item->subquery_type() == Item_subselect::EXISTS_SUBQUERY);
Query_block *child_query_block = item->query_expr()->first_query_block();
// Check that we proceeded bottom-up
assert(child_query_block->sj_candidates == nullptr);
bool dependent = item->query_expr()->uncacheable & UNCACHEABLE_DEPENDENT;
item->sj_convert_priority =
(((dependent * MAX_TABLES_FOR_SIZE) + // dependent subqueries first
child_query_block->leaf_table_count) *
65536) + // then with many tables
(65536 - subq_no); // then based on position
/*
We may actually allocate more than 64k subqueries in a query block,
but this is so unlikely that we ignore the impact it may have on sorting.
*/
}
/*
Pick which subqueries to convert:
sort the subquery array
- prefer correlated subqueries over uncorrelated;
- prefer subqueries that have greater number of outer tables;
*/
std::sort(subq_begin, subq_begin + sj_candidates->size(),
[](Item_exists_subselect *el1, Item_exists_subselect *el2) {
return el1->sj_convert_priority > el2->sj_convert_priority;
});
// A permanent transformation is going to start, so:
Prepared_stmt_arena_holder ps_arena_holder(thd);
// Transform certain subquery predicates to derived tables
for (subq = subq_begin; subq < subq_end; subq++) {
Item_exists_subselect *item = *subq;
if (item->strategy != Subquery_strategy::CANDIDATE_FOR_DERIVED_TABLE)
continue;
OPT_TRACE_TRANSFORM(trace, oto0, oto1,
item->query_expr()->first_query_block()->select_number,
"IN (SELECT)", "joined derived table");
oto1.add("chosen", true);
if (transform_table_subquery_to_join_with_derived(thd, item)) return true;
}
/*
Replace all subqueries to be flattened with a truth predicate.
Generally, this predicate is TRUE, but if the subquery has a WHERE condition
that is always false, replace with a FALSE predicate. In the latter case,
also avoid converting the subquery to a semi-join.
*/
uint table_count = leaf_table_count;
for (subq = subq_begin; subq < subq_end; subq++) {
Item_exists_subselect *item = *subq;
if (item->strategy != Subquery_strategy::CANDIDATE_FOR_SEMIJOIN) continue;
// Add the tables in the subquery nest plus one in case of materialization:
const uint tables_added =
item->query_expr()->first_query_block()->leaf_table_count + 1;
// (1) Not too many tables in total.
// (2) This subquery contains no antijoin nest (anti/semijoin nest cannot
// include antijoin nest for implementation reasons, see
// advance_sj_state()).
if (table_count + tables_added <= MAX_TABLES && // (1)
!item->query_expr()->first_query_block()->has_aj_nests) // (2)
item->strategy = Subquery_strategy::SEMIJOIN;
Item *subq_where = item->query_expr()->first_query_block()->where_cond();
/*
A predicate can be evaluated to ALWAYS TRUE or ALWAYS FALSE when it
has only const items. If found to be ALWAYS FALSE, do not include
the subquery in transformations.
*/
bool cond_value = true;
if (subq_where && subq_where->const_item() &&
!subq_where->walk(&Item::is_non_const_over_literals, enum_walk::POSTFIX,
nullptr) &&
simplify_const_condition(thd, &subq_where, false, &cond_value))
return true;
if (!cond_value) {
// Unlink and delete this subquery's query expression
Item::Cleanup_after_removal_context ctx(this);
item->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
if (item->strategy == Subquery_strategy::SEMIJOIN)
table_count += tables_added;
if (item->strategy != Subquery_strategy::SEMIJOIN &&
item->strategy != Subquery_strategy::DELETED) {
item->strategy = Subquery_strategy::UNSPECIFIED;
continue;
}
/*
In WHERE/ON of parent query, replace IN (subq) with truth value:
- When subquery is converted to anti/semi-join: truth value true.
- When subquery WHERE cond is false: IN returns FALSE, so truth value
false if a semijoin (IN) and truth value true if an antijoin (NOT IN).
*/
Item *truth_item =
(cond_value || item->can_do_aj)
? implicit_cast<Item *>(new (thd->mem_root) Item_func_true())
: implicit_cast<Item *>(new (thd->mem_root) Item_func_false());
if (truth_item == nullptr) return true;
Item **tree = (item->embedding_join_nest == nullptr)
? &m_where_cond
: item->embedding_join_nest->join_cond_ref();
if (replace_subcondition(thd, tree, item, truth_item, false))
return true; /* purecov: inspected */
}
/* Transform the selected subqueries into semi-join */
for (subq = subq_begin; subq < subq_end; subq++) {
Item_exists_subselect *item = *subq;
if (item->strategy != Subquery_strategy::SEMIJOIN) continue;
OPT_TRACE_TRANSFORM(trace, oto0, oto1,
item->query_expr()->first_query_block()->select_number,
"IN (SELECT)",
item->can_do_aj ? "antijoin" : "semijoin");
oto1.add("chosen", true);
if (convert_subquery_to_semijoin(thd, *subq)) return true;
}
/*
Finalize the subqueries that we did not convert,
ie. perform IN->EXISTS rewrite.
*/
for (subq = subq_begin; subq < subq_end; subq++) {
Item_exists_subselect *item = *subq;
if (item->strategy != Subquery_strategy::UNSPECIFIED) continue;
Query_block *save_query_block = thd->lex->current_query_block();
thd->lex->set_current_query_block(item->query_expr()->first_query_block());
Item *transformed = nullptr;
if (item->transformer(thd, &transformed)) {
return true;
}
thd->lex->set_current_query_block(save_query_block);
/*
If the Item has been substituted with another Item (e.g an
Item_in_optimizer), resolve it and add it to proper WHERE or ON clause.
If no substitute exists (e.g for EXISTS predicate), no action is required.
*/
if (transformed == nullptr) continue;
const bool do_fix_fields = !transformed->fixed;
const bool subquery_in_join_clause = item->embedding_join_nest != nullptr;
Item **tree = subquery_in_join_clause
? (item->embedding_join_nest->join_cond_ref())
: &m_where_cond;
if (replace_subcondition(thd, tree, *subq, transformed, do_fix_fields))
return true;
}
sj_candidates->clear();
return false;
}
/**
Propagate nullability into inner tables of outer join operation
@param tables List of tables and join nests, start at m_table_nest
@param nullable true: Set all underlying tables as nullable
*/
void propagate_nullability(mem_root_deque<Table_ref *> *tables, bool nullable) {
for (Table_ref *tr : *tables) {
if (tr->table && !tr->table->is_nullable() && (nullable || tr->outer_join))
tr->table->set_nullable();
if (tr->nested_join == nullptr) continue;
propagate_nullability(&tr->nested_join->m_tables,
nullable || tr->outer_join);
}
}
/**
Propagate exclusion from unique table check into all subqueries belonging
to this query block.
This function can be applied to all subqueries of a materialized derived
table or view.
*/
void Query_block::propagate_unique_test_exclusion() {
for (Query_expression *unit = first_inner_query_expression(); unit;
unit = unit->next_query_expression())
for (Query_block *sl = unit->first_query_block(); sl;
sl = sl->next_query_block())
sl->propagate_unique_test_exclusion();
exclude_from_table_unique_test = true;
}
/**
Add a list of full-text function elements into a query block.
@param ftfuncs List of full-text function elements to add.
@returns false if success, true if error
*/
bool Query_block::add_ftfunc_list(List<Item_func_match> *ftfuncs) {
Item_func_match *ifm;
List_iterator_fast<Item_func_match> li(*ftfuncs);
while ((ifm = li++)) {
if (ftfunc_list->push_back(ifm)) return true; /* purecov: inspected */
}
return false;
}
/**
Go through a list of tables and join nests, recursively, and repoint
its query_block pointer.
@param join_list List of tables and join nests
*/
void Query_block::repoint_contexts_of_join_nests(
mem_root_deque<Table_ref *> join_list) {
for (Table_ref *tbl : join_list) {
tbl->query_block = this;
if (tbl->nested_join)
repoint_contexts_of_join_nests(tbl->nested_join->m_tables);
}
}
/**
Merge name resolution context objects belonging to an inner subquery
to parent query block.
Update all context objects to have this base query block.
Used when a subquery's query block is merged into its parent.
@param inner Subquery for which context objects are to be merged.
*/
void Query_block::merge_contexts(Query_block *inner) {
for (Name_resolution_context *ctx = inner->first_context; ctx != nullptr;
ctx = ctx->next_context) {
ctx->query_block = this;
if (ctx->next_context == nullptr) {
ctx->next_context = first_context;
first_context = inner->first_context;
inner->first_context = nullptr;
break;
}
}
}
/**
For a table subquery predicate (IN/ANY/ALL/EXISTS/etc):
since it does not support LIMIT the following clauses are redundant:
ORDER BY
DISTINCT
GROUP BY if there are no aggregate functions and no HAVING clause
For a scalar subquery without LIMIT:
ORDER BY is redundant, as the number of rows to order must be 1.
This removal is permanent. Thus, it only makes sense to call this function
for regular queries and on first execution of SP/PS
@param thd thread handler
@return true on error
*/
bool Query_block::remove_redundant_subquery_clauses(THD *thd) {
Item_subselect *subq_predicate = master_query_expression()->item;
enum change {
REMOVE_NONE = 0,
REMOVE_ORDER = 1 << 0,
REMOVE_DISTINCT = 1 << 1,
REMOVE_GROUP = 1 << 2
};
uint possible_changes;
if (subq_predicate->subquery_type() == Item_subselect::SCALAR_SUBQUERY) {
if (has_limit()) return false;
possible_changes = REMOVE_ORDER;
} else {
assert(subq_predicate->subquery_type() == Item_subselect::EXISTS_SUBQUERY ||
subq_predicate->subquery_type() == Item_subselect::IN_SUBQUERY ||
subq_predicate->subquery_type() == Item_subselect::ALL_SUBQUERY ||
subq_predicate->subquery_type() == Item_subselect::ANY_SUBQUERY);
possible_changes = REMOVE_ORDER | REMOVE_DISTINCT | REMOVE_GROUP;
}
uint changelog = 0;
if ((possible_changes & REMOVE_ORDER) && order_list.elements) {
changelog |= REMOVE_ORDER;
if (empty_order_list(this)) return true;
}
if ((possible_changes & REMOVE_DISTINCT) && is_distinct()) {
changelog |= REMOVE_DISTINCT;
remove_base_options(SELECT_DISTINCT);
}
/*
Remove GROUP BY if there are no aggregate functions, no HAVING clause,
no non-primitive grouping and no windowing functions.
*/
if ((possible_changes & REMOVE_GROUP) && group_list.elements &&
!agg_func_used() && !having_cond() && olap == UNSPECIFIED_OLAP_TYPE &&
m_windows.elements == 0) {
changelog |= REMOVE_GROUP;
for (ORDER *g = group_list.first; g != nullptr; g = g->next) {
if (g->is_item_original()) {
Item::Cleanup_after_removal_context ctx(this);
(*g->item)->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
}
group_list.clear();
while (hidden_group_field_count-- > 0) {
fields.pop_front();
base_ref_items[fields.size()] = nullptr;
}
}
if (changelog) {
Opt_trace_context *trace = &thd->opt_trace;
if (unlikely(trace->is_started())) {
Opt_trace_object trace_wrapper(trace);
Opt_trace_array trace_changes(trace, "transformations_to_subquery");
if (changelog & REMOVE_ORDER) trace_changes.add_alnum("removed_ordering");
if (changelog & REMOVE_DISTINCT)
trace_changes.add_alnum("removed_distinct");
if (changelog & REMOVE_GROUP) trace_changes.add_alnum("removed_grouping");
}
}
return false;
}
/**
Empty the ORDER list.
Delete corresponding elements from fields and base_ref_items too.
If ORDER list contain any subqueries, delete them from the query block list.
@param sl Query block that possible subquery blocks in the ORDER BY clause
are attached to (may be different from "this" when query block has
been merged into an outer query block).
@returns true on error
*/
bool Query_block::empty_order_list(Query_block *sl) {
for (ORDER *o = order_list.first; o != nullptr; o = o->next) {
if (o->is_item_original()) {
Item *const order_item = o->item_initial;
Item::Cleanup_after_removal_context ctx(sl);
order_item->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
if (order_item->hidden && m_windows.elements != 0) {
// Below, when we pop off the unused expression from the select list,
// we do it only if the query block has no windows. So, instead, we
// replace the ordering expression in the select list and
// base_ref_items with a hidden NULL which is harmless.
Item *const replacement = new (parent_lex->thd->mem_root) Item_null;
if (replacement == nullptr) return true;
replacement->hidden = true;
std::replace(fields.begin(), fields.end(), order_item, replacement);
std::replace(base_ref_items.begin(),
base_ref_items.begin() + fields.size(), order_item,
replacement);
}
}
}
order_list.clear();
if (m_windows.elements != 0) {
/*
The next lines doing cleanup of ORDER elements expect the
query block's ORDER BY items to be the last part of fields and
base_ref_items, as they just chop the lists' end. But if there is a
window, that end is actually the PARTITION BY and ORDER BY clause of the
window, so do not chop then: leave the items in place.
*/
return false;
}
while (hidden_order_field_count-- > 0) {
fields.pop_front();
base_ref_items[fields.size()] = nullptr;
}
return false;
}
/*****************************************************************************
Group and order functions
*****************************************************************************/
/**
Resolve an ORDER BY or GROUP BY column reference.
Given a column reference (represented by 'order') from a GROUP BY or ORDER
BY clause, find the actual column it represents. If the column being
resolved is from the GROUP BY clause, the procedure searches the SELECT
list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
the ORDER BY clause, only the SELECT list is being searched.
If 'order' is resolved to an Item, then order->item is set to the found
Item. If there is no item for the found column (that is, it was resolved
into a table field), order->item is 'fixed' and is added to fields and
ref_item_array.
ref_item_array and fields are updated.
@param[in] thd Pointer to current thread structure
@param[in,out] ref_item_array All select, group and order by fields
@param[in] tables List of tables to search in (usually
FROM clause)
@param[in] order Column reference to be resolved
@param[in,out] fields List of fields to search in (usually
SELECT list; hidden items are ignored)
@param[in] is_group_field True if order is a GROUP field, false if
ORDER by field
@param[in] is_window_order True if order is a Window function's
PARTITION BY or ORDER BY field
@retval
false if OK
@retval
true if error occurred
*/
bool find_order_in_list(THD *thd, Ref_item_array ref_item_array,
Table_ref *tables, ORDER *order,
mem_root_deque<Item *> *fields, bool is_group_field,
bool is_window_order) {
Item *order_item = *order->item; /* The item from the GROUP/ORDER clause. */
Item::Type order_item_type;
Item **select_item; /* The corresponding item from the SELECT clause. */
Field *from_field; /* The corresponding field from the FROM clause. */
uint counter;
enum_resolution_type resolution;
/*
Local SP variables may be int but are expressions, not positions.
(And they can't be used before fix_fields is called for them).
*/
if (order_item->type() == Item::INT_ITEM &&
order_item->basic_const_item()) { /* Order by position */
uint count = (uint)order_item->val_int();
if (!count || count > CountVisibleFields(*fields)) {
my_error(ER_BAD_FIELD_ERROR, MYF(0), order_item->full_name(), thd->where);
return true;
}
order->item = &ref_item_array[count - 1];
// Order by is now referencing select expression, so increment the reference
// count for the select expression.
(*order->item)->increment_ref_count();
order->in_field_list = true;
return false;
}
/* Lookup the current GROUP/ORDER field in the SELECT clause. */
if (find_item_in_list(thd, order_item, fields, &select_item, &counter,
&resolution)) {
return true;
}
/* Check whether the resolved field is unambiguous. */
if (select_item != nullptr) {
Item *view_ref = nullptr;
/*
If we have found field not by its alias in select list but by its
original field name, we should additionally check if we have conflict
for this name (in case if we would perform lookup in all tables).
*/
if (resolution == RESOLVED_BEHIND_ALIAS && !order_item->fixed &&
order_item->fix_fields(thd, order->item))
return true;
/*
Lookup the current GROUP or WINDOW partition by or order by field in the
FROM clause.
*/
order_item_type = order_item->type();
from_field = not_found_field;
if (((is_group_field || is_window_order) &&
order_item_type == Item::FIELD_ITEM) ||
order_item_type == Item::REF_ITEM) {
from_field = find_field_in_tables(thd, (Item_ident *)order_item, tables,
nullptr, &view_ref, IGNORE_ERRORS, true,
// view_ref is a local variable, so
// don't record a change to roll back:
false);
if (thd->is_error()) return true;
if (!from_field) from_field = not_found_field;
}
if (from_field == not_found_field ||
(from_field != view_ref_found
?
/* it is field of base table => check that fields are same */
((*select_item)->type() == Item::FIELD_ITEM &&
((Item_field *)(*select_item))->field->eq(from_field))
:
/*
in is field of view table => check that references on translation
table are same
*/
((*select_item)->type() == Item::REF_ITEM &&
view_ref->type() == Item::REF_ITEM &&
down_cast<Item_ref *>(*select_item)->ref_pointer() ==
down_cast<Item_ref *>(view_ref)->ref_pointer()))) {
/*
If there is no such field in the FROM clause, or it is the same field
as the one found in the SELECT clause, then use the Item created for
the SELECT field. As a result if there was a derived field that
'shadowed' a table field with the same name, the table field will be
chosen over the derived field.
If we replace *order->item with one from the select list or
from a table in the FROM list, we should clean up after
removing the old *order->item from the query. The item has not
been fixed (so there are no aggregation functions that need
cleaning up), but it may contain subqueries that should be
unlinked.
*/
if ((*order->item)->real_item() != (*select_item)->real_item()) {
Item::Cleanup_after_removal_context ctx(
thd->lex->current_query_block());
(*order->item)
->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
order->item = &ref_item_array[counter];
// Order by is now referencing select expression, so increment the
// reference count for the select expression.
(*order->item)->increment_ref_count();
order->in_field_list = true;
if (resolution == RESOLVED_AGAINST_ALIAS && from_field == not_found_field)
order->used_alias = (*order->item)->item_name.ptr();
return false;
}
/*
There is a field with the same name in the FROM clause. This
is the field that will be chosen. In this case we issue a
warning so the user knows that the field from the FROM clause
overshadows the column reference from the SELECT list.
For window functions we do not need to issue this warning
(field should resolve to a unique column in the FROM derived
table expression, cf. SQL 2016 section 7.15 SR 4)
*/
if (!is_window_order) {
push_warning_printf(thd, Sql_condition::SL_WARNING, ER_NON_UNIQ_ERROR,
ER_THD(thd, ER_NON_UNIQ_ERROR),
((Item_ident *)order_item)->field_name, thd->where);
}
}
// If we couldn't find the item, see if we can find it in a merged derived
// table, hidden behind an Item_view_ref. This is a lowest-priority
// fallback to make sure we don't add the field twice to the select list;
// once as hidden (directly) and once as visible (through the view_ref).
// Such double-adds would be a problem if we later create a temporary table
// containing the item, which will call item->get_tmp_table_item() and
// effectively peel away the ref -- an item cannot be both visible and
// hidden at the same time.
counter = 0;
for (auto it = VisibleFields(*fields).begin();
it != VisibleFields(*fields).end(); ++it, ++counter) {
Item *item = *it;
if (item->type() == Item::REF_ITEM &&
((Item_ref *)item)->ref_type() == Item_ref::VIEW_REF) {
Item_view_ref *item_ref = down_cast<Item_view_ref *>(item);
if (item_ref->cached_table->is_merged() &&
order_item->eq(item_ref->ref_item(), false)) {
order->item = &ref_item_array[counter];
// Order by is now referencing select expression, so increment the
// reference count for the select expression.
(*order->item)->increment_ref_count();
order->in_field_list = true;
return false;
}
}
}
order->in_field_list = false;
/*
The call to order_item->fix_fields() means that here we resolve
'order_item' to a column from a table in the list 'tables', or to
a column in some outer query. Exactly because of the second case
we come to this point even if (select_item == nullptr),
in spite of that fix_fields() calls find_item_in_list() one more
time.
We check order_item->fixed because Item_func_group_concat can put
arguments for which fix_fields already was called.
group_fix_field = true is so that we properly reject GROUP BY on
subqueries with references to group fields.
*/
bool save_group_fix_field = thd->lex->current_query_block()->group_fix_field;
if (is_group_field) thd->lex->current_query_block()->group_fix_field = true;
bool ret =
(!order_item->fixed && (order_item->fix_fields(thd, order->item) ||
(order_item = *order->item)->check_cols(1)));
thd->lex->current_query_block()->group_fix_field = save_group_fix_field;
if (ret) return true; /* Wrong field. */
uint el = fields->size();
if (!order_item->const_for_execution()) {
order_item->increment_ref_count();
assert_consistent_hidden_flags(*fields, order_item, /*hidden=*/true);
order_item->hidden = true;
fields->push_front(order_item); /* Add new field to field list. */
ref_item_array[el] = order_item;
}
/*
If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
referenced_by is set to order->item which is the address of order_item.
But this needs to be address of order_item in the fields list.
As a result, when it gets replaced with Item_aggregate_ref
object in Item::split_sum_func2, we will be able to retrieve the
newly created object.
*/
if (order_item->type() == Item::SUM_FUNC_ITEM)
down_cast<Item_sum *>(order_item)->referenced_by[0] = &(*fields)[0];
/*
Currently, we assume that this assertion holds. If it turns out
that it fails for some query, order->item has changed and the old
item is removed from the query. In that case, we must call walk()
with clean_up_after_removal() on the old order->item.
*/
assert(order_item == *order->item);
if (!order_item->const_for_execution()) {
order->item = &ref_item_array[el];
}
return false;
}
/**
Resolve and setup list of expressions in ORDER BY clause.
Change order to point at item in select list.
If item isn't a number and doesn't exists in the select list, add it to the
the field list.
@param thd Current session.
@param ref_item_array The Ref_item_array for this query block.
@param tables From clause of the query.
@param fields All columns, including hidden ones.
@param order The query block's order clause.
@returns false if success, true if error.
*/
bool setup_order(THD *thd, Ref_item_array ref_item_array, Table_ref *tables,
mem_root_deque<Item *> *fields, ORDER *order) {
DBUG_TRACE;
assert(order);
Query_block *const select = thd->lex->current_query_block();
thd->where = "order clause";
const bool for_set_operation =
select->master_query_expression()->is_set_operation() &&
select == select->master_query_expression()->query_term()->query_block();
const bool is_aggregated = select->is_grouped();
for (uint number = 1; order; order = order->next, number++) {
Item *order_item = *order->item;
if (order_item->fixed && !order_item->const_item()) {
// If a non constant expression in order by is already
// resolved, it must have been merged from a derived table.
// So, we do not need to re-resolve in this query block. Add
// a hidden item if not present in the visible fields list.
// Update with the correct ref item.
uint counter = fields->size();
for (uint i = 0; i < fields->size(); i++) {
if (order_item->real_item()->eq(ref_item_array[i]->real_item(),
false)) {
order->item = &ref_item_array[i];
// Order by is now referencing select expression, so increment the
// reference count for the select expression.
(*order->item)->increment_ref_count();
order->in_field_list = true;
counter = i;
break;
}
}
if (counter == fields->size()) {
// Add as a hidden item.
ref_item_array[counter] = order_item;
fields->push_front(order_item);
order_item->hidden = true;
order->in_field_list = false;
order->item = &ref_item_array[counter];
}
continue;
}
if (find_order_in_list(thd, ref_item_array, tables, order, fields, false,
false))
return true;
if ((*order->item)->has_aggregation()) {
/*
Aggregated expressions in ORDER BY are not supported by SQL standard,
but MySQL has some limited support for them. The limitations are
checked below:
1. A set operation query is not aggregated, so ordering by a set
function is always wrong.
*/
if (for_set_operation) {
my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
return true;
}
/*
2. A non-aggregated query combined with a set function in ORDER BY
that does not contain an outer reference is illegal, because it
would cause the query to become aggregated.
(Since is_aggregated is false, this expression would cause
agg_func_used() to become true).
*/
if (!is_aggregated && select->agg_func_used()) {
my_error(ER_AGGREGATE_ORDER_NON_AGG_QUERY, MYF(0), number);
return true;
}
}
if (for_set_operation && (*order->item)->has_wf()) {
// Window function in ORDER BY of set operation not supported,
// SQL2014 4.16.3
my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
return true;
}
if ((*order->item)->data_type() == MYSQL_TYPE_INVALID &&
(*order->item)->propagate_type(thd, MYSQL_TYPE_VARCHAR))
return true;
}
return false;
}
/**
Runs checks mandated by ONLY_FULL_GROUP_BY
@param thd THD pointer
@returns true if ONLY_FULL_GROUP_BY is violated.
*/
bool Query_block::check_only_full_group_by(THD *thd) {
bool rc = false;
if (is_grouped()) {
/*
"root" has very short lifetime, and should not consume much
=> not instrumented.
*/
MEM_ROOT root(PSI_NOT_INSTRUMENTED, MEM_ROOT_BLOCK_SIZE);
{
Group_check gc(this, &root);
rc = gc.check_query(thd);
gc.to_opt_trace(thd);
} // Scope, to let any destructor run before the MEM_ROOT DTOR.
}
if (!rc && is_distinct()) {
Distinct_check dc(this);
rc = dc.check_query(thd);
}
return rc;
}
/**
Do final setup of ORDER BY clause, after the query block is fully resolved.
Check that ORDER BY clause is not redundant.
Split any aggregate functions.
@param thd Thread handler
@returns false if success, true if error
*/
bool Query_block::setup_order_final(THD *thd) {
DBUG_TRACE;
if (is_implicitly_grouped()) {
// Result will contain zero or one row - ordering is redundant
return empty_order_list(this);
}
if (!master_query_expression()->is_simple()) {
std::pair<bool, bool> result =
master_query_expression()->query_term()->redundant_order_by(this, 0);
assert(result.first); // that we found the block
if (result.second) {
// Part of set operation which requires global ordering may skip local
// order
if (empty_order_list(this)) return true;
}
}
for (ORDER *ord = order_list.first; ord; ord = ord->next) {
Item *const item = *ord->item;
const bool is_grouped_aggregate =
(item->type() == Item::SUM_FUNC_ITEM && !item->m_is_window_function);
if (is_grouped_aggregate) continue;
if (item->has_aggregation() || item->has_wf()) {
if (item->split_sum_func(thd, base_ref_items, &fields)) {
return true; /* purecov: inspected */
}
}
}
return false;
}
/**
Resolve and set up the GROUP BY list.
@param thd Thread handler
@todo
change ER_WRONG_FIELD_WITH_GROUP to more detailed
ER_NON_GROUPING_FIELD_USED
@returns false if success, true if error
*/
bool Query_block::setup_group(THD *thd) {
DBUG_TRACE;
assert(group_list.elements);
thd->where = "group statement";
for (ORDER *group = group_list.first; group; group = group->next) {
if (find_order_in_list(thd, base_ref_items, get_table_list(), group,
&fields, true, false))
return true;
Item *item = *group->item;
if (item->has_aggregation() || item->has_wf()) {
my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*group->item)->full_name());
return true;
}
else if (item->has_grouping_func()) {
my_error(ER_WRONG_GROUP_FIELD, MYF(0), "GROUPING function");
return true;
}
if (item->data_type() == MYSQL_TYPE_INVALID &&
item->propagate_type(thd, MYSQL_TYPE_VARCHAR))
return true;
}
return false;
}
/****************************************************************************
ROLLUP handling
****************************************************************************/
ORDER *Query_block::find_in_group_list(Item *item, int *rollup_level) const {
Item *real_item = item->real_item();
if (real_item->type() == Item::CACHE_ITEM) {
// Unwrap the cache, if any. NOTE: There should never be any caches
// in the GROUP BY list, so we don't need to unwrap any from there.
real_item = down_cast<const Item_cache *>(real_item)->get_example();
}
ORDER *best_candidate = nullptr;
int idx = 0;
for (ORDER *group = group_list.first; group; group = group->next, ++idx) {
Item *group_item = *group->item;
assert(group_item->real_item()->type() != Item::CACHE_ITEM);
if (real_item->eq(group_item->real_item(), /*binary_cmp=*/false)) {
if (item->item_name.ptr() != nullptr &&
group_item->item_name.ptr() != nullptr &&
item->item_name.eq(group_item->item_name)) {
// Match on group _and_ alias; return immediately.
if (rollup_level != nullptr) {
*rollup_level = idx;
}
return group;
} else if (best_candidate == nullptr) {
// Match on group but not alias; it's a good candidate,
// but only if we don't find a better match. (If there
// are multiple such candidates, we use the leftmost one.)
if (rollup_level != nullptr) {
*rollup_level = idx;
}
best_candidate = group;
}
}
}
return best_candidate;
}
int Query_block::group_list_size() const {
int size = 0;
for (ORDER *group = group_list.first; group; group = group->next) {
++size;
}
return size;
}
/**
Checks whether an item matches a grouped expression, creates an
Item_rollup_group_item around it and replaces the reference to it with that
item.
*/
static ReplaceResult wrap_grouped_expressions_for_rollup(
Query_block *select, Item *item, Item *parent, unsigned argument_idx) {
if (is_rollup_group_wrapper(item->real_item())) {
// This item must already be a group item, or we wouldn't have
// wrapped it earlier. No need to do anything more about it,
// since it's already wrapped (also, don't traverse further).
return {ReplaceResult::REPLACE, item};
}
int rollup_level = 0;
ORDER *group = select->find_in_group_list(item, &rollup_level);
if (group != nullptr) {
Item_rollup_group_item *new_item =
new Item_rollup_group_item(rollup_level, item);
if (new_item == nullptr || select->rollup_group_items.push_back(new_item)) {
return {ReplaceResult::ERROR, nullptr};
}
new_item->quick_fix_field();
if (group->rollup_item == nullptr) {
group->rollup_item = new_item;
}
return {ReplaceResult::REPLACE, new_item};
} else if (parent != nullptr && parent->type() == Item::FUNC_ITEM &&
down_cast<Item_func *>(parent)->functype() ==
Item_func::GROUPING_FUNC) {
my_error(ER_FIELD_IN_GROUPING_NOT_GROUP_BY, MYF(0), (argument_idx + 1));
return {ReplaceResult::ERROR, nullptr};
}
return {ReplaceResult::KEEP_TRAVERSING, nullptr};
}
/**
Helper function for WalkAndReplace() which replaces the Item referenced by
"child_ref" if "get_new_item" returns a replacement, or visits the children
of "child_ref" otherwise.
*/
static bool WalkAndReplaceInner(
THD *thd, Item *parent, unsigned argument_idx,
const function<ReplaceResult(Item *item, Item *parent,
unsigned argument_idx)> &get_new_item,
Item **child_ref) {
ReplaceResult result = get_new_item(*child_ref, parent, argument_idx);
if (result.action == ReplaceResult::ERROR) {
return true;
}
if (result.action == ReplaceResult::REPLACE) {
if (thd->lex->is_exec_started()) {
thd->change_item_tree(child_ref, result.replacement);
} else {
*child_ref = result.replacement;
}
return false;
}
return WalkAndReplace(thd, *child_ref, get_new_item);
}
bool WalkAndReplace(
THD *thd, Item *item,
const function<ReplaceResult(Item *item, Item *parent,
unsigned argument_idx)> &get_new_item) {
if (item->type() == Item::FUNC_ITEM ||
(item->type() == Item::SUM_FUNC_ITEM && item->m_is_window_function)) {
Item **args = down_cast<Item_func *>(item)->arguments();
const unsigned arg_count = down_cast<Item_func *>(item)->argument_count();
for (unsigned argument_idx = 0; argument_idx < arg_count; argument_idx++) {
if (WalkAndReplaceInner(thd, item, argument_idx, get_new_item,
&args[argument_idx])) {
return true;
}
}
if (item->m_is_window_function) {
down_cast<Item_sum *>(item)->update_after_wf_arguments_changed(thd);
}
} else if (item->type() == Item::ROW_ITEM) {
// Pretty much exactly the same logic as functions above.
Item_row *row_item = down_cast<Item_row *>(item);
for (unsigned argument_idx = 0; argument_idx < row_item->cols();
argument_idx++) {
if (WalkAndReplaceInner(thd, item, argument_idx, get_new_item,
row_item->addr(argument_idx))) {
return true;
}
}
} else if (item->type() == Item::COND_ITEM) {
Item_cond *cond_item = down_cast<Item_cond *>(item);
List_iterator<Item> li(*cond_item->argument_list());
unsigned argument_idx = 0;
for (Item *arg = li++; arg != nullptr; arg = li++) {
if (WalkAndReplaceInner(thd, item, argument_idx++, get_new_item,
li.ref())) {
return true;
}
}
} else if (item->type() == Item::SUBQUERY_ITEM) {
const Item_subselect::Subquery_type subquery_type =
down_cast<Item_subselect *>(item)->subquery_type();
if (subquery_type == Item_subselect::IN_SUBQUERY ||
subquery_type == Item_subselect::ALL_SUBQUERY ||
subquery_type == Item_subselect::ANY_SUBQUERY) {
return WalkAndReplaceInner(
thd, item, /*argument_idx=*/0, get_new_item,
&down_cast<Item_in_subselect *>(item)->left_expr);
}
}
return false;
}
/**
Marks occurrences of group by fields in a function's arguments as nullable,
so that we do not optimize them away before we get to add the rollup wrappers.
@todo
Some functions are not null-preserving. For those functions
updating of the m_nullable attribute is an overkill.
*/
void Query_block::mark_item_as_maybe_null_if_non_primitive_grouped(
Item *item) const {
if (find_in_group_list(item, /*rollup_level=*/nullptr) != nullptr) {
/*
If this item is present in GROUP BY clause, set m_nullable
to true, as ROLLUP will generate NULLs for this column.
This prevents the optimizer from constant-folding away
IS NULL expressions (e.g. in HAVING). This must be done
before we start resolving subselects in m_having_cond.
*/
item->set_nullable(true);
}
}
Item *Query_block::single_visible_field() const {
Item *ret = nullptr;
for (Item *item : visible_fields()) {
if (ret != nullptr) {
// More than one.
return nullptr;
}
ret = item;
}
return ret;
}
size_t Query_block::num_visible_fields() const {
return CountVisibleFields(fields);
}
bool Query_block::field_list_is_empty() const {
for (Item *item : fields) {
if (!item->hidden) return false;
}
return true;
}
/**
Refreshes the comparators after ROLLUP resolving.
This is needed because ROLLUP resolving happens after the comparators have
been set up. In ROLLUP resolving, it may turn out that something initially
believed to be constant, is not constant after all (e.g., group items that may
be NULL in some cases). So we call set_cmp_func() to make Arg_comparator
adjust/remove its caches accordingly.
*/
static bool refresh_comparators_after_rollup(Item *item) {
return WalkItem(item, enum_walk::POSTFIX, [](Item *inner_item) {
if (inner_item->type() != Item::FUNC_ITEM) {
return false;
}
switch (down_cast<Item_func *>(inner_item)->functype()) {
case Item_func::GE_FUNC:
case Item_func::GT_FUNC:
case Item_func::LT_FUNC:
case Item_func::LE_FUNC:
case Item_func::EQ_FUNC:
case Item_func::NE_FUNC:
case Item_func::EQUAL_FUNC:
return down_cast<Item_bool_func2 *>(inner_item)->set_cmp_func();
default:
return false;
}
});
}
/**
Resolve an item (and its tree) for rollup processing by replacing items
matching grouped expressions with Item_rollup_group_items and
updating properties (m_nullable, PROP_ROLLUP_FIELD).
Also check any GROUPING function for incorrect column.
@param thd session context
@param item the item to be processed
@returns the new item, or nullptr on error
*/
Item *Query_block::resolve_rollup_item(THD *thd, Item *item) {
ReplaceResult result =
wrap_grouped_expressions_for_rollup(this, item, nullptr, 0);
if (result.action == ReplaceResult::ERROR) {
return nullptr;
} else if (result.action == ReplaceResult::REPLACE) {
item->set_nullable(true);
return result.replacement;
}
bool changed = false;
bool error = WalkAndReplace(
thd, item,
[this, &changed](Item *inner_item, Item *parent, unsigned argument_idx) {
ReplaceResult inner_result = wrap_grouped_expressions_for_rollup(
this, inner_item, parent, argument_idx);
changed |= (inner_result.action == ReplaceResult::REPLACE);
return inner_result;
});
if (error) return nullptr;
if (changed) {
if (refresh_comparators_after_rollup(item)) {
return nullptr;
}
item->update_used_tables();
// Since item is now nullable, mark every expression (except rollup sum
// functions) depending on it as also potentially nullable. (This is a
// conservative choice; in some cases, expressions can be proven
// non-nullable even for NULL arguments.)
class Update_nullability_for_rollup_items : public Item_tree_walker {
public:
using Item_tree_walker::is_stopped;
using Item_tree_walker::stop_at;
};
Update_nullability_for_rollup_items info;
if (WalkItem(
item, enum_walk::PREFIX | enum_walk::POSTFIX,
[&info](Item *inner_item) {
if (info.is_stopped(inner_item)) {
return false;
} else if (inner_item->type() == Item::SUM_FUNC_ITEM &&
down_cast<Item_sum *>(inner_item)->real_sum_func() ==
Item_sum::ROLLUP_SUM_SWITCHER_FUNC) {
info.stop_at(inner_item);
return false;
} else {
inner_item->set_nullable(true);
return false;
}
})) {
return nullptr;
}
}
return item;
}
Item *create_rollup_switcher(THD *thd, Query_block *query_block, Item_sum *item,
int send_group_parts) {
assert(!item->m_is_window_function);
assert(!item->is_rollup_sum_wrapper());
List<Item> alternatives;
alternatives.push_back(item);
for (int level = 0; level < send_group_parts; ++level) {
Item_sum *new_item = down_cast<Item_sum *>(item->copy_or_same(thd));
if (new_item == nullptr) {
return nullptr;
}
new_item->make_unique();
if (alternatives.push_back(new_item)) {
return nullptr;
}
}
Item_rollup_sum_switcher *new_item =
new Item_rollup_sum_switcher(&alternatives);
if (new_item == nullptr || query_block->rollup_sums.push_back(new_item)) {
return nullptr;
}
new_item->quick_fix_field();
return new_item;
}
/**
Resolve items in SELECT list and ORDER BY list for rollup processing
@param thd session context
@returns false if success, true if error
*/
bool Query_block::resolve_rollup(THD *thd) {
DBUG_TRACE;
uint send_group_parts = group_list_size();
for (auto it = fields.begin(); it != fields.end(); ++it) {
Item *item = *it;
Item *new_item;
if (Item_sum * item_sum; item->type() == Item::SUM_FUNC_ITEM &&
!item->const_item() &&
(item_sum = down_cast<Item_sum *>(item),
item_sum->aggr_query_block == this)) {
// This is a top level aggregate, which must be replaced with
// a different one for each rollup level.
new_item = create_rollup_switcher(thd, this, item_sum, send_group_parts);
} else {
new_item = resolve_rollup_item(thd, item);
}
if (new_item == nullptr) {
return true;
}
*it = new_item;
}
return false;
}
/**
Checks if there are any calls to the MATCH function that take a ROLLUP column
as argument in the SELECT list, GROUP BY clause, HAVING clause or ORDER BY
clause. Such calls should be rejected, since MATCH only works on base columns.
*/
static bool fulltext_uses_rollup_column(const Query_block *query_block) {
if (query_block->olap != ROLLUP_TYPE || !query_block->has_ft_funcs()) {
return false;
}
// References to ROLLUP columns in SELECT and HAVING are represented
// by Item_rollup_group_items. So we can just check if any of the MATCH
// functions has such an argument.
for (Item_func_match &match : *query_block->ftfunc_list) {
if (match.has_grouping_set_dep()) {
return true;
}
}
// The references in ORDER BY and GROUP BY are not wrapped in
// Item_rollup_group_item, so we need to search for them.
for (ORDER *order = query_block->order_list.first; order != nullptr;
order = order->next) {
if (WalkItem(*order->item, enum_walk::PREFIX, [query_block](Item *item) {
if (is_function_of_type(item, Item_func::FT_FUNC)) {
Item_func_match *match = down_cast<Item_func_match *>(item);
for (unsigned i = 0; i < match->arg_count; ++i) {
if (query_block->find_in_group_list(match->get_arg(i),
/*rollup_level=*/nullptr) !=
nullptr) {
return true;
}
}
}
return false;
})) {
return true;
}
}
for (ORDER *group = query_block->group_list.first; group != nullptr;
group = group->next) {
if (WalkItem(*group->item, enum_walk::PREFIX, [query_block](Item *item) {
if (is_function_of_type(item, Item_func::FT_FUNC)) {
Item_func_match *match = down_cast<Item_func_match *>(item);
for (unsigned i = 0; i < match->arg_count; ++i) {
if (query_block->find_in_group_list(match->get_arg(i),
/*rollup_level=*/nullptr) !=
nullptr) {
return true;
}
}
}
return false;
})) {
return true;
}
}
return false;
}
/**
Replace group by field references inside window functions with references
in the presence of ROLLUP.
@param thd session context
@returns false if success, true if error
*/
bool Query_block::resolve_rollup_wfs(THD *thd) {
DBUG_TRACE;
for (auto it = fields.begin(); it != fields.end(); ++it) {
Item *new_item = resolve_rollup_item(thd, *it);
if (new_item == nullptr) return true;
*it = new_item;
// Any expression having a window function which involves rollup
// expressions should be set nullable.
if (!new_item->is_nullable()) {
bool any_nullable_wf = false;
WalkItem(new_item, enum_walk::POSTFIX,
[&any_nullable_wf](Item *inner_item) {
if (inner_item->real_item()->type() == Item::SUM_FUNC_ITEM &&
inner_item->real_item()->m_is_window_function &&
inner_item->has_grouping_set_dep()) {
inner_item->set_nullable(true);
any_nullable_wf = true;
}
return false;
});
if (any_nullable_wf) new_item->set_nullable(true);
}
}
/*
When this method is called, all ORDER BY items not already present in
the SELECT list have been added to the select list as hidden items,
so we do not need to traverse order_list to see all items.
The companion method, resolve_rollup, needs to traverse order_list
list, because at the the time that method is called, the ORDER BY
items haven't been added yet. Cf second loop in resolve_rollup.
*/
return false;
}
/**
@brief validate_gc_assignment
Check whether the other values except DEFAULT are assigned
for generated columns.
@param fields Item_fields list to be filled
@param values values to fill with
@param table table to be checked
@return Operation status
@retval false OK
@retval true Error occurred
@note This function must be called after table->write_set has been
filled.
*/
bool validate_gc_assignment(const mem_root_deque<Item *> &fields,
const mem_root_deque<Item *> &values,
TABLE *table) {
Field **fld = nullptr;
MY_BITMAP *bitmap = table->write_set;
bool use_table_field = false;
DBUG_TRACE;
if (values.empty()) return false;
// If fields has no elements, we use all table fields
if (fields.empty()) {
use_table_field = true;
fld = table->field;
}
auto field_it = VisibleFields(fields).begin();
auto value_it = VisibleFields(values).begin();
while (value_it != VisibleFields(values).end()) {
Item *value = *value_it++;
const Field *rfield;
if (!use_table_field)
rfield = (down_cast<Item_field *>((*field_it++)->real_item()))->field;
else
rfield = *(fld++);
if (rfield->table != table) continue;
// Skip hidden system fields.
if (rfield->is_hidden_by_system()) continue;
// If any of the explicit values is DEFAULT
if (rfield->m_default_val_expr &&
value->type() == Item::DEFAULT_VALUE_ITEM) {
// Restore the statement safety flag to current lex
current_thd->lex->set_stmt_unsafe_flags(
rfield->m_default_val_expr->get_stmt_unsafe_flags());
// Mark the columns that this expression reads to rthe ead_set
for (uint j = 0; j < table->s->fields; j++) {
if (bitmap_is_set(&rfield->m_default_val_expr->base_columns_map, j)) {
bitmap_set_bit(table->read_set, j);
}
}
}
/* skip non marked fields */
if (!bitmap_is_set(bitmap, rfield->field_index())) continue;
if (rfield->gcol_info && value->type() != Item::DEFAULT_VALUE_ITEM) {
my_error(ER_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN, MYF(0),
rfield->field_name, rfield->table->s->table_name.str);
return true;
}
}
return false;
}
/**
Delete unused columns from merged tables.
This function is called recursively for each join nest and/or table
in the query block. For each merged table that it finds, each column
that contains a subquery and is not marked as used is removed and
the translation item is set to NULL.
@param tables List of tables and join nests
*/
void Query_block::delete_unused_merged_columns(
mem_root_deque<Table_ref *> *tables) {
DBUG_TRACE;
for (Table_ref *tl : *tables) {
if (tl->nested_join == nullptr) continue;
if (tl->is_merged()) {
for (Field_translator *transl = tl->field_translation;
transl < tl->field_translation_end; transl++) {
Item *const item = transl->item;
// Decrement the ref count as its no more used in
// select list.
if (item->decrement_ref_count()) continue;
// Cleanup the item since its not referenced from
// anywhere.
assert(item->fixed);
Item::Cleanup_after_removal_context ctx(this);
item->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
transl->item = nullptr;
}
}
delete_unused_merged_columns(&tl->nested_join->m_tables);
}
}
/**
Add item to the hidden part of select list.
@param item the item to add
@return Pointer to reference to the added item
*/
Item **Query_block::add_hidden_item(Item *item) {
const uint el = fields.size();
base_ref_items[el] = item;
assert_consistent_hidden_flags(fields, item, /*hidden=*/true);
fields.push_front(item);
item->hidden = true;
return &base_ref_items[el];
}
void Query_block::remove_hidden_items() {
for (uint i = 0; i < hidden_items_from_optimization; i++) {
fields.pop_front();
}
hidden_items_from_optimization = 0;
}
/**
Resolve the rows of a table value constructor and aggregate the type of each
column across rows.
@param thd thread handler
@returns false if success, true if error
*/
bool Query_block::resolve_table_value_constructor_values(THD *thd) {
// Item_values_column objects may be allocated; they should be persistent for
// PREPARE statements.
Prepared_stmt_arena_holder ps_arena_holder(thd);
size_t num_rows = row_value_list->size();
size_t row_degree = row_value_list->front()->size();
// All table row value expressions shall be of the same degree. Note that
// non-scalar subqueries are not allowed; we can simply count the number of
// elements.
if (row_degree > MAX_FIELDS) {
my_error(ER_TOO_MANY_FIELDS, MYF(0));
return true;
}
size_t row_index = 0;
for (mem_root_deque<Item *> *values_row : *row_value_list) {
if (values_row->size() != row_degree) {
my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), row_index + 1);
return true;
} else if (values_row->empty()) {
// A table value constructor with empty row objects is a syntax error,
// except when used as the source for an INSERT statement.
my_error(ER_TABLE_VALUE_CONSTRUCTOR_MUST_HAVE_COLUMNS, MYF(0));
return true;
}
size_t item_index = 0;
for (auto it = values_row->begin(); it != values_row->end(); ++it) {
Item *item = *it;
if ((!item->fixed && item->fix_fields(thd, &*it)) ||
(item = *it)->check_cols(1))
return true; /* purecov: inspected */
if (item->type() == Item::DEFAULT_VALUE_ITEM) {
my_error(ER_TABLE_VALUE_CONSTRUCTOR_CANNOT_HAVE_DEFAULT, MYF(0));
return true;
}
/*
In case this item is or contains a parameter, propagate a default
data type for the expression. Note that there is no context available
here that can give us a good default value (like what is done when
a VALUES clause is used directly with an INSERT statement).
*/
if (item->data_type() == MYSQL_TYPE_INVALID) {
if (item->propagate_type(thd, item->default_data_type())) return true;
}
if (row_index == 0) {
// If single row, we skip setting up indirections.
if (num_rows != 1 && first_execution) {
Item_values_column *column = new Item_values_column(thd, item);
if (column == nullptr) return true;
column->add_used_tables(item);
item = column;
}
// Make sure to also replace the reference in item_list. In the case
// where fix_fields transforms an item, it.ref() will only update the
// reference of values_row.
if (first_execution) fields[item_index] = item;
} else {
Item_values_column *column = down_cast<Item_values_column *>(
GetNthVisibleField(fields, item_index));
if (column->join_types(thd, item)) return true;
column->add_used_tables(item);
column->fixed = true; // Does not have regular fix_fields()
}
++item_index;
}
++row_index;
}
// base_ref_items is used during row_value_in_to_exists_transformer to set up
// equality checks when transforming IN subquery predicates.
if (setup_base_ref_items(thd)) return true;
size_t name_len;
char buff[NAME_LEN + 1];
if (check_stack_overrun(thd, STACK_MIN_SIZE, pointer_cast<uchar *>(buff)))
return true; /* purecov: inspected */
size_t item_index = 0;
for (Item *column : visible_fields()) {
base_ref_items[item_index] = column;
// Name the columns column_0, column_1, ...
name_len = snprintf(buff, NAME_LEN, "column_%zu", item_index);
column->item_name.copy(buff, name_len);
++item_index;
}
return false;
}
static bool baptize_item(THD *thd, Item *item, int *field_no);
static bool update_context_to_derived(Item *expr, Query_block *new_derived);
/**
Replace a table subquery ([NOT] {IN, EXISTS}) with a join to a derived table.
The principle of this transformation is:
FROM [tables] WHERE ... AND/OR oe IN (SELECT ie FROM it) ...
becomes
FROM (tables) LEFT JOIN (SELECT DISTINCT ie FROM it) AS derived
ON oe = derived.ie WHERE ... AND/OR derived.ie IS NOT NULL ...
If the subquery predicate is top-level in WHERE, and not negated, we use
JOIN instead of LEFT JOIN, and use TRUE instead of IS NOT NULL. If the
subquery predicate is negated, we use IS NULL instead of IS NOT NULL. If the
subquery predicate is without aggregation(etc), we decorrelate any equality
from it, and, if negated, we also decorrelate '<>,<,<=,>,>='; thus we handle
EXISTS too. If the subquery cannot be decorrelated, the derived table could be
made LATERAL, but as a certain secondary engine doesn't support that we just
return an error.
@param thd Connection handle
@param subq Item for subquery
@returns true if error
*/
bool Query_block::transform_table_subquery_to_join_with_derived(
THD *thd, Item_exists_subselect *subq) {
assert(first_execution);
Query_expression *const subs_query_expression = subq->query_expr();
Query_block *subs_query_block = subs_query_expression->first_query_block();
assert(subs_query_block->first_execution);
subq->strategy = Subquery_strategy::DERIVED_TABLE;
const int hidden_fields = CountHiddenFields(subs_query_block->fields);
const bool no_aggregates = !subs_query_block->is_grouped() &&
!subs_query_block->with_sum_func &&
subs_query_block->having_cond() == nullptr &&
!subs_query_block->has_windows();
const bool decorrelate =
no_aggregates &&
(subs_query_expression->uncacheable & UNCACHEABLE_DEPENDENT) &&
subs_query_block->where_cond() != nullptr &&
subs_query_block->where_cond()->is_outer_reference() &&
// decorrelation adds to the SELECT list, and hidden fields make it
// impossible (search for "hidden" in this function). Hidden fields
// usually come from aggregation, which we disallowed just above, but also
// if a SELECT list element is a subquery which contains an outer
// reference to subs_query_block.
hidden_fields == 0;
// Ensure that all lists are consistent. all_fields should have an optional
// prefix and then be fields_list. If no aggregates, base_ref_items should
// start with fields_list.
assert(hidden_fields >= 0);
// We're going to build the lists of outer and inner semijoin
// expressions:
// - they start empty
// - first (build_sj_exprs()), if this is IN, we add the left and right
// expressions of IN; if this is EXISTS, we do nothing
// - second (decorrelate_condition()), we decorrelate comparison operators
// in the subquery, and add the resulting left and right expressions.
mem_root_deque<Item *> sj_outer_exprs(thd->mem_root);
mem_root_deque<Item *> sj_inner_exprs(thd->mem_root);
Mem_root_array<Item_func::Functype> op_types(thd->mem_root);
if (subq->subquery_type() == Item_subselect::IN_SUBQUERY) {
build_sj_exprs(thd, &sj_outer_exprs, &sj_inner_exprs, subq,
subs_query_block);
// All these expressions are compared with '=':
op_types.resize(sj_outer_exprs.size(), Item_func::EQ_FUNC);
} else {
assert(subq->subquery_type() == Item_subselect::EXISTS_SUBQUERY);
if (subs_query_block->is_table_value_constructor) {
if ((subs_query_block->select_limit != nullptr &&
!subs_query_block->select_limit->const_item()) ||
(subs_query_block->offset_limit != nullptr &&
!subs_query_block->offset_limit->const_item())) {
subq->strategy = Subquery_strategy::SUBQ_MATERIALIZATION;
// We can't determine until materialization time whether we have
// an empty or non-empty result set, skip transform
return false;
}
}
// We must replace of all EXISTS' initial SELECT list with
// constants, otherwise they will interfere in DISTINCT, indeed if we didn't
// replace,
// SELECT ... FROM ot WHERE EXISTS(SELECT c1 FROM it)
// would become
// SELECT ... FROM ot JOIN (SELECT DISTINCT c1 FROM it) AS dt
// and we may get duplicate copies of a row of 'ot', wrongly.
// Note that in setup_wild() we already do that, but only for "SELECT *",
// not for an explicit list "SELECT expr1, expr2", so we still have to do
// that here.
// We cannot do that if the query is aggregated, consider:
// EXISTS(SELECT SUM(a) AS x, b as y FROM t GROUP BY y HAVING x>2)
// if we replace we get
// EXISTS(SELECT 1, 1 FROM t GROUP BY y HAVING x>2)
// And as 'x' points to 1, HAVING is "always false".
// Resolving ensures that this assertion holds.
assert(no_aggregates);
if (subs_query_block->is_table_value_constructor) {
// This transformation effectively converts a table value constructor
// query block to a scalar subquery with zero or one constant rows.
subs_query_block->is_table_value_constructor = false;
// We checked above that we can evaluate LIMIT/OFFSET, so use that to
// compute here whether result set is empty or not
const ulonglong limit = (subs_query_block->select_limit != nullptr)
? subs_query_block->select_limit->val_uint()
: std::numeric_limits<ulonglong>::max();
const ulonglong offset = (subs_query_block->offset_limit != nullptr)
? subs_query_block->offset_limit->val_uint()
: 0;
const ulonglong actual_rows = subs_query_block->row_value_list->size();
const bool empty_rs = limit == 0 || offset >= actual_rows;
auto limes = new (thd->mem_root) Item_int(empty_rs ? 0 : 1);
if (limes == nullptr) return true;
subs_query_block->select_limit = limes;
subs_query_block->offset_limit = nullptr;
}
Item::Cleanup_after_removal_context ctx(this);
int i = 0;
for (auto it = subs_query_block->visible_fields().begin();
it != subs_query_block->visible_fields().end(); ++it, ++i) {
Item *inner = *it;
if (inner->basic_const_item()) continue; // no need to replace it
auto constant = new (thd->mem_root) Item_int(
NAME_STRING("Not_used"), (longlong)1, MY_INT64_NUM_DECIMAL_DIGITS);
*it = constant;
subs_query_block->base_ref_items[i] = constant;
// Expressions from the SELECT list will not be used; unlike in the case
// of IN, they are not part of sj_inner_exprs.
inner->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
subs_query_block->select_list_tables = 0;
}
Semijoin_decorrelation sj_decor(
&sj_outer_exprs, &sj_inner_exprs,
// If antijoin, we can decorrelate '<>', '>=', etc, too (but not '<=>'):
// multiple inner rows may match '<>', but they will fail the IS NULL
// condition, and if this condition is top-level in WHERE it will
// eliminate the rows.
(subq->can_do_aj &&
subq->outer_condition_context == enum_condition_context::ANDS)
? &op_types
: nullptr);
if (decorrelate) {
// We try to decorrelate it, by looking at equalities in its WHERE.
// This helps for this common pattern:
// EXISTS(SELECT FROM it WHERE it.c=ot.c AND <condition on 'it' only>)
const int initial_sj_inner_exprs_count = sj_inner_exprs.size();
if (subs_query_block->decorrelate_condition(sj_decor, nullptr)) return true;
// Append inner expressions of decorrelated equalities to the SELECT
// list. Correct context info of outer expressions.
auto it_outer = sj_outer_exprs.begin() + initial_sj_inner_exprs_count;
auto it_inner = sj_inner_exprs.begin() + initial_sj_inner_exprs_count;
for (int i = 0; it_outer != sj_outer_exprs.end();
++it_outer, ++it_inner, ++i) {
Item *inner = *it_inner;
Item *outer = *it_outer;
// In setup_base_ref_items() we allocated space for appending this
// element.
// If there were a hidden element (there is none, see the setting of
// 'decorrelate'), we would be appending a *non*-hidden element
// (participating in DISTINCT) *after* the hidden element, which would
// break the usual layout of base_ref_items which is: "non-hidden then
// hidden" (see Query_block::add_hidden_item()). While this layout is not
// documented (?), it is safer to not break it.
subs_query_block->base_ref_items[subs_query_block->fields.size()] = inner;
subs_query_block->fields.push_back(inner);
// Needed for fix_after_pullout:
update_context_to_derived(outer, this);
// Decorrelated outer expression will move to ON, so fix it.
outer->fix_after_pullout(this, subs_query_block);
}
// Decorrelation identified new outer/inner expression pairs.
// Recalculate used_tables() after that (the subquery may have become
// uncorrelated). Because there is no aggregation, window functions, ORDER
// BY, we only have to collect used_tables bits from the SELECT list, FROM
// clause (outer-correlated derived tables and join conditions) and WHERE
// clause.
for (Item *inner : subs_query_block->visible_fields()) {
subs_query_block->select_list_tables |= inner->used_tables();
}
table_map new_used_tables = subs_query_block->select_list_tables;
if (subs_query_block->where_cond()) {
subs_query_block->where_cond()->update_used_tables();
new_used_tables |= subs_query_block->where_cond()->used_tables();
}
// Walk the FROM clause to gather any outer-correlated derived table or join
// condition.
walk_join_list(subs_query_block->m_table_nest, [&](Table_ref *tr) -> bool {
if (tr->join_cond()) new_used_tables |= tr->join_cond()->used_tables();
if (tr->is_derived() && tr->uses_materialization())
new_used_tables |= tr->derived_query_expression()->m_lateral_deps;
return false;
});
if (!(new_used_tables & OUTER_REF_TABLE_BIT)) {
// there is no outer reference anymore
subs_query_block->uncacheable &= ~UNCACHEABLE_DEPENDENT;
subs_query_expression->uncacheable &= ~UNCACHEABLE_DEPENDENT;
// this must be called only after the change to 'uncacheable' above
subq->update_used_tables();
}
}
if (!subs_query_block->can_skip_distinct())
subs_query_block->add_base_options(SELECT_DISTINCT);
// As the synthesised ON and WHERE will reference columns of the derived
// table, we must have unique names.
// A derived table must have unique column names, while a quantified
// subquery needn't; so names may not currently be unique and we have to
// make them so.
{
int i = 1;
for (Item *inner : subs_query_block->visible_fields()) {
if (baptize_item(thd, inner, &i)) return true;
}
}
// If the subquery is (still) correlated, we would need to create a LATERAL
// derived table, but a certain secondary engine doesn't support it. Error:
if ((subq->subquery_used_tables() & ~PSEUDO_TABLE_BITS) != 0) {
my_error(ER_SUBQUERY_TRANSFORM_REJECTED, MYF(0));
return true;
}
// We have added to subs_query_expression->fields;
// subs_query_expression->types must always be equal to its visible fields.
subs_query_expression->types.clear();
for (Item *item : subq->query_expr()->first_query_block()->visible_fields()) {
subs_query_expression->types.push_back(item);
}
Table_ref *tl;
if (transform_subquery_to_derived(
thd, &tl, subs_query_expression, subq,
// If subquery is top-level in WHERE, and not negated, use INNER JOIN,
// else use LEFT JOIN.
// We could use LEFT JOIN unconditionally and let simplify_joins()
// convert it to INNER JOIN, but the conversion is not perfect, as
// not all effects of propagate_nullability() are undone.
/*use_inner_join=*/
subq->outer_condition_context == enum_condition_context::ANDS &&
!subq->can_do_aj,
/*reject_multiple_rows*/ false,
/*join_condition=*/nullptr,
/*lifted_where_cond*/ nullptr))
return true;
assert(CountVisibleFields(sj_inner_exprs) == sj_inner_exprs.size());
const int first_sj_inner_expr_of_subquery =
CountVisibleFields(subs_query_block->fields) - sj_inner_exprs.size();
Item_field *derived_field;
// Make the join condition for the derived table:
Item *join_cond = nullptr;
// Start at first SJ inner expression in SELECT list:
int i = first_sj_inner_expr_of_subquery;
int j = 0; // counter of processed SJ inner expressions
for (auto it_outer = sj_outer_exprs.begin(); it_outer != sj_outer_exprs.end();
++i, ++j, ++it_outer) {
Item *outer = *it_outer;
assert(i < (int)tl->table->s->fields);
// Using this constructor, instead of the alternative which only takes a
// Field pointer, gives a persistent name to the item (sets orig_table_name
// etc) which is necessary for prepared statements.
derived_field = new (thd->mem_root)
Item_field(thd, &this->context, tl, tl->table->field[i]);
if (derived_field == nullptr) return true;
// The said constructor sets 'fixed' to true, so join_cond->fix_fields()
// below ignores 'derived_field', so derived_field->cached_table isn't set,
// making a prepared statement fail. Setting cached_table solves it, and
// also helps during name resolution because the derived table isn't in the
// context's name resolution chain.
// derived_field->cached_table = tl;
// derived_field->cached_field_index = i;
Item_bool_func *comp_item;
Item_func::Functype op_type = sj_decor.op_type_at(j);
switch (op_type) {
case Item_func::EQ_FUNC:
comp_item = new (thd->mem_root) Item_func_eq(outer, derived_field);
break;
case Item_func::NE_FUNC:
comp_item = new (thd->mem_root) Item_func_ne(outer, derived_field);
break;
case Item_func::LT_FUNC:
comp_item = new (thd->mem_root) Item_func_lt(outer, derived_field);
break;
case Item_func::LE_FUNC:
comp_item = new (thd->mem_root) Item_func_le(outer, derived_field);
break;
case Item_func::GT_FUNC:
comp_item = new (thd->mem_root) Item_func_gt(outer, derived_field);
break;
case Item_func::GE_FUNC:
comp_item = new (thd->mem_root) Item_func_ge(outer, derived_field);
break;
default:
assert(false);
comp_item = nullptr;
}
if (comp_item == nullptr) return true;
// 'outer' moved from the left expression of IN (or from an operator in
// WHERE, if decorrelated) to this new equality:
// thd->replace_rollback_place(comp_item->arguments());
join_cond = and_items(join_cond, comp_item);
}
if (join_cond == nullptr) // it's EXISTS and we couldn't decorrelate anything
join_cond = new (thd->mem_root) Item_func_true();
join_cond->apply_is_true();
if (!join_cond->fixed && join_cond->fix_fields(thd, &join_cond)) return true;
tl->set_join_cond(join_cond);
// Make the IS [NOT] NULL condition:
derived_field = new (thd->mem_root)
Item_field(thd, &this->context, tl, tl->table->field[0]);
if (derived_field == nullptr) return true;
// derived_field->cached_table = tl;
// derived_field->cached_field_index = 0;
Item *null_check;
if (!tl->outer_join)
null_check = new (thd->mem_root) Item_func_true();
else if (subq->can_do_aj)
null_check = new (thd->mem_root) Item_func_isnull(derived_field);
else
null_check = new (thd->mem_root) Item_func_isnotnull(derived_field);
null_check->apply_is_true();
if (null_check->fix_fields(thd, &null_check)) return true;
// We only need to test the first column for null-ness:
// if the NOT NULL test eliminates it, i.e. if it's NULL:
// - if it's not NULL-complemented: it's a NULL in the right member of the
// LEFT JOIN, thus in the subquery, thus it wouldn't pass the IN
// condition,
// - if it is NULL-complemented: then one IN sub-equality failed, thus it
// wouldn't pass the IN condition.
// Reciprocically: if the NOT NULL does not eliminate it: it's not
// NULL-complemented, so all IN sub-equalities passed, it would pass the IN
// condition.
// If the subquery was rather with EXISTS, the SELECT list's first
// expression is 1, so if it's NULL it's surely NULL-complemented; if there
// were decorrelated equalities one of them failed, or the inner table
// was empty.
// Walk the parent query's WHERE, to find the subquery item, and replace it.
if (replace_subcondition(thd, &m_where_cond, subq, null_check, false))
return true; /* purecov: inspected */
// WHERE now references the derived table's column, so used_tables needs an
// update; so does not_null_tables (by making it up to date, we allow
// simplify_joins() to optimize more).
m_where_cond->update_used_tables();
return false;
}
/**
Create a new Table_ref object for this query block, for either:
1) a derived table which will replace the subquery, or
2) an extra derived table for handling grouping, if necessary,
cf. transform_grouped_to_derived.
The derived table is added to the list of used tables for the query block
("outer").
@param thd the session context
@param unit the query expression for subquery (case 1), or a new
query expression for (case 2)
@param join_cond != nullptr: we are synthesizing a derived table for a
subquery within this join condition
= nullptr: synthesizing a derived table for a subquery
where the subquery is not contained in a join condition
@param left_outer true for case (1), false for (2)
@param use_inner_join for case (1): if true/false use INNER/LEFT JOIN
@returns the derived table object, or nullptr on error.
*/
Table_ref *Query_block::synthesize_derived(THD *thd, Query_expression *unit,
Item *join_cond, bool left_outer,
bool use_inner_join) {
char name[STRING_BUFFER_USUAL_SIZE];
const uint i = unit->first_query_block()->select_number;
std::snprintf(name, sizeof(name), "derived_%d_%d", select_number, i);
char *namep = thd->mem_strdup(name);
if (namep == nullptr) return nullptr;
auto *const ti = new (thd->mem_root) Table_ident(unit);
if (ti == nullptr) return nullptr;
Table_ref *derived_table =
add_table_to_list(thd, ti, namep, 0, TL_READ, MDL_SHARED_READ);
if (derived_table == nullptr) return nullptr;
if (left_outer) {
derived_table->outer_join = !use_inner_join;
if (!unit->item->is_bool_func())
derived_table->m_was_scalar_subquery = true;
if (join_cond != nullptr) {
// impossible if table subquery:
assert(derived_table->m_was_scalar_subquery);
if (nest_derived(thd, join_cond, m_current_table_nest, derived_table))
return nullptr;
} else {
// The derived table is not for a subquery in a join condition
if (add_joined_table(derived_table)) return nullptr;
if (nest_last_join(thd) == nullptr) return nullptr;
}
if (derived_table->m_was_scalar_subquery) {
auto *const join_cond_true = new (thd->mem_root) Item_func_true();
if (join_cond_true == nullptr) return nullptr;
derived_table->set_join_cond(join_cond_true);
} // else: table subquery, the join condition is complex, made by caller.
}
unit->derived_table = derived_table;
return derived_table;
}
/**
A minion of transform_grouped_to_derived.
Replace occurrences of the aggregate function identified in info.m_target with
the the field info.m_replacement in the expressions contained in list.
Note that since this is part of a permanent transformation, we use the extra
m_permanent_transform flag in the THD
@param info a tuple containing {aggregate, replacement field}
@param was_hidden true if the aggregate was originally hidden
@param list the list of expressions
@param ref_item_array to be kept in sync with any changes in 'list'
@returns true on error (can not happen currently unless replacement field is
empty)
*/
static bool replace_aggregate_in_list(Item::Aggregate_replacement &info,
bool was_hidden,
mem_root_deque<Item *> *list,
Ref_item_array *ref_item_array) {
for (auto lii = list->begin(); lii != list->end(); ++lii) {
Item *select_expr = *lii;
Item *const new_item = select_expr->transform(&Item::replace_aggregate,
pointer_cast<uchar *>(&info));
if (new_item == nullptr) return true;
new_item->update_used_tables();
if (new_item != select_expr) {
new_item->hidden = was_hidden;
new_item->increment_ref_count();
*lii = new_item;
for (size_t i = 0; i < list->size(); i++) {
if ((*ref_item_array)[i] == select_expr)
(*ref_item_array)[i] = new_item;
}
}
}
return false;
}
/**
A minion of transform_grouped_to_derived.
"Remove" any non-window aggregate functions from fields unconditionally.
If such an aggregate is found, the query block should have a HAVING clause.
This is asserted in debug mode. We "remove" them by replacing them with
an Item_int, which should have no adverse effects. This avoids creating
trouble for Query_block::add_hidden_item which would otherwise need to keep
track of removed items.
@param thd session context
@param select the query block whose aggregates are being moved into a
derived table
@returns true on error, else false
*/
bool Query_block::remove_aggregates(THD *thd,
[[maybe_unused]] Query_block *select) {
for (auto it = fields.begin(); it != fields.end(); ++it) {
Item *select_expr = *it;
if (!select_expr->m_is_window_function &&
select_expr->type() == Item::SUM_FUNC_ITEM) {
// must be an aggregate induced from a HAVING clause, remove from
// transformed query block since it is not needed on that
// level any more
assert(select->having_cond() != nullptr);
Item *int_item = new (thd->mem_root) Item_int(0);
int_item->hidden = select_expr->hidden;
if (int_item == nullptr) return true;
*it = int_item;
for (size_t i = 0; i < fields.size(); i++) {
if (base_ref_items[i] == select_expr) base_ref_items[i] = int_item;
}
}
}
return false;
}
/**
A minion of transform_grouped_to_derived.
This updates the name resolution contexts in expr to that of new_derived
permanently.
@param expr the expression to be updated
@param new_derived the query block of the new derived table which now holds
the expression after it has been moved down.
@returns true on error
*/
static bool update_context_to_derived(Item *expr, Query_block *new_derived) {
Item_ident::Change_context ctx(&new_derived->context);
if (expr != nullptr && expr->walk(&Item::change_context_processor,
enum_walk::POSTFIX, (uchar *)&ctx))
return true; /* purecov: inspected */
return false;
}
/**
A minion of transform_grouped_to_derived.
Collect a unique list of aggregate functions used in the transformed query
block, which will need to be replaced with fields from the derived table
containing the grouping during transform_grouped_to_derived.
@param[in] select the query block
@param[in, out] aggregates the accumulator which will contain the aggregates
@return true on error
*/
static bool collect_aggregates(
Query_block *select, Item_sum::Collect_grouped_aggregate_info *aggregates) {
for (Item *select_expr : select->visible_fields()) {
if (select_expr->walk(&Item::collect_grouped_aggregates,
enum_walk::SUBQUERY_PREFIX,
pointer_cast<uchar *>(aggregates)))
return true; /* purecov: inspected */
}
if (select->having_cond() != nullptr) {
if (select->having_cond()->walk(&Item::collect_grouped_aggregates,
enum_walk::SUBQUERY_PREFIX,
pointer_cast<uchar *>(aggregates)))
return true; /* purecov: inspected */
}
// We move the aggregate functions from an implicitly grouped query block to
// a new derived table, effectively making the existing query block
// non-grouped. When the grouping is implicit, the ORDER BY is eliminated
// since the result set has only one row, so skip processing of the
// order_list.
assert(select->order_list.elements == 0);
List_iterator<Window> li(select->m_windows);
for (Window *w = li++; w != nullptr; w = li++) {
for (ORDER *it : {w->first_order_by(), w->first_partition_by()}) {
if (it != nullptr) {
for (auto ord = it; ord != nullptr; ord = ord->next) {
if ((*ord->item)
->walk(&Item::collect_grouped_aggregates, enum_walk::PREFIX,
pointer_cast<uchar *>(aggregates)))
return true; /* purecov: inspected */
}
}
}
}
return false;
}
/**
Helper function to make names for columns of a derived table replacing a
scalar or table subquery.
Fields from the query block containing the scalar subquery are moved
to the new derived table. We give them synthetic unique names here.
@param thd current session context
@param item the item we want to name
@param field_no the field number
@returns true on error
*/
static bool baptize_item(THD *thd, Item *item, int *field_no) {
char buff[100];
std::snprintf(buff, sizeof(buff), SYNTHETIC_FIELD_NAME "%d", (*field_no)++);
char *namep = thd->mem_strdup(buff);
if (namep == nullptr) return true;
item->orig_name.set(item->item_name.ptr());
item->item_name.set(namep);
return false;
}
/**
Minion of \c transform_grouped_to_derived. Do a replacement in \c expr
using \c Item::transform as specified in \c info using \c transformer.
*/
bool Query_block::replace_item_in_expression(Item **expr, bool was_hidden,
Item::Item_replacement *info,
Item_transformer transformer) {
Item *new_item = (*expr)->transform(transformer, pointer_cast<uchar *>(info));
if (new_item == nullptr) return true;
new_item->update_used_tables();
if (new_item != *expr) {
// Save our original item name at this level
auto saved_item_name =
(*expr)->orig_name.is_set() ? (*expr)->orig_name : (*expr)->item_name;
replace_referenced_item(*expr, new_item);
// Replace in fields
const auto it = find(fields.begin(), fields.end(), new_item);
if (it == fields.end()) {
*expr = new_item;
} else {
// More than one occurrence of same replaced field, make another copy so
// we do not clobber the item_name (alias) of another occurrence in select
// list.
Item_field *f = down_cast<Item_field *>(new_item);
Item_field *cpy = new (parent_lex->thd->mem_root) Item_field(f->field);
if (cpy == nullptr) return true;
*expr = cpy;
}
// Mark this expression as hidden if it was hidden in this query
// block.
(*expr)->hidden = was_hidden;
(*expr)->item_name = saved_item_name;
}
return false;
}
/**
Minion of \c transform_scalar_subqueries_to_join_with_derived. Moves implicit
grouping down into a derived table to prepare for
\c transform_scalar_subqueries_to_join_with_derived.
Example:
@verbatim
SELECT ( SELECT COUNT(*)
FROM t1 ) AS tot,
IFNULL(MAX(t2.b), 0) + 6 AS mx # MAX(t2.b), FROM t2 and
FROM t2 # WHERE go to derived_1_3
WHERE ANY_VALUE(t2.b) = 10;
is transformed to
SELECT ( SELECT COUNT(*)
FROM t1 ) AS tot,
IFNULL(derived_1_3.`MAX(t2.b)`, 0) + 6 AS mx
FROM ( SELECT MAX(t2.b) AS `MAX(t2.b)`
FROM t2
WHERE (ANY_VALUE(t2.b) = 10)) derived_1_3
@endverbatim
Create a new query expression object and query block object to represent the
contents of a derived table ("new_derived" in the code below, "derived_1_3"
in the example above), with a select list which only contains the aggregate
functions lifted out of the transformed query block ("MAX(t2.b) AS
`MAX(t2.b)`" above) and any fields referenced.
The transformed query block retains the original select list except aggregates
and fields are replaced by fields ("derived_1_3.`MAX(t2.b)`") from the
new subquery, but it loses its FROM list, replaced by the new derived table
("derived_1_3" above) and its WHERE and HAVING clauses which all go to
the derived table's query block.
Any DISTINCT, WINDOW clauses and LIMITs stay in place at the transformed
query block.
@param thd session context
@param[out] break_off set to true of transformation could not be performed
@returns true on error
*/
bool Query_block::transform_grouped_to_derived(THD *thd, bool *break_off) {
// Collect all aggregates, and add them to our new select list
Item_sum::Collect_grouped_aggregate_info aggregates(this);
if (collect_aggregates(this, &aggregates)) return true;
if (aggregates.m_break_off) {
*break_off = true; // some aggregates functions aggregate in an outer query
return false;
} else if (aggregates.list.size() == 0) {
// No longer to be found, probably optimized away ORDER BY
return false;
}
// Remember implicit grouping in case this query is also a scalar subquery
// so we can still identify it after this transform.
assert(is_implicitly_grouped());
m_was_implicitly_grouped = true;
Table_ref *tl = nullptr;
Query_block *new_derived = nullptr;
List<Item> item_fields_or_view_refs;
Mem_root_array<Item_view_ref *> unique_view_refs(thd->mem_root);
mem_root_unordered_map<Field *, Item_field *> unique_fields(thd->mem_root);
mem_root_unordered_map<Field *, Item_field *> unique_default_values(
thd->mem_root);
mem_root_unordered_map<Field *, Item_field *> *field_classes[] = {
&unique_default_values, &unique_fields};
/*
In addition to adding the aggregates to the derived table's SELECT list,
we need to add all referenced fields that will be needed in this query
block.
They fall into three categories:
1) fields referenced directly in the select list
2) fields referenced by window functions as arguments, or in
in a window definition's ORDER BY or PARTITION BY clauses
3) fields referenced by the transformed query block's ORDER BY clause
All of these can reference items from tables that are now moved inside the
derived table.
This query block will get its fields replaced by the corresponding ones in
the derived table shortly, after we have resolved the derived table. We
need to give them unique names in the derived table, else we could have
issues with resolution. Can probably be removed after WL#6570.
Method: collect all unique fields referenced in categories 1-3 above.
Add them with unique names to the SELECT list of the derived table,
after the aggregates (e.g. inside the derived table one may see t1.i and
t2.i, but at this level both fields are part of the same derived table,
so they cannot both be known as i in this query block).
When the fields in the derived table are known (after the call to
resolve_placeholder_tables below, we can go back and modify the references
at this level.
*/
mem_root_unordered_map<Item **, bool> contrib_exprs(thd->mem_root);
// We want permanent changes
{
Prepared_stmt_arena_holder ps_arena_holder(thd);
Query_expression *const old_slave = slave;
slave = nullptr;
// The new derived table takes over WHERE and HAVING from this query block
Query_expression *new_slu = parent_lex->create_query_expr_and_block(
thd, this, m_where_cond, m_having_cond, CTX_DERIVED);
if (new_slu == nullptr) return true;
new_derived = new_slu->first_query_block();
m_where_cond = nullptr;
m_having_cond = nullptr;
new_derived->linkage = DERIVED_TABLE_TYPE;
// inherit item counts for safe allocation of base_ref_items array
new_derived->select_n_having_items = select_n_having_items;
new_derived->select_n_where_fields = select_n_where_fields;
new_derived->n_sum_items = n_sum_items;
new_derived->n_child_sum_items = n_child_sum_items;
// update condition counts
new_derived->cond_count = cond_count;
// between_count is updated if cond_count gets updated when there are any
// transformations. So we do the same here too. However it needs to be
// investigated if this is necessary or not.
new_derived->between_count = between_count;
with_sum_func = false;
// Any moved Item_ident needs new name resolution context
Item *conds[2] = {new_derived->m_where_cond, new_derived->m_having_cond};
for (auto cond : conds) {
if (update_context_to_derived(cond, new_derived)) return true;
}
assert(join == nullptr);
// Move FROM tables under the new derived table with fix ups
new_derived->m_table_list = m_table_list;
m_table_list.clear();
for (Table_ref *tables = new_derived->get_table_list(); tables != nullptr;
tables = tables->next_local) {
tables->query_block = new_derived; // update query block context
if (update_context_to_derived(tables->join_cond(), new_derived))
return true; /* purecov: inspected */
}
new_derived->derived_table_count = this->derived_table_count;
derived_table_count = 0; // will soon become 1.
assert(is_implicitly_grouped()); // only implicit grouping moved
assert(group_list.elements == 0);
assert(olap == UNSPECIFIED_OLAP_TYPE);
// Let new derived take over grouping flags
new_derived->m_agg_func_used = m_agg_func_used;
m_agg_func_used = false;
new_derived->m_json_agg_func_used = m_json_agg_func_used;
m_json_agg_func_used = false;
// Let new derived take over any semijoin candidates
new_derived->sj_candidates = sj_candidates;
sj_candidates = nullptr;
assert(m_current_table_nest == &m_table_nest);
new_derived->m_table_nest = std::move(m_table_nest);
m_table_nest.clear();
new_derived->m_current_table_nest = &new_derived->m_table_nest;
new_derived->leaf_tables = leaf_tables;
new_derived->leaf_table_count = leaf_table_count;
leaf_tables = nullptr;
leaf_table_count = 0;
// Add the derived table to this query block's FROM list
tl = synthesize_derived(thd, new_slu, nullptr, false, false);
if (tl == nullptr) return true;
if (!(tl->derived_result = new (thd->mem_root) Query_result_union()))
return true; /* purecov: inspected */
new_slu->set_query_result(tl->derived_result);
m_table_nest.push_back(tl);
// Update this query block's and the derived table's query block's name
// resolution contexts
context.table_list = tl;
context.first_name_resolution_table = tl;
assert(context.last_name_resolution_table == nullptr);
new_derived->context.init();
new_derived->context.table_list = get_table_list();
new_derived->context.query_block = new_derived;
new_derived->context.outer_context = &context;
new_derived->context.first_name_resolution_table = get_table_list();
/*
Retain only subqueries from SELECT list in this block [2]; all other
query expressions go to the new derived table [1]:
*/
Item_subselect::Collect_subq_info subqueries(this);
for (Item *item : fields) {
if (item->walk(&Item::collect_subqueries, enum_walk::PREFIX,
pointer_cast<uchar *>(&subqueries)))
return true; /* purecov: inspected */
}
assert(slave != nullptr);
assert(new_derived->slave == nullptr);
// Collect all query expressions in a container first, since we cannot rely
// on old_slave's ::next pointer chain once we start inserting them.
Mem_root_array<Query_expression *> old_slaves(thd->mem_root);
for (Query_expression *cand = old_slave; cand != nullptr;
cand = cand->next) {
old_slaves.push_back(cand);
}
for (auto cand : old_slaves) {
if (cand == new_slu) continue; // already in place
if (subqueries.contains(cand))
cand->include_down(parent_lex, this); // [2]
else {
cand->include_down(parent_lex, new_derived); // [1]
// These subqueries are now moving into a new query block, so we need
// to update any outer references inside such subqueries from this block
// to that of the new derived table.
Item_ident::Depended_change info{this, new_derived};
if (cand->walk(&Item::update_depended_from, enum_walk::SUBQUERY_PREFIX,
pointer_cast<uchar *>(&info)))
return true; /* purecov: inspected */
}
}
// Insert the aggregates in the derived table's query block
int i = 0;
for (Item_sum *agg : aggregates.list) {
assert(agg->aggr_query_block == agg->base_query_block);
agg->aggr_query_block = new_derived;
agg->base_query_block = new_derived;
if (agg->hidden) {
// Because 'agg' is going to move to the derived table's SELECT list,
// its 'hidden' flag will become true. Then, in the current query block,
// 'agg' will be replaced by an Item_field for the column of that
// derived table; such Item_field must have the original value of
// agg->hidden, which we thus save here:
aggregates.aggregates_that_were_hidden.insert(agg);
}
if (new_derived->add_item_to_list(agg)) return true;
if (agg->item_name.length() == 0) {
// Generate a name (required)
char buff[100];
std::snprintf(buff, sizeof(buff), "tmp_aggr_%d", ++i);
agg->item_name.copy(buff);
if (agg->item_name.length() == 0) return true; // allocation error.
}
}
// We will find all fields mentioned above by checking fields, which
// has any hidden fields induced by ORDER BY or window specifications, in
// addition to fields from the select expressions. We also make a note
// of the expression's hidden status to mark the expression as hidden
// when it is replaced with derived table expression later.
for (Item *&item : fields) {
contrib_exprs.emplace(&item, item->hidden);
}
// Collect fields in expr, but not from inside grouped aggregates.
Item::Collect_item_fields_or_view_refs info{&item_fields_or_view_refs,
this};
for (auto expr : contrib_exprs) {
if ((*expr.first)
->walk(&Item::collect_item_field_or_view_ref_processor,
enum_walk::SUBQUERY_PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&info)))
return true; /* purecov: inspected */
}
List_iterator<Item> lfi(item_fields_or_view_refs);
Item *lf;
// Remove irrelevant field references, i.e. those fields that are not local
// to new_derived
while ((lf = lfi++)) {
if (lf->type() == Item::FIELD_ITEM) {
Item_field *f = down_cast<Item_field *>(lf);
if (!(f->context->query_block == this || f->depended_from == this))
lfi.remove();
}
}
// We now have all fields, default values and view references; now find only
// unique ones.
lfi.init(item_fields_or_view_refs);
while ((lf = lfi++)) {
if (lf->type() == Item::FIELD_ITEM) {
Item_field *f = down_cast<Item_field *>(lf);
if (unique_fields.find(f->field) == unique_fields.end()) {
unique_fields.emplace(std::pair<Field *, Item_field *>(f->field, f));
} else {
// Should already have been deduplicated during collection
assert(false);
}
} else if (lf->type() == Item::DEFAULT_VALUE_ITEM) {
Item_default_value *dv = down_cast<Item_default_value *>(lf);
Item_field *lf_field =
down_cast<Item_field *>(dv->argument()->real_item());
if (unique_default_values.find(lf_field->field) ==
unique_default_values.end()) {
unique_default_values.emplace(
std::pair<Field *, Item_field *>(lf_field->field, dv));
} else {
// Should already have been deduplicated during collection
assert(false);
}
} else {
Item_view_ref *vr = down_cast<Item_view_ref *>(lf);
for (auto curr : unique_view_refs) {
if (curr->eq(vr, true)) goto continue_outer;
}
unique_view_refs.push_back(vr);
}
continue_outer:;
}
int field_no = 1;
for (auto vr : unique_view_refs) {
if (baptize_item(thd, vr, &field_no)) return true;
if (new_derived->add_item_to_list(vr)) return true;
if (update_context_to_derived(vr, new_derived)) return true;
vr->depended_from = nullptr;
}
for (auto field_class : field_classes) {
for (auto pair : *field_class) {
Item_field *f = pair.second;
Item *sl_item = f;
if (f->type() == Item::FIELD_ITEM && f->protected_by_any_value()) {
// The field was mentioned only ever inside arguments to ANY_VALUE, so
// protect it likewise in new_derived, lest we get a
// ER_MIX_OF_GROUP_FUNC_AND_FIELDS_V2. If not, we let the check
// proceed, i.e. we do not add ANY_VALUE for the column.
sl_item = new (thd->mem_root) Item_func_any_value(f);
if (sl_item == nullptr) return true;
if (sl_item->fix_fields(thd, &sl_item)) return true;
}
if (new_derived->add_item_to_list(sl_item)) return true;
if (baptize_item(thd, sl_item, &field_no)) return true;
if (update_context_to_derived(sl_item, new_derived)) return true;
f->depended_from = nullptr;
}
}
new_derived->original_tables_map = original_tables_map;
if (new_derived->has_sj_candidates() &&
new_derived->flatten_subqueries(thd))
return true;
if (setup_tables(thd, get_table_list(), false)) return true;
} // Prepared_stmt_arena_holder scope
// Resolving the new derived table needs normal arena
if (resolve_placeholder_tables(thd, true)) return true;
{
Prepared_stmt_arena_holder ps_arena_holder(thd);
assert(tl->table != nullptr);
/*
We pushed the HAVING clause into new_derived above, but it is resolved to
this query block, meaning it may have Item_aggregate_refs pointing into
this->base_ref_items. We need to update such references to point into
new_derived->base_ref_items instead, since this is where the aggregates
are now also. We do this by adding them as hidden items and setting
the Item_aggregate_refs::ref accordingly.
*/
if (new_derived->m_having_cond != nullptr) {
Item_sum::Collect_grouped_aggregate_info having_aggs(this);
if (new_derived->m_having_cond->walk(&Item::collect_grouped_aggregates,
enum_walk::PREFIX,
pointer_cast<uchar *>(&having_aggs)))
return true; /* purecov: inspected */
for (Item_sum *agg : having_aggs.list) {
Item::Aggregate_ref_update info(agg, new_derived);
[[maybe_unused]] bool error = new_derived->m_having_cond->walk(
&Item::update_aggr_refs, enum_walk::PREFIX,
pointer_cast<uchar *>(&info));
assert(!error);
agg->aggr_query_block = new_derived;
}
}
/*
Permanently replace the aggregates in this select list and windowing
clauses with fields from the derived table.
*/
Field **field_ptr = tl->table->field;
for (Item_sum *agg : aggregates.list) {
Item_field *replaces_agg = new (thd->mem_root) Item_field(*field_ptr);
if (replaces_agg == nullptr) return true;
// So we can re-bind this field in EXECUTE phase of prepared statement
// Remove after WL#6570.
// replaces_agg->set_orig_names();
/*
The WHERE condition cannot contain group function from this level, so
ignore. Only replace aggregates from the SELECT lists with fields from
the derived table, then remove aggregates from top select lists.
*/
Item::Aggregate_replacement info(agg, replaces_agg);
if (replace_aggregate_in_list(
info, aggregates.aggregates_that_were_hidden.count(agg) != 0,
&fields, &base_ref_items))
return true;
// We only transform implicit grouping to a derived table: in such a case,
// the order by is eliminated since the result set has only one row, so
// skip processing of order_list.
assert(group_list.elements == 0);
assert(order_list.elements == 0);
List_iterator<Window> wli(m_windows);
for (Window *w = wli++; w != nullptr; w = wli++) {
for (ORDER *it : {w->first_order_by(), w->first_partition_by()}) {
if (it != nullptr) {
for (auto ord = it; ord != nullptr; ord = ord->next) {
Item *new_item;
if (!(new_item = (*ord->item)
->transform(&Item::replace_aggregate,
pointer_cast<uchar *>(&info))))
return true; /* purecov: inspected */
new_item->update_used_tables();
if (new_item != *ord->item) {
*ord->item = new_item;
}
}
}
}
// Physical sorting order should not have been set up since we are
// implicitly grouped, so no need to attempt substitution in it.
assert(w->sorting_order(nullptr, false) == nullptr);
}
// Aggregate argument may contain identifiers that need correct
// context. View references will have been replaced Item_fields,
// so we have to be careful: these will be rolled back and to make
// our transformation permanent we need to update the context of the
// original Item_fields, not the Item_view_refs.
if (update_context_to_derived(agg, new_derived)) return true;
++field_ptr;
}
/*
Remove any moved aggregates from top query block that did not get
replaced above.
*/
if (remove_aggregates(thd, new_derived)) return true;
// field_ptr now points to the first of any view references added to the
// select list of the derived table's query block. We now create new fields
// for this block which will point to the corresponding item in the derived
// table and then we substitute the new fields for the view refs.
for (auto vr : unique_view_refs) {
for (const auto &[expr, was_hidden] : contrib_exprs) {
Item::Item_view_ref_replacement info(vr->real_item(), *field_ptr, this);
if (replace_item_in_expression(expr, was_hidden, &info,
&Item::replace_item_view_ref))
return true;
}
++field_ptr;
}
for (auto field_class : field_classes) {
// field_ptr now points to the first of the fields added to the select
// list of the derived table's query block. We now create new fields for
// this block which will point to the corresponding fields moved to the
// derived table and then we substitute the new fields for the old ones.
for (auto pair : *field_class) {
auto replaces_field = new (thd->mem_root) Item_field(*field_ptr);
if (replaces_field == nullptr) return true;
// We can update context of the field moved into the derived table
// now that replaces_field has inherited the upper context
pair.second->context = &new_derived->context;
replaces_field->increment_ref_count();
for (const auto &[expr, was_hidden] : contrib_exprs) {
Item_field *replacement = replaces_field;
// If this expression was hidden, we need to make a copy of the
// derived table field. The same derived table field cannot be marked
// both hidden and visible if the field replaces two different
// expressions in the transforming query block.
if (was_hidden) {
auto hidden_field = new (thd->mem_root) Item_field(*field_ptr);
if (hidden_field == nullptr) return true;
hidden_field->item_name.set(pair.second->orig_name.ptr());
pair.second->context = &new_derived->context;
replacement = hidden_field;
}
Item::Item_field_replacement info(
pair.first, replacement, this,
field_class == &unique_default_values
? Item::Item_field_replacement::Mode::DEFAULT_VALUE
: Item::Item_field_replacement::Mode::FIELD);
if (replace_item_in_expression(expr, was_hidden, &info,
&Item::replace_item_field))
return true;
}
++field_ptr;
}
}
OPT_TRACE_TRANSFORM(&thd->opt_trace, trace_wrapper, trace_object,
select_number, "grouped subquery",
"subquery over grouped derived table");
opt_trace_print_expanded_query(thd, this, &trace_object);
} // Prepared_stmt_arena_holder scope
return false;
}
/**
A minion of transform_scalar_subqueries_to_join_with_derived.
A transform creates a field representing the value of the derived table and
adds it as a hidden field to the select list. Next, it replaces the subquery
in the item tree with this field. If we replace in a HAVING condition, we
build an Item_ref, cf. PTI_simple_ident_ident::itemize which also creates a
Item_ref for a field reference in HAVING, because we may need to access the
field in a tmp table.
@param thd The session context
@param subquery The scalar subquery
@param tr The table reference for the derived table
@param expr The expression we are replacing (in)
*/
bool Query_block::replace_subquery_in_expr(THD *thd, Item::Css_info *subquery,
Table_ref *tr, Item **expr) {
if (!(*expr)->has_subquery()) return false;
Item_singlerow_subselect::Scalar_subquery_replacement info(
subquery->item,
// make sure to not replace with one of the hidden fields, if present,
// e.g. for INTERSECT:
tr->table->field[tr->table->hidden_field_count], this,
subquery->m_add_coalesce);
// ROLLUP wrappers might have been added to the expression at this point. Take
// care to transform the inner item and keep the rollup wrappers as is.
bool with_rollup_wrapper = is_rollup_group_wrapper(*expr);
Item *orig_unwrapped_item = unwrap_rollup_group(*expr);
Item *new_item = (*expr)->transform(&Item::replace_scalar_subquery,
pointer_cast<uchar *>(&info));
if (new_item == nullptr) return true;
// If we replaced an item contained in the transformed query block,
// retain its name so the metadata column name remains correct.
if (*expr != new_item) {
new_item->item_name.set((*expr)->item_name.ptr());
*expr = new_item;
} else if (with_rollup_wrapper) {
// If the original expression was a rollup group item, the inner item of the
// expression might have changed.
Item *new_unwrapped_item = unwrap_rollup_group(new_item);
if (new_unwrapped_item != orig_unwrapped_item)
new_unwrapped_item->item_name.set((*expr)->item_name.ptr());
}
new_item->update_used_tables();
// If this expression has aggregation and we have replaced a subquery
// with a field, we need to recompute split_sum_func
if ((new_item->has_aggregation() &&
!(new_item->type() == Item::SUM_FUNC_ITEM &&
!new_item->m_is_window_function)) || // (1)
new_item->has_wf()) { // (2)
if (new_item->split_sum_func(thd, base_ref_items, &fields)) {
return true;
}
}
assert(!thd->is_error());
return false;
}
/**
A minion of transform_scalar_subqueries_to_join_with_derived.
Determine if the query expression is directly contained in the
query block, i.e. it is a subquery.
@param select the query block
@param slu the query expression
@returns true if slu is directly contained in select, else false
*/
static bool query_block_contains_subquery(Query_block *select,
Query_expression *slu) {
for (Query_expression *cand = select->first_inner_query_expression();
cand != nullptr; cand = cand->next_query_expression()) {
if (cand == slu) return true;
}
return false;
}
static bool walk_join_conditions(mem_root_deque<Table_ref *> &list,
std::function<bool(Item **expr_p)> action,
Item::Collect_scalar_subquery_info *info) {
for (Table_ref *tl : list) {
if (tl->join_cond() != nullptr) {
info->m_join_condition_context = tl->join_cond();
if (action(tl->join_cond_ref())) return true;
}
if (tl->nested_join != nullptr &&
walk_join_conditions(tl->nested_join->m_tables, action, info))
return true; /* purecov: inspected */
}
info->m_join_condition_context = nullptr;
return false;
}
/**
Remember if this transform was performed. It it was done by a secondary
engine, it may need to be rolled back before falling back on primary engine
execution.
*/
static void remember_transform(THD *thd, Query_block *select) {
if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQUERY_TO_DERIVED)) {
// Transform was enabled not by switch, but by secondary enginee
select->parent_lex->m_sql_cmd->set_optional_transform_prepared(true);
}
}
/**
Push the generated derived table to the correct location inside a join nest.
It will be nested in a new nest along with the outer table to the join
which owns the search condition in which we found the scalar subquery.
For example:
select t1.i,
t2.i
from t1
left outer join
t2 on
(t1.i < (select max(t2.i) from t2));
in transformed to
select t1.i,
t2.i
from t1
left join
(select max(t2.i) AS `max(t2.i)` from t2) derived_1_0 [*]
on(true)
left join
t2
on((t1.i < derived_1_0.`max(t2.i)`))
[*]: the derived table is nested in here, just ahead of the inner table
t2 to which the join condition is attached.
In the original join nest before transformation may look like this
(the join order list is reversed relative to the logical order):
(nest_join)
t2 LEFT OUTER ON .. = .. (inner table)
t1 (outer table)
After the transformation we have this nest structure:
(nest_join)
t2 LEFT OUTER ON .. = ..
(nest_last_join)
derived_1_0 LEFT OUTER ON true
t1
The method will recursively inspect and rebuild join nests as needed since
the join with the condition may be deeply nested.
@param thd the session context
@param join_cond the join condition which identifies the join we want to
nest into
@param nested_join_list
the join list at the current nesting level
@param derived_table the table we want to nest
@returns true on error
*/
bool Query_block::nest_derived(THD *thd, Item *join_cond,
mem_root_deque<Table_ref *> *nested_join_list,
Table_ref *derived_table) {
// Locate join nest in which the joinee with the condition sits
const bool found [[maybe_unused]] = walk_join_list(
*nested_join_list,
[join_cond, &nested_join_list](Table_ref *tr) mutable -> bool {
if (tr->join_cond() == join_cond) {
nested_join_list = &tr->embedding->nested_join->m_tables;
return true; // break off walk
}
return false;
});
assert(found);
// Make a copy of the join list, outer before inner joinees, so we
// can rebuild the join_list after inserting the derived table in a nest
// with the outer(s)
mem_root_deque<Table_ref *> copy_list(*THR_MALLOC);
auto &jlist = *nested_join_list;
for (auto tl : jlist) copy_list.push_front(tl);
jlist.clear();
auto it = std::find_if(copy_list.begin(), copy_list.end(),
[join_cond](Table_ref *tl) -> bool {
return tl->join_cond() == join_cond;
});
assert(it != copy_list.end()); // assert that we found it
const size_t idx = it - copy_list.begin();
// Insert back all outer tables to the inner containing the condition.
// Normally only one.
for (size_t i = 0; i < idx; i++) {
jlist.push_front(copy_list[i]);
}
// Insert the derived table and nest it with the outer(s)
jlist.push_front(derived_table);
derived_table->join_list = &jlist;
derived_table->embedding = copy_list[idx]->embedding;
if (nest_join(thd, this, copy_list[idx]->embedding, &jlist, idx + 1,
"(nest_join)") == nullptr)
return true;
// Insert back the inner containing the JOIN condition and any subsequent
// joinees
for (size_t i = idx; i < copy_list.size(); i++) {
jlist.push_front(copy_list[i]);
}
return false;
}
/**
Helper singleton class used to track information needed to perform the
transform of a correlated scalar subquery in a derived table, as performed
by \c decorrelate_derived_scalar_subquery_pre and
\c decorrelate_derived_scalar_subquery_pre.
*/
struct Lifted_expressions_map {
///< list of fields in WHERE clauses eligible for lifting
List<Item> m_inner_fields;
///< list of expressions that are not simple fields in WHERE clauses eligible
///< for lifting.
List<Item> m_inner_func_calls;
// Positions in derived table of corresponding field, indexed by the fields's
// position in m_inner_fields
Mem_root_array<uint> m_field_positions;
// Positions in derived table of corresponding expression (function call)
// indexed by call's position in m_inner_func_calls
Mem_root_array<uint> m_func_call_positions;
///< The list of outer fields of the WHERE clauses eligible
List<Item> m_outer_fields;
explicit Lifted_expressions_map(MEM_ROOT *root)
: m_field_positions(root), m_func_call_positions(root) {}
};
/**
Given an expression, create an ORDER expression for that expression and add
it to a window's ORDER BY list in preparation for synthesizing a window
function, cf. \c setup_counts_over_partitions
*/
static bool add_partition_by_expr(THD *thd, PT_order_list *partition,
Query_block *qb, Item *expr) {
ORDER *o = new (thd->mem_root) PT_order_expr(POS(), expr, ORDER_ASC);
if (o == nullptr) return true;
o->in_field_list = true;
(*o->item)->increment_ref_count();
bool found [[maybe_unused]] = false;
for (size_t idx = 0; idx < qb->fields.size(); idx++) {
if (qb->base_ref_items[idx] == expr) {
o->item = &qb->base_ref_items[idx];
found = true;
break;
}
}
assert(found);
o->used = expr->used_tables();
// Add at back of list
partition->value.link_in_list(o, &o->next);
return false;
}
/**
Add all COUNT(0) to SELECT list of the derived table to be used for
cardinality checking of the transformed subquery. Minion of
\c decorrelate_derived_scalar_subquery_pre
a. Add COUNT(0) OVER (PARTITION BY group-by-list)
b. Add COUNT(0) OVER (PARTITION BY inner-expr) for each inner-expression
not already grouped on.
*/
bool Query_block::setup_counts_over_partitions(
THD *thd, Table_ref *derived, Lifted_expressions_map *lifted_expressions,
mem_root_deque<Item *> &exprs_added_to_group_by, uint hidden_fields) {
for (size_t i = 0; i < exprs_added_to_group_by.size() + 1; i++) {
// 1. Construct PARTITION BY
PT_order_list *partition = new (thd->mem_root) PT_order_list(POS());
if (i == 0) {
// 1. a partition for original group by list
for (ORDER *group = group_list.first; group != nullptr;
group = group->next) {
if (add_partition_by_expr(thd, partition, this, *group->item))
return true;
}
} else {
// 1. b partition for each added expression
Item *f = exprs_added_to_group_by[i - 1];
if (add_partition_by_expr(thd, partition, this, f)) return true;
}
// 2. Construct default frame
auto start_bound =
new (thd->mem_root) PT_border(POS(), WBT_UNBOUNDED_PRECEDING);
if (start_bound == nullptr) return true;
auto end_bound =
new (thd->mem_root) PT_border(POS(), WBT_UNBOUNDED_FOLLOWING);
if (end_bound == nullptr) return true;
auto bounds = new (thd->mem_root) PT_borders(POS(), start_bound, end_bound);
if (bounds == nullptr) return true;
PT_frame *frame =
new (thd->mem_root) PT_frame(POS(), WFU_ROWS, bounds, nullptr);
if (frame == nullptr) return true;
frame->m_originally_absent = true;
// 3. Construct window and set it up (mini-version of what is normally done
// by setup_windows1).
PT_window *w = new (thd->mem_root)
PT_window(POS(), partition, /*order_by*/ nullptr, frame);
if (w == nullptr) return true;
if (w->setup_ordering_cached_items(thd, this, partition, true)) return true;
if (w->check_window_functions1(thd, this)) return true;
// initialize the physical sorting order for the partition
(void)w->sorting_order(thd, /*implicitly_grouped*/ false);
char buff[NAME_LEN + 1];
size_t namelen = snprintf(buff, NAME_LEN, "w%u", m_windows.size());
Item_string *wname =
new (thd->mem_root) Item_string(buff, namelen, thd->collation());
if (wname == nullptr) return true;
w->set_name(wname);
if (m_windows.push_back(w)) return true;
// 4. Construct window function COUNT and bind it
//
Item_int *number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
Item_sum *cnt = new (thd->mem_root) Item_sum_count(POS(), number_0, w);
if (cnt == nullptr) return true;
cnt->m_is_window_function = true;
cnt->set_wf();
int item_no = fields.size() + 1;
baptize_item(thd, cnt, &item_no);
m_added_non_hidden_fields++;
{
// prelude to binding COUNT(*)
Query_block *save_query_block = thd->lex->current_query_block();
assert(save_query_block == outer_query_block());
thd->lex->set_current_query_block(this);
const auto save_allow_sum_func = thd->lex->allow_sum_func;
thd->lex->allow_sum_func |= (nesting_map)1 << nest_level;
Item *count = cnt;
if (cnt->fix_fields(thd, &count)) return true;
// postlude to binding COUNT(*)
thd->lex->set_current_query_block(save_query_block);
thd->lex->allow_sum_func = save_allow_sum_func;
}
// 5. Add window function to the SELECT list so we can reference it from
// outside the derived table (the cardinality check)
base_ref_items[fields.size()] = cnt;
lifted_expressions->m_field_positions.push_back(fields.size() -
hidden_fields);
fields.push_back(cnt);
cnt->increment_ref_count();
// Add a new column to the derived table's query expression
derived->derived_query_expression()->types.push_back(cnt);
}
return false;
}
/**
Run through the inner expressions and add them to the block's GROUP BY if not
already present.
*/
bool Query_block::add_inner_exprs_to_group_by(
THD *thd, List_iterator<Item> &inner_exprs, Item *selected_item,
bool *selected_expr_added_to_group_by,
mem_root_deque<Item *> *exprs_added_to_group_by) {
//
inner_exprs.rewind();
while (Item *expr = inner_exprs++) {
bool found = false;
for (ORDER *group = group_list.first; group != nullptr;
group = group->next) {
Item *gitem = *group->item;
if (gitem->eq(expr, /*binary_cmp*/ false)) {
found = true;
break;
}
}
if (!found) {
Item *in_select = expr;
if (selected_item != nullptr &&
selected_item->real_item()->eq(in_select->real_item(),
/*binary_cmp*/ false)) {
in_select = selected_item;
*selected_expr_added_to_group_by = true;
}
ORDER *o = new (thd->mem_root) PT_order_expr(POS(), in_select, ORDER_ASC);
if (o == nullptr) return true;
o->direction = ORDER_NOT_RELEVANT; // ignored by constructor
o->in_field_list = true;
o->used = in_select->used_tables();
// Add at back of list
group_list.link_in_list(o, &o->next);
exprs_added_to_group_by->push_back(in_select);
}
}
return false;
}
/**
Minion of \c decorrelate_derived_scalar_subquery_pre.
Run through the inner fields and add them to the derived table's SELECT list
if not already present (only one can be present, since it's a scalar
subquery), and make a note of where in the derived table's field list they
are positioned: we need that information in
\c Query_block::decorrelate_derived_scalar_subquery_post
@param thd session context
@param lifted_exprs the structure containing de-correlation information
@param selected_field_or_ref
the selected field item (actually a field or a ref to
a field) if that's what present in the select list, or
else a nullptr.
@param first_non_hidden the index of the first non-hidden field in fields
@return true if error, else false
*/
bool Query_block::add_inner_fields_to_select_list(
THD *thd, Lifted_expressions_map *lifted_exprs, Item *selected_field_or_ref,
uint first_non_hidden [[maybe_unused]]) {
//
List_iterator<Item> inner_fields(lifted_exprs->m_inner_fields);
const uint hidden_fields = CountHiddenFields(fields);
Item_field *const selected_field =
selected_field_or_ref != nullptr
? down_cast<Item_field *>(selected_field_or_ref->real_item())
: nullptr;
while (Item *field_or_ref = inner_fields++) {
Item_field *const f = down_cast<Item_field *>(field_or_ref->real_item());
// Add non-correlated fields in WHERE clause to select_list if not
// already present
if (selected_field == nullptr || f->field != selected_field->field) {
m_added_non_hidden_fields++;
// If f->hidden, f should be among the hidden fields in 'fields'.
assert(std::any_of(fields.cbegin(), fields.cbegin() + first_non_hidden,
[&f](const Item *item) { return f == item; }) ==
f->hidden);
Item_field *inner_field;
if (f->hidden) {
// Make a new Item_field to avoid changing the set of hidden
// Item_fields.
inner_field = new (thd->mem_root) Item_field(thd, f);
if (inner_field == nullptr) return true;
assert(!inner_field->hidden);
} else {
inner_field = f;
}
// select_n_where_fields is counted, so safe to add to base_ref_items
base_ref_items[fields.size()] = inner_field;
// Compute position in resulting derived table (TABLE::fields)
// Note the corresponding slice position calculation performed in
// - change_to_use_tmp_fields_except_sums (example figure
// expanded)
// - change_to_use_tmp_fields
// takes this new situation into account.
lifted_exprs->m_field_positions.push_back(fields.size() - hidden_fields);
fields.push_back(inner_field);
inner_field->increment_ref_count();
// We have added to fields; master_query_expression->types must
// always be equal to it;
master_query_expression()->types.push_back(inner_field);
} else {
// This is the field present in the scalar subquery initially, so it
// will be first in the derived table's set of fields.
lifted_exprs->m_field_positions.push_back(0);
}
}
return false;
}
bool Query_block::add_inner_func_calls_to_select_list(
THD *thd, Lifted_expressions_map *lifted_exprs) {
// Add non-correlated function calls in WHERE clause to select list, if not
// present.
List_iterator<Item> inner_calls(lifted_exprs->m_inner_func_calls);
const uint hidden_fields = CountHiddenFields(fields);
while (Item *func_item = inner_calls++) {
Item_func *func = down_cast<Item_func *>(func_item);
bool found = false;
for (size_t i = 0; i < fields.size(); i++) {
Item *fi = fields[i];
if (fi->type() != Item::FUNC_ITEM) continue;
if (down_cast<Item_func *>(fi)->eq(func, /*binary_cmp*/ false)) {
found = true;
break;
}
}
if (found) {
// we found the call in select list, use it
lifted_exprs->m_func_call_positions.push_back(0);
} else {
// The function call is not in select list, add it
m_added_non_hidden_fields++;
// The next assignment is safe, because we have reserved space for
// select_n_where_fields in base_ref_items, which during parsing
// pessimistically counts all field references disregarding uniqueness,
// selected or not, and this call contains at least one inner field
// reference. and we have a scalar subquery, so only one position is
// taken by that.
// For example:
// SELECT * FROM t1 WHERE(SELECT a+4 FROM t2
// WHERE -t2.a = -t1.a AND
// abs(t2.b) = t1.a AND
// t2.a + 3 = t1.a AND
// t2.a * 4 = t1.a AND
// t2.a / 3 = t1.a
// ) > 0;
//
// we get these counts when allocating base_ref_items in
// Query_block::setup_base_ref_items():
//
// n_sum_items: 0
// n_child_sum_items: 0
// fields.size(): 1
// select_n_having_items: 5 (includes select_list items,
// cf. Item::itemize)
// select_n_where_fields: 11
// order_group_num: 0
// n_scalar_subqueries: 0
//
// These numbers are set somewhat pessimistically during the itemize
// phase, i.e. we allocate allocating 17 slots in base_ref_items of
// which we end up using 6 - well within bounds, i.e.
//
// [0]: a+4
// [1]: -t2.a
// [2]: abs(t2.b)
// [3]: t2.a + 3
// [4]: t2.a * 4
// [5]: t2.a / 3
//
// Adding another predicate will not change this, because we add only one
// function from the inner side of the predicate to the select list but
// have counted at least two field references for the predicate, the
// inner and the outer, we are even better off.
base_ref_items[fields.size()] = func;
// Compute position in resulting derived table (TABLE::fields)
// Note the corresponding slice position calculation performed in
// - change_to_use_tmp_fields_except_sums (example figure
// expanded)
// - change_to_use_tmp_fields
// takes this new situation into account.
lifted_exprs->m_func_call_positions.push_back(fields.size() -
hidden_fields);
int item_no = fields.size() + 1;
baptize_item(thd, func, &item_no);
fields.push_back(func);
func->increment_ref_count();
// We have added to fields; master_query_expression->types must
// always be equal to it;
master_query_expression()->types.push_back(func);
}
}
return false;
}
/**
We have a correlated scalar subquery, so we must do several things:
1. Add the relevant non-correlated fields or function calls "NCF"[1] to the
select list so they can be referenced in the JOIN condition which now
holds the earlier WHERE AND predicates that were correlated.
[1] We handle simple column references (fields) separately from general
expressions ("function calls"), hence the unusual terminology: by
function calls here we mean expressions that are not simple column
references, i.e. \c Item_func's. So, NCF are the inner
fields/function calls operand of an equality predicate that contains
an outer field reference in the other operand. These were identified
in \c supported_correlated_scalar_subquery, and passed in as
\c lifted_where.
2. Add a COUNT to select list so it can be referenced from the
transformed query's WHERE clause for cardinality check, if needed,
i.e. when there is no aggregate function in the subquery's single[2]
select expression.
[2] single because we have a scalar subquery. Add this to NCF. If it
*does* contain an aggregate function, there will be only one row per
group iff the NCF are part of any GROUP BY list, and we add them to
it, so that property holds.
3. Add grouping on NCF to the subquery. If already grouped, add the NCF
at end of grouping list. Note that this might result in a grouped query
that might fail the functional dependency checks. So we wrap any
non-grouped field in the select list in Item_func_any_value.
We can safely add the Item_func_any_value because subqueries with
cardinalities greater than one will be rejected anyway.
For already grouped subqueries, we use possibly several windowed COUNT
cardinality checks, cf. \c added_window_card_checks
4. Remember the set of NCF so we can create derived.field and
derived.count(field), after setting up the materialized derived
table, cf. \c lifted_fields.
5. Update the correlated fields in the JOIN condition to no longer be
outer references, and the NCFs to refer to the derived table's fields,
NCF.
This logic is partially done *before* setting up the materialized derived
table, in the present method (\c _pre), and partly *after* setting up the
materialized derived table, cf. the companion method (\c _post).
@param thd session context
@param derived the derived table being created in the transform
@param lifted_where the WHERE condition we move out to the JOIN cond
@param[out] lifted_exprs mapping of where inner fields and function calls
end up in the derived table's fields.
@param[out] added_card_check set to true if we are adding a cardinality check
@param[out] added_window_card_checks
set to # of window functions added to SELECT if
the subquery is initially grouped and we need
COUNT(*) OVER (...) to be checked
*/
bool Query_block::decorrelate_derived_scalar_subquery_pre(
THD *thd, Table_ref *derived, Item *lifted_where,
Lifted_expressions_map *lifted_exprs, bool *added_card_check,
size_t *added_window_card_checks) {
const uint hidden_fields = CountHiddenFields(fields);
const uint first_non_hidden = hidden_fields;
assert((fields.size() - hidden_fields) ==
1); // scalar subquery
#ifndef NDEBUG
// Hidden fields should come before non-hidden.
for (uint i = 0; i < fields.size(); i++) {
assert((fields[i]->hidden) != (i >= hidden_fields));
}
#endif
Item *selected_field_or_ref = nullptr;
Item_func *selected_func_call = nullptr;
//****************************************************************
// Save the select item in the appropriate lifted structure if any
//****************************************************************
if (fields[first_non_hidden]->type() == Item::FUNC_ITEM &&
!fields[first_non_hidden]->has_aggregation()) {
selected_func_call = down_cast<Item_func *>(fields[first_non_hidden]);
} else if (fields[first_non_hidden]->real_item()->type() ==
Item::FIELD_ITEM) {
selected_field_or_ref = fields[first_non_hidden];
}
// Containers for collected outer and inner fields.
Item::Collect_item_fields_or_refs outer_info{&lifted_exprs->m_outer_fields};
Item::Collect_item_fields_or_refs inner_info_fields{
&lifted_exprs->m_inner_fields};
//**************************************************************
// Walk where predicates and collect inner field references and
// function calls
//**************************************************************
Item_cond_and *lw = down_cast<Item_cond_and *>(lifted_where);
List_iterator<Item> eq_li(*lw->argument_list());
while (Item *item = eq_li++) {
Item_func_eq *eq = down_cast<Item_func_eq *>(item);
for (size_t j = 0; j < 2; j++) {
if (eq->arguments()[j]->is_outer_reference()) {
if (eq->arguments()[j]->walk(&Item::collect_item_field_or_ref_processor,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&outer_info)))
return true;
} else {
// <outer> = <inner_ref> or <outer> = func( .., <inner_ref>, ..)
Item *this_item = eq->arguments()[j];
if (this_item->type() == Item::FUNC_ITEM) {
// For function calls we collect separately the function call and
// the field arguments, skipping any constant args
Item_func *this_item_func = down_cast<Item_func *>(this_item);
List_iterator<Item> item_list_it(lifted_exprs->m_inner_func_calls);
Item *curr_item;
bool found = false;
while ((curr_item = item_list_it++)) {
if (curr_item->eq(this_item, true)) {
found = true;
break;
}
}
if (!found)
lifted_exprs->m_inner_func_calls.push_back(this_item_func);
} else {
if (this_item->walk(&Item::collect_item_field_or_ref_processor,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&inner_info_fields)))
return true;
}
}
}
}
//**************************************************************
// Add inner fields and calls to the derived table's select list
//**************************************************************
if (add_inner_fields_to_select_list(thd, lifted_exprs, selected_field_or_ref,
first_non_hidden))
return true;
if (add_inner_func_calls_to_select_list(thd, lifted_exprs)) return true;
//****************************************************************
// Add inner fields and calls to the derived table's GROUP BY list
//****************************************************************
const bool subquery_was_grouped =
is_explicitly_grouped() || is_implicitly_grouped();
const bool subquery_was_explicitly_grouped = is_explicitly_grouped();
mem_root_deque<Item *> exprs_added_to_group_by(thd->mem_root);
// True if selected expr was added by us to the group by list (not originally
// present. Used to determine whther to wrap it in ANY_VALUE.
bool selected_expr_added_to_group_by = false;
List_iterator<Item> inner_fields(lifted_exprs->m_inner_fields);
if (add_inner_exprs_to_group_by(thd, inner_fields, selected_field_or_ref,
&selected_expr_added_to_group_by,
&exprs_added_to_group_by))
return true;
// Prepare for setting m_no_of_added_exprs
const auto sz = group_list.elements;
List_iterator<Item> inner_calls(lifted_exprs->m_inner_func_calls);
if (add_inner_exprs_to_group_by(thd, inner_calls, selected_func_call,
&selected_expr_added_to_group_by,
&exprs_added_to_group_by))
return true;
if (subquery_was_explicitly_grouped)
// See definition of m_no_of_added_exprs for rationale
m_no_of_added_exprs = group_list.elements - sz;
//****************************************************************
// Potentially protect a selected field item in ANY_VALUE
//****************************************************************
Item *const fnh = fields[first_non_hidden];
if (!subquery_was_grouped && !selected_expr_added_to_group_by &&
!fnh->const_item() &&
!is_function_of_type(fnh, Item_func::ANY_VALUE_FUNC)) {
Item *const old_expr = fnh;
Item *func_any = new (thd->mem_root) Item_func_any_value(old_expr);
if (func_any == nullptr) return true;
if (func_any->fix_fields(thd, &func_any)) return true;
fields[first_non_hidden] = func_any;
replace_referenced_item(old_expr, func_any);
}
//****************************************************************
// Add grouped COUNT(0) to the select list if subquery was not grouped, or
// one or more windowed COUNT(0) if subquery was explicitly grouped
//****************************************************************
if (!subquery_was_grouped) {
Item_int *number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
Item *cnt = new (thd->mem_root) Item_sum_count(number_0);
if (cnt == nullptr) return true;
int item_no = fields.size() + 1;
baptize_item(thd, cnt, &item_no);
m_added_non_hidden_fields++;
// prelude to binding COUNT(*)
Query_block *save_query_block = thd->lex->current_query_block();
assert(save_query_block == outer_query_block());
thd->lex->set_current_query_block(this);
const auto save_allow_sum_func = thd->lex->allow_sum_func;
thd->lex->allow_sum_func |= (nesting_map)1 << nest_level;
if (cnt->fix_fields(thd, &cnt)) return true;
// postlude to binding COUNT(*)
thd->lex->set_current_query_block(save_query_block);
thd->lex->allow_sum_func = save_allow_sum_func;
// This is safe, because we have reserved space for select_n_where_fields,
// but at least one of them is an outer reference so this extra COUNT(*)
// can use the first such space.
base_ref_items[fields.size()] = cnt;
lifted_exprs->m_field_positions.push_back(fields.size() - hidden_fields);
fields.push_back(cnt);
cnt->increment_ref_count();
m_agg_func_used = true;
// Add a new column to the derived table's query expression
derived->derived_query_expression()->types.push_back(cnt);
*added_card_check = true;
} else if (subquery_was_explicitly_grouped) {
// For this case (not implicit grouping and correlated), we need to make
// sure the derived table has no more than one row of each partition on
// a) the grouped expression list and b) any added inner expression: to do
// this we add window function COUNT and check that it is less than or
// equal to one.
if (setup_counts_over_partitions(thd, derived, lifted_exprs,
exprs_added_to_group_by, hidden_fields))
return true;
*added_window_card_checks = 1 + exprs_added_to_group_by.size();
}
return false;
}
/**
Function calls in lifted join condition.
Replace occurences for inner function calls in lifted predicates with the
corresponding field in the derived table. This must be performed *before* we
replace inner field (below) to avoid replacing function arguments of the
derived table, eg. if we have ABS(t2.a) = \c outer_table.a in a subquery, we
will lift this predicate. But we have also added ABS(t2.a) to the GROUP BY
list in the derived table and its argument t2.a should not be replaced,
rather we replace the entire function call with \c derived_table.`abs(t2.a)`
in the lifted join condition.
*/
static bool replace_inner_function_calls_in_lifted_predicate(
THD *thd, Table_ref *derived, Lifted_expressions_map *lifted_exprs,
Query_block *qb) {
uint call_pos_idx = 0;
List_iterator<Item> li_funcs(lifted_exprs->m_inner_func_calls);
Item *func_item;
while ((func_item = li_funcs++)) {
Item_func *func = down_cast<Item_func *>(func_item);
Field *field_in_derived =
derived->table
->field[lifted_exprs->m_func_call_positions[call_pos_idx++]];
auto *replaces_field = new (thd->mem_root) Item_field(field_in_derived);
if (replaces_field == nullptr) return true;
Item::Item_func_call_replacement info(func, replaces_field, qb);
Item *new_item = derived->join_cond()->transform(
&Item::replace_func_call, pointer_cast<uchar *>(&info));
if (new_item == nullptr) return true;
if (new_item != derived->join_cond()) derived->set_join_cond(new_item);
}
return false;
}
/**
Fields in lifted join condition.
We added referenced inner fields to select list, now replace occurences of
such fields in the join condition with derived.<Item_field-n>. Since we have
now set up materialization for the derived table, we now know the Field to
use for a new \c Item_field.
*/
static bool replace_inner_fields_in_lifted_predicate(
THD *thd, Table_ref *derived, Lifted_expressions_map *lifted_exprs,
Query_block *qb, uint *field_pos_idx) {
Item *field_or_ref;
List_iterator<Item> li(lifted_exprs->m_inner_fields);
while ((field_or_ref = li++)) {
Item_field *f = down_cast<Item_field *>(field_or_ref->real_item());
Field *field_in_derived =
derived->table->field[lifted_exprs->m_field_positions[*field_pos_idx]];
*field_pos_idx += 1;
auto *replaces_field = new (thd->mem_root) Item_field(field_in_derived);
if (replaces_field == nullptr) return true;
assert(replaces_field->data_type() == f->data_type());
Item::Item_field_replacement info(f->field, replaces_field, qb);
Item *new_item = derived->join_cond()->transform(
&Item::replace_item_field, pointer_cast<uchar *>(&info));
if (new_item == nullptr) return true;
if (new_item != derived->join_cond()) derived->set_join_cond(new_item);
}
return false;
}
// Add derived.count(0) <= 1 assert condition to transformed query block's WHERE
// condition to preserve semantics of original query.
static bool build_reject_if(THD *thd, Table_ref *derived,
Lifted_expressions_map *lifted_exprs,
uint field_pos_idx) {
const uint cnt_pos_in_fields = lifted_exprs->m_field_positions[field_pos_idx];
Field *cnt_f = derived->table->field[cnt_pos_in_fields];
auto *cnt_i = new (thd->mem_root) Item_field(cnt_f);
if (cnt_i == nullptr) return true;
auto *number_1 = new (thd->mem_root) Item_int(1);
if (number_1 == nullptr) return true;
auto *gt = new (thd->mem_root) Item_func_gt(cnt_i, number_1);
if (gt == nullptr) return true;
auto *check_card = new (thd->mem_root) Item_func_reject_if(gt);
if (check_card == nullptr) return true;
Item *new_cond = and_items(derived->join_cond(), check_card);
if (new_cond == nullptr) return true;
new_cond->apply_is_true();
if (new_cond->fix_fields(thd, &new_cond)) return true;
derived->set_join_cond(new_cond);
return false;
}
/**
See explanation in companion method
\c decorrelate_derived_scalar_subquery_pre.
*/
bool Query_block::decorrelate_derived_scalar_subquery_post(
THD *thd, Table_ref *derived, Lifted_expressions_map *lifted_exprs,
bool added_card_check, size_t added_window_card_checks) {
//****************************************************************
// Replace fields and calls in the lifted predicates
//****************************************************************
if (replace_inner_function_calls_in_lifted_predicate(thd, derived,
lifted_exprs, this))
return true;
uint field_pos_idx = 0;
if (replace_inner_fields_in_lifted_predicate(thd, derived, lifted_exprs, this,
&field_pos_idx))
return true;
//****************************************************************
// Replace outer references in the lifted predicates with inner
// now that the predicates have been lifted out.
//****************************************************************
List_iterator<Item> li(lifted_exprs->m_outer_fields);
while (Item *field_or_ref = li++) {
Item_field *f = down_cast<Item_field *>(field_or_ref->real_item());
// This field used to be correlated, but is now lifted out to ON
// clause, so change its outer status
if (field_or_ref->type() == Item::REF_ITEM) {
down_cast<Item_ref *>(field_or_ref)->depended_from = nullptr;
// If this is an outer ref, we need to replace the ref with the
// underlying field as it is no more correlated. Else used_tables
// will not be correct.
if (down_cast<Item_ref *>(field_or_ref)->ref_type() ==
Item_ref::OUTER_REF) {
Item *new_item = derived->join_cond()->transform(
&Item::replace_outer_ref, pointer_cast<uchar *>(field_or_ref));
if (new_item != derived->join_cond()) derived->set_join_cond(new_item);
}
}
f->depended_from = nullptr;
}
//****************************************************************
// Add cardinality check (REJECT_IF) of derived table if necessary
//****************************************************************
if (added_card_check) {
if (build_reject_if(thd, derived, lifted_exprs, field_pos_idx)) return true;
cond_count++;
} else {
for (size_t wno = 0; wno < added_window_card_checks; wno++) {
if (build_reject_if(thd, derived, lifted_exprs, field_pos_idx++))
return true;
cond_count++;
}
}
derived->join_cond()->update_used_tables();
// Simplify join condition if only a single condition in our AND node since
// it looks slightly better in EXPLAIN output.
auto *and_cond = down_cast<Item_cond_and *>(derived->join_cond());
if (and_cond->argument_list()->elements == 1) {
List_iterator<Item> it(*and_cond->argument_list());
derived->set_join_cond(it++);
}
return false;
}
/**
Replace item in select list and preserve its reference count.
@param old_item Item to be replaced.
@param new_item Item to replace the old item.
If old item is present in base_ref_items, make sure it is replaced there.
Also make sure that reference count for old item is preserved in new item.
*/
void Query_block::replace_referenced_item(Item *const old_item,
Item *const new_item) {
for (size_t i = 0; i < fields.size(); i++) {
if (base_ref_items[i] == old_item) {
base_ref_items[i] = new_item;
break;
}
}
// Keep the same number of references as for the old expression:
new_item->increment_ref_count();
while (old_item->decrement_ref_count() > 0) {
new_item->increment_ref_count();
}
}
/**
Converts a subquery to a derived table and inserts it into the FROM
clause of the owning query block
@param thd Connection handle
@param[out] out_tl The created derived table will be stored in this.
@param subs_query_expression Unit for the subquery
@param subq Item for the subquery
@param use_inner_join Insert with INNER JOIN, or with LEFT JOIN
@param reject_multiple_rows
For scalar subqueries where we need run-time cardinality
check: true, else false
@param join_condition See join_cond in synthesize_derived()
@param lifted_where_cond
The subquery's where condition, moving to JOIN cond of
JOIN with the derived table
*/
bool Query_block::transform_subquery_to_derived(
THD *thd, Table_ref **out_tl, Query_expression *subs_query_expression,
Item_subselect *subq, bool use_inner_join, bool reject_multiple_rows,
Item *join_condition, Item *lifted_where_cond) {
Table_ref *tl;
{
// We did not do the transformation yet
remember_transform(thd, this);
// We want the Table_ref, Table_ident and m_join_cond to be permanent
Prepared_stmt_arena_holder ps_arena_holder(thd);
tl = synthesize_derived(thd, subs_query_expression, join_condition,
/*left_outer=*/true, use_inner_join);
if (tl == nullptr) return true;
if (lifted_where_cond != nullptr) {
tl->set_join_cond(lifted_where_cond);
cond_count += (lifted_where_cond->type() == Item::COND_ITEM)
? down_cast<Item_cond *>(lifted_where_cond)
->argument_list()
->elements
: 1;
}
// Append to end of leaf tables list
Table_ref *leaf;
for (leaf = leaf_tables; leaf->next_leaf != nullptr;
leaf = leaf->next_leaf) {
}
leaf->next_leaf = tl;
// Adjust table no and map
if (leaf_table_count >= MAX_TABLES) {
my_error(ER_TOO_MANY_TABLES, MYF(0), static_cast<int>(MAX_TABLES));
return true;
}
tl->set_tableno(leaf_table_count);
tl->embedding->nested_join->query_block_id =
subq->query_expr()->first_query_block()->select_number;
leaf_table_count += 1;
if (!(tl->derived_result = new (thd->mem_root) Query_result_union()))
return true; /* purecov: inspected */
subs_query_expression->m_reject_multiple_rows = reject_multiple_rows;
subs_query_expression->set_explain_marker(thd, CTX_DERIVED);
subs_query_expression->first_query_block()->linkage = DERIVED_TABLE_TYPE;
// Break connection to the subquery expression:
subs_query_expression->item = nullptr;
}
subs_query_expression->set_query_result(tl->derived_result);
subs_query_expression->first_query_block()->set_query_result(
tl->derived_result);
materialized_derived_table_count++;
derived_table_count++;
Lifted_expressions_map lifted_where_expressions(thd->mem_root);
bool added_cardinality_check = false;
size_t added_window_cardinality_checks = 0;
if (lifted_where_cond != nullptr) {
assert(!subs_query_expression->is_set_operation());
if (subs_query_expression->first_query_block()
->decorrelate_derived_scalar_subquery_pre(
thd, tl, lifted_where_cond, &lifted_where_expressions,
&added_cardinality_check, &added_window_cardinality_checks))
return true;
}
// We skip resolve_derived(), as the subquery has already been resolved
// before the conversion to derived table.
assert(tl->table == nullptr);
if (tl->setup_materialized_derived(thd)) return true; /* purecov: inspected */
if (lifted_where_cond != nullptr) {
assert(tl->join_cond() == lifted_where_cond);
if (decorrelate_derived_scalar_subquery_post(
thd, tl, &lifted_where_expressions, added_cardinality_check,
added_window_cardinality_checks))
return true;
}
*out_tl = tl;
return false;
}
/**
WL#15540 check that predicate operand item conforms to our requirements.
- Single inner item, e.g item is t2.a:
WHERE (SELECT ... FROM t2 WHERE t2.a = outer_column)
- Function call containing one or more inner items (no outer!), e.g.
item is ABS(t2.a) + t2.b:
WHERE (SELECT ... FROM t2 WHERE ABS(t2.a) + t2.b = outer_column)
Any constant arguments are ignored (supported also). The function call must
be (recursively) deterministic and cannot contain subqueries.
@returns pair<bool, bool> where
first bool: true if we found at least one inner field contained in the
item
second bool: true if non-conforming (subquery or non-deterministic)
*/
static std::pair<bool, bool> item_containing_non_correlated_field(Item *item) {
const Item::Type typ = item->real_item()->type();
if (typ == Item::FIELD_ITEM) return {true, false};
if (typ == Item::SUBQUERY_ITEM) return {false, true};
if (typ != Item::FUNC_ITEM) return {false, false};
Item_func *f = down_cast<Item_func *>(item->real_item());
if (f->is_non_deterministic()) return {false, true};
std::pair<bool, bool> result{false, false};
for (uint i = 0; i < f->arg_count; i++) {
std::pair<bool, bool> tmp =
item_containing_non_correlated_field(f->arguments()[i]);
result = {result.first || tmp.first, result.second || tmp.second};
}
return result;
}
/**
Called to check if the provided correlated predicate is eligible for
transformation. To be eligible, it must have one correlated operand
and one non-orrelated ("inner") operand. The correlated operand cannot contain
a non-correlated field, but must contain at least one correlated field.
It can be a deterministic function over (other)
deterministic functions and correlated field(s), recursively,
or a simple correlated field.
@param cor_pred correlated predicate that needs to be examined
@return true if predicate is eligible for transformation.
*/
bool is_correlated_predicate_eligible(Item *cor_pred) {
assert(cor_pred->is_outer_reference());
if (cor_pred->type() != Item::FUNC_ITEM ||
down_cast<Item_func *>(cor_pred)->functype() != Item_func::EQ_FUNC)
return false;
Item_func *eq_func = down_cast<Item_func *>(cor_pred);
bool non_correlated_operand = false;
for (uint i = 0; i < eq_func->argument_count(); i++) {
Item *item = eq_func->arguments()[i];
if (!item->is_outer_reference()) {
std::pair<bool, bool> result = item_containing_non_correlated_field(item);
if (result.second) return false; // illegal
non_correlated_operand = result.first;
} else if (item->used_tables() & ~PSEUDO_TABLE_BITS) {
// Inner table reference mixed with outer table reference is not
// allowed.
return false;
}
}
// We need to find one non-correlated operand in the correlated predicate
return non_correlated_operand;
}
/**
Extracts the top level correlated condition in an OR condition.
For ex:
(((t1.a = t2.b ) and (t1.c =10)) OR ((t1.a = t2.b) and (t1.d =10))) is the
same as (t1.a = t2.b) and ((t1.c = 10) or (t1.d = 10))
So we extract the (t1.a = t2.b) as the correlated condition and leave ((t1.c
= 10) or (t1.d = 10)) in the original condition that is passed as the
argument.
The caller of the function has to send an OR condition. Only the top level
correlated condition is extracted. Caller could repeatedly call this
function to extract the inner level correlated conditions as well.
@param thd session context
@param[in,out] cond Original condition that is looked into, to extract the
correlated condition.
@param[out] correlated_cond correlated condition that is extracted
@return false when a correlated condition is successfully extracted.
true when no correlated condition could be extracted.
*/
static bool extract_correlated_condition(THD *thd, Item **cond,
Item **correlated_cond) {
Item_cond *or_condition = down_cast<Item_cond *>(*cond);
Item *cor_pred = nullptr;
bool found = false;
for (Item &item : *or_condition->argument_list()) {
Mem_root_array<Item *> cond_parts(thd->mem_root);
ExtractConditions(&item, &cond_parts); // all elements AND'ed
found = false;
for (Item *pred : cond_parts) {
// Check if we have a correlated condition that is present in all the
// arguments to this OR condition. Only then we can extract it.
if (pred->is_outer_reference()) {
// If the correlated condition itself is disjuntive, we reject.
if (pred->type() == Item::COND_ITEM) return true;
// If this is the first argument to the OR condition, we need to be
// finding this correlated condition in all other arguments of the OR
// condition
if (cor_pred == nullptr) cor_pred = pred;
// If it is not the first argument to the OR condition, we already
// have a predicate with us that we need to look for in this argument.
// So, continue to search until we find it.
else if (!cor_pred->eq(pred, false))
continue;
found = true;
if (!is_correlated_predicate_eligible(cor_pred)) return true;
break;
}
}
if (!found) return true;
}
// We now have a correlated condition that could be extracted. So we remove
// the condition from each of the arguments of the OR condition and return
// the correlated condition to the caller.
List_iterator<Item> li(*(or_condition->argument_list()));
Item *item;
while ((item = li++)) {
Mem_root_array<Item *> cond_parts(thd->mem_root);
ExtractConditions(item, &cond_parts); // all elements AND'ed
Mem_root_array<Item *> final_args(thd->mem_root);
for (Item *pred : cond_parts) {
if (!cor_pred->eq(pred, false)) final_args.push_back(pred);
}
if (final_args.size() == 0)
li.remove();
else {
auto *tmp_cond = down_cast<Item_cond *>(*li.ref());
tmp_cond->argument_list()->clear();
for (Item *pred : final_args) tmp_cond->argument_list()->push_back(pred);
li.replace(tmp_cond);
}
}
or_condition->update_used_tables();
*correlated_cond = cor_pred;
return false;
}
/**
Called when the scalar subquery is correlated. If the type of correlation is
not supported, return false and leave \c *lifted_where unassigned. If it is
supported, \c *lifted_where contains a set of correlated predicates.
Currently, we can only de-correlate the WHERE clause: if the clause is not a
top level AND, we lift out the entire predicate to the JOIN clause. If it is
a top level AND, we lift out only those AND operand predicates which are
correlated, leaving un-correlated operand predicates in the subquery's WHERE
clause, as lifting all out would be too ineffective, potentially creating
large cartesian products in the subquery.
@param thd session context
@param subquery the subquery under consideration
@param[out] lifted_where set of predicates lifted out of WHERE
@returns true for error else false
*/
bool Query_block::supported_correlated_scalar_subquery(THD *thd,
Item::Css_info *subquery,
Item **lifted_where) {
// Disallow if subquery is in a JOIN clause
if (subquery->m_location &
Item_aggregate_type::Collect_scalar_subquery_info::L_JOIN_COND)
return false;
// Check that we do no have correlation inside a derived table in the
// FROM list
for (Table_ref *tr = leaf_tables; tr != nullptr; tr = tr->next_leaf)
if (tr->is_derived() && tr->derived_query_expression()->uncacheable)
return false;
// Disallow LIMIT, OFFSET
if (has_limit()) return false;
// Disallow window functions: transform not valid in their presence.
if (has_windows()) return false;
// Disallow ROLLUP
if (olap == ROLLUP_TYPE) return false;
const size_t first_selected = CountHiddenFields(fields);
if (is_implicitly_grouped()) {
Item_sum::Collect_grouped_aggregate_info aggregates(this);
if (fields[first_selected]->walk(&Item::collect_grouped_aggregates,
enum_walk::PREFIX,
pointer_cast<uchar *>(&aggregates))) {
return true;
}
bool saw_count{false};
Item_sum *cnt_item{nullptr};
for (auto a : aggregates.list) {
if (a->sum_func() == Item_sum::COUNT_FUNC ||
a->sum_func() == Item_sum::COUNT_DISTINCT_FUNC) {
saw_count = true;
cnt_item = a;
}
}
if (saw_count) {
// The COUNT() must be the selected item, no expression involved
if (fields[first_selected] != cnt_item) return false;
// If we have an occurrence of COUNT() in the selected expression and
// implicit grouping , we know that the transform can yield NULL rather
// than 0. In such a case, we need to add a COALESCE around the replaced
// subquery expression, i.e. COALESCE(derived.`COUNT()`, 0). This is
// because in a LEFT JOIN inner position, a COUNT(0) can yield NULL
// which it could not in the original subquery position.
subquery->m_add_coalesce = true;
}
}
Item *select_item = single_visible_field();
assert(select_item != nullptr);
// Disallow subquery in selected expression
if (select_item->has_subquery()) return false;
// Disallow non deterministic functions
if (select_item->type() == Item::FUNC_ITEM &&
down_cast<Item_func *>(select_item)->is_non_deterministic())
return false;
// Check whether the select expression may change nulls to non-nulls:
// it may change semantics of the transformed query if so, hence we deny this
for (auto *sel_expr : visible_fields()) {
if (WalkItem(sel_expr, enum_walk::PREFIX, [](Item *inner_item) {
return inner_item->type() == Item::FUNC_ITEM &&
!down_cast<Item_func *>(inner_item)->is_null_on_null();
}))
return false;
}
// Only allow outer reference in the WHERE clause, check now
// 1. select list
if (select_item->is_outer_reference()) return false;
// 2. group by clause
if (is_grouped()) {
for (ORDER *group = group_list.first; group != nullptr;
group = group->next) {
if ((*group->item)->is_outer_reference()) return false;
}
}
// 3. HAVING clause
if (having_cond() != nullptr && having_cond()->is_outer_reference())
return false;
// 4. ORDER BY clause
if (is_ordered()) {
for (ORDER *o = order_list.first; o != nullptr; o = o->next) {
if ((*o->item)->is_outer_reference()) return false;
}
}
if (m_where_cond == nullptr) {
// We expect to find outer references (field of a FROM table of a query
// block directly containing this subquery) in the WHERE, since all other
// possibilities are exhausted. But we didn't find any correlated field.
// It may have disappeared due to ORDER BY elimination in the subquery.
// The subquery will still be marked as using having correlated fields.
// How to handle this?
// TODO. Example:
// SELECT t1.a, SUM(t1.b)
// FROM t1
// WHERE t1.a = (SELECT SUM(t2.b)
// FROM t2 ORDER BY SUM(t2.b) + SUM(t1.b) LIMIT 1)
// GROUP BY t
return false;
}
// Check that the WHERE clause doesn't contain an aggregate function which
// aggregates outside this query block. We only want outer reference to
// a field.
Item_sum::Collect_grouped_aggregate_info aggregates(this);
if (m_where_cond->walk(&Item::collect_grouped_aggregates, enum_walk::PREFIX,
pointer_cast<uchar *>(&aggregates)))
return true;
if (aggregates.m_outside)
// some aggregate functions aggregate in an outer query, not supported
return false;
// Check that the WHERE clause doesn't contain any nested scalar subqueries
// that are still there (correlated of a kind we couldn't handle: any nested
// subqueries that did support transformation will already have been
// transformed).
Item::Collect_scalar_subquery_info subqueries;
subqueries.m_collect_unconditionally = true;
if (m_where_cond->walk(&Item::collect_scalar_subqueries, enum_walk::PREFIX,
pointer_cast<uchar *>(&subqueries)))
return true;
if (subqueries.m_list.size() > 0) return false;
// Get all fields/refs referenced in the WHERE clause, and count the number
// of correlated ones.
List<Item> fields_or_refs;
Item::Collect_item_fields_or_refs info{&fields_or_refs};
if (m_where_cond->walk(&Item::collect_item_field_or_ref_processor,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&info)))
return true;
int cnt = 0;
List_iterator<Item> li(fields_or_refs);
while (Item *i = li++) {
cnt = cnt + (i->is_outer_reference() ? 1 : 0);
}
if (cnt == 0) {
// We didn't find any correlated field. It may have disappeared due to
// ORDER BY elimination in the subquery. The subquery would still be
// marked as having correlated fields. Related case to missing WHERE
// above.
//
// TODO: We can improve these two cases by returning, presuming no
// correlation, but we would like to improve the status of the subquery's
// used_tables instead.
//
// Example: (correlated field inside ORDER BY optimized away)
// SELECT t1.a, SUM(t1.b)
// FROM t1
// WHERE t1.a = (SELECT SUM(t2.b)
// FROM t2
// WHERE t2.a > 4 ORDER BY t1.b)
// GROUP BY t1.a ORDER BY t1.a LIMIT 30;
return false;
}
// Extract the predicates that must be moved out to JOIN, i.e. those AND
// constituents which contain an outer reference, and those which shall
// remain.
Mem_root_array<Item *> staying(thd->mem_root);
List<Item> going;
Mem_root_array<Item *> condition_parts(thd->mem_root);
bool orig_where_modified = false;
ExtractConditions(m_where_cond, &condition_parts); // all elements AND'ed
for (Item *cond_part : condition_parts) {
// If the condition part extracted is an OR condition having correlated
// fields, we extract top level correlated condition if possible. If not,
// transformation cannot happen.
if (cond_part->is_outer_reference()) {
Item *cor_pred = nullptr;
if (cond_part->type() == Item::COND_ITEM) {
assert(down_cast<Item_cond *>(cond_part)->functype() ==
Item_func::COND_OR_FUNC);
if (extract_correlated_condition(thd, &cond_part, &cor_pred))
return false;
// Make a note if this extracted predicate is the same as the original
// where condition.
if (cond_part == m_where_cond) orig_where_modified = true;
} else {
cor_pred = cond_part;
cond_part = nullptr;
}
if (!is_correlated_predicate_eligible(cor_pred)) return false;
going.push_back(cor_pred);
}
if (cond_part) staying.push_back(cond_part);
}
// No correlated predicates. Note that we did find some fields earlier which
// were marked as being an "outer reference". However, it might be that the
// expression containing this outer reference is not marked as such due to
// some optimizations. Reject such queries for transformation (Since we
// anyways reject queries with non-correlated operands having expressions in
// is_correlated_predicate_eligible())
if (going.elements == 0) return false;
// Construct a new, reduced, WHERE clause sans the lifted predicates, which
// will stay in the subquery
if (staying.size() == 0) {
m_where_cond = nullptr;
} else {
// If the original where condition was a disjunctive correlated predicate,
// it would have been modified when extracting the correlated condition.
// So, just update the used tables.
if (orig_where_modified)
m_where_cond->update_used_tables();
else {
auto *new_where = down_cast<Item_cond *>(m_where_cond);
new_where->argument_list()->clear();
for (Item *pred : staying) new_where->argument_list()->push_back(pred);
m_where_cond = new_where;
new_where->update_used_tables();
}
assert(!m_where_cond->is_outer_reference());
}
// Construct the lifted part of the WHERE condition, which will go to the
// JOIN condition
auto *cond = new (thd->mem_root) Item_cond_and(going);
if (cond == nullptr) return true;
cond->update_used_tables();
*lifted_where = cond;
// there is no outer reference in this query expression/block anymore
uncacheable &= ~UNCACHEABLE_DEPENDENT;
master_query_expression()->uncacheable &= ~UNCACHEABLE_DEPENDENT;
return false;
}
bool Query_block::transform_scalar_subqueries_to_join_with_derived(THD *thd) {
if (thd->lex->m_subquery_to_derived_is_impossible) return false;
// Need at least one FROM table. Also, we do not want to perform this
// transformation if we have an assignment of a user variable in the query.
if (leaf_table_count == 0 || thd->lex->set_var_list.elements > 0)
return false;
/*
Collect list of eligible scalar subqueries used in JOIN conds, WHERE
conds, SELECT list expressions and HAVING cond. NOTE: Join conditions need
to be collected/transformed first since they have the be nested after the
outer join table (i.e. before the inner). So, if we have scalar subqueries
in other locations that the JOIN conditions, those need to be added after
the JOIN conditions have been put in place.
*/
Item::Collect_scalar_subquery_info subqueries;
// Collect from join conditions
if (walk_join_conditions(
m_table_nest,
[&](Item **expr_p) mutable -> bool {
subqueries.m_location =
Item::Collect_scalar_subquery_info::L_JOIN_COND;
if ((*expr_p)->has_subquery() &&
(*expr_p)->walk(&Item::collect_scalar_subqueries,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&subqueries)))
return true; /* purecov: inspected */
return false;
},
&subqueries))
return true; /* purecov: inspected */
subqueries.m_location = Item::Collect_scalar_subquery_info::L_WHERE;
Item **where_expr_p = &m_where_cond;
if (*where_expr_p != nullptr && (*where_expr_p)->has_subquery()) {
if ((*where_expr_p)
->walk(&Item::collect_scalar_subqueries,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&subqueries)))
return true; /* purecov: inspected */
}
subqueries.m_location =
Item_singlerow_subselect::Collect_scalar_subquery_info::L_SELECT;
for (Item *select_expr : visible_fields()) {
if (select_expr->has_subquery() &&
select_expr->walk(&Item::collect_scalar_subqueries,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&subqueries)))
return true; /* purecov: inspected */
}
subqueries.m_location = Item::Collect_scalar_subquery_info::L_HAVING;
Item **having_expr_p = &m_having_cond;
if (*having_expr_p != nullptr && (*having_expr_p)->has_subquery()) {
if ((*having_expr_p)
->walk(&Item::collect_scalar_subqueries,
enum_walk::PREFIX | enum_walk::POSTFIX,
pointer_cast<uchar *>(&subqueries)))
return true; /* purecov: inspected */
}
/*
Loop through eligible subqueries and see if we need the extra transform of
implicit grouping into a separate derived table before we can
transform the scalar subqueries to more derived tables. But we
cannot do this if we have a HAVING expression which references or contains
a subquery.
In that case, we throw in the towel and don't do any transformations. E.g.
1. SELECT SUM(a), (SELECT SUM(b) FROM t3) scalar
FROM t1
HAVING SUM(a) > scalar;
2. SELECT MAX(a)
FROM t1
WHERE FALSE
HAVING (SELECT MIN(a) FROM t1) > 0;
TODO: we could solve this by not moving the HAVING condition into the
derived table, but instead letting it remain in the transformed block as a
WHERE predicate, e.g. in the case of example 1:
SELECT derived0.summ, derived1.scalar
FROM (SELECT SUM(a) AS summ FROM t1) AS derived0
LEFT JOIN
(SELECT SUM(b) AS scalar FROM t3) AS derived1
ON TRUE
WHERE derived0.sum > derived1.scalar;
but this is not yet done.
*/
if (is_implicitly_grouped()) {
bool need_new_outer = false;
for (auto subquery : subqueries.m_list) {
auto *subq = subquery.item;
if (!query_block_contains_subquery(this, subq->query_expr())) continue;
// Possibly contradicting requirements
// (1) Subquery is in SELECT list: new_outer
// (2) No new outer possible if HAVING contains subquery
if (subquery.m_location & Item::Collect_scalar_subquery_info::L_SELECT) {
need_new_outer = true;
}
if (subquery.m_location & Item::Collect_scalar_subquery_info::L_HAVING)
return false;
}
if (need_new_outer) {
/*
In this case, the default transform with a single new derived table
and a LEFT OUTER JOIN isn't always correct - we need to first move the
aggregated query to a new derived subquery before we can transform the
scalar subqueries to other derived tables.
*/
bool break_off = false;
if (transform_grouped_to_derived(thd, &break_off)) return true;
if (break_off) return false; // skip transformation
}
}
/*
Loop through eligible subqueries and transform them to derived tables
and replace occurrences in expression trees with a field of the relevant
derived table.
*/
for (auto subquery : subqueries.m_list) {
Item_singlerow_subselect *const subq = subquery.item;
Query_expression *const subs_query_expression = subq->query_expr();
/*
[1] A reference to a scalar subquery from another query expression can
happen. We can't transform it here, but it may be replaced from
another query block.
[2] A constant scalar subquery will be evaluated at prepare time
*/
if (!query_block_contains_subquery(this, subs_query_expression) || // [1]
(subq->const_item() && subs_query_expression->is_optimized())) // [2]
continue;
Table_ref *tl;
// Do we need a run-time cardinality check?
bool needs_cardinality_check = !subquery.m_implicitly_grouped_and_no_union;
Item *lifted_where = nullptr;
if (subquery.m_correlation_map != 0) {
// We have a correlated subquery. Check if we can handle it or not (only
// applicable for subqueries without set operations)
if (!subs_query_expression->is_set_operation()) {
if (subs_query_expression->first_query_block()
->supported_correlated_scalar_subquery(thd, &subquery,
&lifted_where))
return true;
if (lifted_where == nullptr) continue;
} else
continue;
// Since we have a correlated subquery, we will use GROUP BY to
// materialize so, we do not expect a single row result set. For
// correlated scalar subquery, we use another run-time check.
needs_cardinality_check = false;
}
// Create a derived table for the subquery and nest it. If we found the
// subquery outside of a join condition, we simply nest it at the end
// with a LEFT OUTER .. ON TRUE, e.g.
//
// SELECT (SELECT COUNT(a) FROM t2) + a FROM t1;
// ->
// SELECT derived.cnt + t1.a FROM
// t1 LEFT OUTER JOIN
// (select COUNT(a) AS cnt FROM t2) AS derived
// ON TRUE;
//
// If we have a subquery inside a join condition we nest it after the
// outer table:
//
// SELECT * FROM t1 LEFT JOIN
// t2
// ON (SELECT COUNT(a) AS cnt FROM t2) = t1.a;
// ->
// SELECT * FROM t1 LEFT JOIN
// (SELECT COUNT(t2.a) AS cnt
// FROM t2) derived_1_0
// ON(TRUE) LEFT JOIN
// t2
// ON derived_1_0.cnt = t1.a
//
if (transform_subquery_to_derived(thd, &tl, subs_query_expression, subq,
/*use_inner_join=*/false,
needs_cardinality_check,
subquery.m_join_condition, lifted_where))
return true;
/*
Replace the subquery with a field in the materialized tmp table
in WHERE, JOIN conditions, HAVING clause or SELECT expressions (could be
optimized by keeping track in which expression the subquery was found)
*/
// Replace in WHERE clause?
if (subquery.m_location & Item::Collect_scalar_subquery_info::L_WHERE) {
if (*where_expr_p != nullptr &&
replace_subquery_in_expr(thd, &subquery, tl, where_expr_p))
return true; /* purecov: inspected */
}
// Replace in join conditions?
if (subquery.m_location & Item::Collect_scalar_subquery_info::L_JOIN_COND) {
if (walk_join_conditions(
m_table_nest,
[&](Item **expr_p) mutable -> bool {
subqueries.m_location =
Item::Collect_scalar_subquery_info::L_JOIN_COND;
if (*expr_p != nullptr &&
replace_subquery_in_expr(thd, &subquery, tl, expr_p))
return true; /* purecov: inspected */
return false;
},
&subqueries))
return true; /* purecov: inspected */
}
size_t old_size;
do {
old_size = fields.size();
for (Item *&select_expr : fields) {
// At this time, expression could be wrapped in a rollup group
// wrapper. It is the inner item of the rollup group item that
// gets replaced. We take care to retain the rollup wrappers.
Item *prev_value = unwrap_rollup_group(select_expr);
if (replace_subquery_in_expr(thd, &subquery, tl, &select_expr))
return true;
Item *unwrapped_select_expr = unwrap_rollup_group(select_expr);
if (unwrapped_select_expr != prev_value) {
replace_referenced_item(prev_value, unwrapped_select_expr);
}
if (fields.size() != old_size) {
// The (implicit) iterator over fields has been invalidated,
// probably due to a call to split_sum_func(), so we cannot
// iterate any further. The simplest fix is just restarting
// the loop, as it is idempotent.
break;
}
}
} while (old_size != fields.size());
// Replace in HAVING clause?
if (subquery.m_location & (Item::Collect_scalar_subquery_info::L_HAVING)) {
if (*having_expr_p != nullptr &&
replace_subquery_in_expr(thd, &subquery, tl, having_expr_p))
return true; /* purecov: inspected */
}
// A subquery in the SELECT list can be present in the GROUP BY clause
// so we potentially need to replace there too.
for (ORDER *ord = group_list.first; ord != nullptr; ord = ord->next) {
if (replace_subquery_in_expr(thd, &subquery, tl, ord->item)) return true;
}
OPT_TRACE_TRANSFORM(
&thd->opt_trace, trace_wrapper, trace_object,
tl->derived_query_expression()->first_query_block()->select_number,
"scalar subquery", "derived table");
opt_trace_print_expanded_query(thd, this, &trace_object);
}
return false;
}
bool Query_block::lift_fulltext_from_having_to_select_list(THD *thd) {
Item *having_cond = m_having_cond;
if (having_cond == nullptr) return false;
Prealloced_array<Item **, 8> refs_to_fulltext(PSI_NOT_INSTRUMENTED);
// Add all full-text search calls as hidden elements of the SELECT list, if
// they are not already there.
if (WalkItem(having_cond, enum_walk::PREFIX | enum_walk::POSTFIX,
NonAggregatedFullTextSearchVisitor(
[this, thd, &refs_to_fulltext](Item_func_match *item) {
const auto it = find(fields.begin(), fields.end(), item);
Item **ref =
it != fields.end() ? &*it : add_hidden_item(item);
// The above is sufficient for the hypergraph optimizer.
// The old optimizer additionally needs to have references
// from the HAVING clause to the corresponding elements in
// the SELECT list, so that it knows that it should read
// results from a temporary table instead of evaluating the
// expressions if they have been materialized. So we wrap
// these items in an Item_ref later.
if (!thd->lex->using_hypergraph_optimizer()) {
return refs_to_fulltext.push_back(ref);
}
return false;
}))) {
return true;
}
// Add Item_ref indirection in the old optimizer.
for (Item **item_to_replace : refs_to_fulltext) {
assert(!thd->lex->using_hypergraph_optimizer());
having_cond = TransformItem(having_cond, [&](Item *sub_item) -> Item * {
if (sub_item == *item_to_replace) {
return new (thd->mem_root)
Item_ref(&context, item_to_replace, "<fulltext>");
} else {
return sub_item;
}
});
if (having_cond == nullptr) return true;
}
// The MATCH calls are always wrapped in other functions, since non-boolean
// predicates in HAVING are made complete. The topmost Item should therefore
// never be changed in the above calls to TransformItem().
assert(having_cond == m_having_cond);
return false;
}
/**
@} (end of group Query_Resolver)
*/