mysql-server/sql/sql_select.cc

/* Copyright (c) 2000, 2024, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is designed to work with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have either included with
   the program or referenced in the documentation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file sql/sql_select.cc

  @brief Evaluate query expressions, throughout resolving, optimization and
         execution.

  @defgroup Query_Optimizer  Query Optimizer
  @{
*/

#include "sql/sql_select.h"

#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <atomic>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <memory>
#include <string>
#include <string_view>
#include "field_types.h"
#include "lex_string.h"
#include "m_string.h"        // native_strncasecmp(), native_strcasecmp()
#include "mem_root_deque.h"  // mem_root_deque
#include "my_alloc.h"
#include "my_bitmap.h"
#include "my_byteorder.h"  // int8store, uint8korr
#include "my_compiler.h"
#include "my_dbug.h"
#include "my_pointer_arithmetic.h"
#include "my_sqlcommand.h"
#include "my_sys.h"
#include "mysql/plugin.h"
#include "mysql/strings/m_ctype.h"
#include "mysql/udf_registration_types.h"
#include "mysql_com.h"
#include "mysqld_error.h"
#include "scope_guard.h"
#include "sql-common/json_dom.h"
#include "sql-common/my_decimal.h"
#include "sql/auth/auth_acls.h"
#include "sql/auth/auth_common.h"  // *_ACL
#include "sql/auth/sql_security_ctx.h"
#include "sql/current_thd.h"
#include "sql/debug_sync.h"  // DEBUG_SYNC
#include "sql/enum_query_type.h"
#include "sql/error_handler.h"  // Ignore_error_handler
#include "sql/field.h"
#include "sql/filesort.h"  // filesort_free_buffers
#include "sql/handler.h"
#include "sql/intrusive_list_iterator.h"
#include "sql/item.h"
#include "sql/item_func.h"
#include "sql/item_json_func.h"
#include "sql/item_subselect.h"
#include "sql/item_sum.h"  // Item_sum
#include "sql/iterators/row_iterator.h"
#include "sql/iterators/sorting_iterator.h"
#include "sql/join_optimizer/access_path.h"
#include "sql/join_optimizer/bit_utils.h"
#include "sql/join_optimizer/join_optimizer.h"
#include "sql/join_optimizer/replace_item.h"
#include "sql/key.h"  // key_copy, key_cmp, key_cmp_if_same
#include "sql/key_spec.h"
#include "sql/lock.h"    // mysql_unlock_some_tables,
#include "sql/mysqld.h"  // stage_init
#include "sql/nested_join.h"
#include "sql/opt_explain.h"
#include "sql/opt_explain_format.h"
#include "sql/opt_hints.h"  // hint_key_state()
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/parse_tree_node_base.h"
#include "sql/query_options.h"
#include "sql/query_result.h"
#include "sql/range_optimizer/path_helpers.h"
#include "sql/range_optimizer/range_optimizer.h"
#include "sql/set_var.h"
#include "sql/sql_base.h"
#include "sql/sql_class.h"
#include "sql/sql_cmd.h"
#include "sql/sql_do.h"
#include "sql/sql_error.h"
#include "sql/sql_executor.h"
#include "sql/sql_insert.h"       // Sql_cmd_insert_base
#include "sql/sql_join_buffer.h"  // JOIN_CACHE
#include "sql/sql_lex.h"
#include "sql/sql_list.h"
#include "sql/sql_optimizer.h"  // JOIN
#include "sql/sql_parse.h"      // bind_fields
#include "sql/sql_planner.h"    // calculate_condition_filter
#include "sql/sql_plugin.h"
#include "sql/sql_resolver.h"
#include "sql/sql_test.h"       // misc. debug printing utilities
#include "sql/sql_timer.h"      // thd_timer_set
#include "sql/sql_tmp_table.h"  // tmp tables
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/temp_table_param.h"
#include "sql/thd_raii.h"
#include "sql/window.h"  // ignore_gaf_const_opt
#include "sql_string.h"
#include "template_utils.h"
#include "thr_lock.h"

using std::max;
using std::min;
using namespace std::literals;

static store_key *get_store_key(THD *thd, Item *val, table_map used_tables,
                                table_map const_tables,
                                const KEY_PART_INFO *key_part, uchar *key_buff,
                                uint maybe_null);
static bool retry_with_secondary_engine(THD *thd);

using Global_tables_iterator =
    IntrusiveListIterator<Table_ref, &Table_ref::next_global>;

/// A list interface over the Table_ref::next_global pointer.
using Global_tables_list = IteratorContainer<Global_tables_iterator>;

/**
   Check whether the statement is a SHOW command using INFORMATION_SCHEMA system
   views.

   @param  thd   Thread (session) context.

   @returns true if command uses INFORMATION_SCHEMA system view, false otherwise
*/
inline bool is_show_cmd_using_system_view(THD *thd) {
  return sql_command_flags[thd->lex->sql_command] & CF_SHOW_USES_SYSTEM_VIEW;
}

/**
  Get the maximum execution time for a statement.

  @return Length of time in milliseconds.

  @remark A zero timeout means that no timeout should be
          applied to this particular statement.

*/
static inline ulong get_max_execution_time(THD *thd) {
  return (thd->lex->max_execution_time ? thd->lex->max_execution_time
                                       : thd->variables.max_execution_time);
}

/**
  Check whether max statement time is applicable to statement or not.


  @param  thd   Thread (session) context.

  @return true  if max statement time is applicable to statement
  @return false otherwise.
*/
static inline bool is_timer_applicable_to_statement(THD *thd) {
  /*
    The following conditions must be met:
      - is SELECT statement.
      - timer support is implemented and it is initialized.
      - statement is not made by the slave threads.
      - timer is not set for statement
      - timer out value of is set
      - SELECT statement is not from any stored programs.
  */
  return (thd->lex->sql_command == SQLCOM_SELECT &&
          (have_statement_timeout == SHOW_OPTION_YES) && !thd->slave_thread &&
          !thd->timer &&
          (thd->lex->max_execution_time || thd->variables.max_execution_time) &&
          !thd->sp_runtime_ctx);
}

/**
  Set the time until the currently running statement is aborted.

  @param  thd   Thread (session) context.

  @return true if the timer was armed.
*/

bool set_statement_timer(THD *thd) {
  const ulong max_execution_time = get_max_execution_time(thd);

  /**
    whether timer can be set for the statement or not should be checked before
    calling set_statement_timer function.
  */
  assert(is_timer_applicable_to_statement(thd) == true);
  assert(thd->timer == nullptr);

  thd->timer = thd_timer_set(thd, thd->timer_cache, max_execution_time);
  thd->timer_cache = nullptr;

  if (thd->timer) {
    thd->status_var.max_execution_time_set++;
    global_aggregated_stats.get_shard(thd->thread_id())
        .max_execution_time_set++;
  } else {
    thd->status_var.max_execution_time_set_failed++;
    global_aggregated_stats.get_shard(thd->thread_id())
        .max_execution_time_set_failed++;
  }
  return thd->timer;
}

/**
  Deactivate the timer associated with the statement that was executed.

  @param  thd   Thread (session) context.
*/

void reset_statement_timer(THD *thd) {
  assert(thd->timer);
  /* Cache the timer object if it can be reused. */
  thd->timer_cache = thd_timer_reset(thd->timer);
  thd->timer = nullptr;
}

/**
 * Checks if a query reads a column that is _not_ available in the secondary
 * engine (i.e. a column defined with NOT SECONDARY).
 *
 * @param lex Parse tree descriptor.
 *
 * @return True if at least one of the read columns is not in the secondary
 * engine, false otherwise.
 */
bool reads_not_secondary_columns(const LEX *lex) {
  // Check all read base tables.
  const Table_ref *tl = lex->query_tables;
  // For INSERT INTO SELECT statements, the table to insert into does not have
  // to have a secondary engine. This table is always first in the list.
  if (lex->sql_command == SQLCOM_INSERT_SELECT && tl != nullptr)
    tl = tl->next_global;
  for (; tl != nullptr; tl = tl->next_global) {
    if (tl->is_placeholder()) continue;

    // Check all read columns of table.
    for (unsigned int i = bitmap_get_first_set(tl->table->read_set);
         i != MY_BIT_NONE; i = bitmap_get_next_set(tl->table->read_set, i)) {
      if (tl->table->field[i]->is_flag_set(NOT_SECONDARY_FLAG)) {
        Opt_trace_context *trace = &lex->thd->opt_trace;
        if (trace->is_started()) {
          std::string message("");
          message.append("Column ");
          message.append(tl->table->field[i]->field_name);
          message.append(" is marked as NOT SECONDARY.");
          const Opt_trace_object trace_wrapper(trace);
          Opt_trace_object oto(trace, "secondary_engine_not_used");
          oto.add_alnum("reason", message.c_str());
        }
        return true;
      }
    }
  }

  return false;
}

static bool has_secondary_engine_defined(const LEX *lex) {
  for (const Table_ref *tl = lex->query_tables; tl != nullptr;
       tl = tl->next_global) {
    if (tl == nullptr || tl->table == nullptr || tl->table->s == nullptr ||
        !tl->table->s->has_secondary_engine()) {
      return false;
    }
  }
  return (lex->table_count > 0);
}

// Compare two engine names using the system collation.
static bool equal_engines(const LEX_CSTRING &engine1,
                          const LEX_CSTRING &engine2) {
  return system_charset_info->coll->strnncollsp(
             system_charset_info,
             pointer_cast<const unsigned char *>(engine1.str), engine1.length,
             pointer_cast<const unsigned char *>(engine2.str),
             engine2.length) == 0;
}

// Helper function that checks if the command is eligible for secondary engine
// and if that's true returns the name of that eligible secondary storage
// engine.
const MYSQL_LEX_CSTRING *get_eligible_secondary_engine_from(const LEX *lex) {
  // Don't use secondary storage engines for statements that call stored
  // routines.
  if (lex->uses_stored_routines()) return nullptr;
  // Now check if the opened tables are available in a secondary
  // storage engine. Only use the secondary tables if all the tables
  // have a secondary tables, and they are all in the same secondary
  // storage engine.
  const LEX_CSTRING *secondary_engine = nullptr;
  const Table_ref *tl = lex->query_tables;

  if (lex->sql_command == SQLCOM_INSERT_SELECT && tl != nullptr) {
    // If table from Table_ref is either view or derived table then
    // do not perform INSERT AS SELECT.
    if (tl->is_view_or_derived()) return nullptr;
    // For INSERT INTO SELECT statements, the table to insert into does not have
    // to have a secondary engine. This table is always first in the list.
    tl = tl->next_global;
  }
  for (; tl != nullptr; tl = tl->next_global) {
    // Schema tables are not available in secondary engines.
    if (tl->schema_table != nullptr) return nullptr;

    // We're only interested in base tables.
    if (tl->is_placeholder()) continue;

    // check if required pointers are valid before proceeding further
    if (tl->table == nullptr || tl->table->s == nullptr) continue;

    assert(!tl->table->s->is_secondary_engine());
    // Give up, if the table is not in a secondary engine,
    if (!tl->table->s->has_secondary_engine()) return nullptr;

    if (secondary_engine == nullptr) {
      // First base table. Save its secondary engine name for later.
      secondary_engine = &tl->table->s->secondary_engine;
    } else if (!equal_engines(*secondary_engine,
                              tl->table->s->secondary_engine)) {
      // In a different secondary engine than the previous base tables.
      return nullptr;
    }
  }
  return secondary_engine;
}

const handlerton *get_secondary_engine_handlerton(const LEX *lex) {
  if (const handlerton *hton = lex->m_sql_cmd->secondary_engine();
      hton != nullptr) {
    return hton;
  }
  const LEX_CSTRING *storage_engine = get_eligible_secondary_engine_from(lex);
  if (storage_engine != nullptr) {
    plugin_ref ref = ha_resolve_by_name(lex->thd, storage_engine, false);
    if (ref != nullptr) {
      return plugin_data<handlerton *>(ref);
    }
  }
  return nullptr;
}

std::string_view get_secondary_engine_fail_reason(const LEX *lex) {
  const auto *hton = get_secondary_engine_handlerton(lex);
  if (hton != nullptr &&
      hton->get_secondary_engine_offload_or_exec_fail_reason != nullptr &&
      lex->thd->is_secondary_engine_forced()) {
    return hton->get_secondary_engine_offload_or_exec_fail_reason(lex->thd);
  }
  return {};
}

std::string_view find_secondary_engine_fail_reason(const LEX *lex) {
  const auto *hton = get_secondary_engine_handlerton(lex);
  if (hton != nullptr &&
      hton->find_secondary_engine_offload_fail_reason != nullptr &&
      lex->thd->variables.use_secondary_engine == SECONDARY_ENGINE_FORCED) {
    return hton->find_secondary_engine_offload_fail_reason(lex->thd);
  }
  if (hton == nullptr && get_eligible_secondary_engine_from(lex) != nullptr &&
      get_eligible_secondary_engine_from(lex)->length > 0 &&
      (native_strncasecmp(get_eligible_secondary_engine_from(lex)->str, "MOCK",
                          4) == 0)) {
    // We don't support secondary storage engine execution,
    // if it is a MOCK secondary engine.
    return "Queries cannot be offloaded to a MOCK secondary engine";
  }
  // return a generic offload error if no specific reason is known
  return "All plans were rejected by the secondary storage engine";
}

static bool set_secondary_engine_fail_reason(const LEX *lex,
                                             std::string_view reason) {
  const auto *hton = get_secondary_engine_handlerton(lex);
  if (hton != nullptr &&
      hton->set_secondary_engine_offload_fail_reason != nullptr &&
      lex->thd->is_secondary_engine_forced()) {
    hton->set_secondary_engine_offload_fail_reason(lex->thd, reason);
    return true;
  }
  return false;
}

void set_fail_reason_and_raise_error(const LEX *lex, std::string_view reason) {
  assert(!reason.empty());
  if (set_secondary_engine_fail_reason(lex, reason)) {
    my_error(ER_SECONDARY_ENGINE, MYF(0),
             (std::data(get_secondary_engine_fail_reason(lex))));
  } else {
    std::string err_msg{"Reason: "};
    err_msg.append("\"");
    err_msg.append(std::data(reason));
    err_msg.append("\"");
    my_error(ER_SECONDARY_ENGINE, MYF(0), err_msg.c_str());
  }
}

void find_and_set_offload_fail_reason(const LEX *lex) {
  // If we are unable to gather secondary-engine-specific error message,
  // check known unsupported features and raise a specific offload error.
  std::string_view err_msg{};
  if (lex->uses_stored_routines() ||
      (lex->m_sql_cmd != nullptr && lex->m_sql_cmd->is_part_of_sp()) ||
      lex->thd->sp_runtime_ctx != nullptr) {
    // We don't support secondary storage engine execution,
    // if the query has statements that call stored routines.
    err_msg =
        "Queries part of stored functions or calling stored functions are not "
        "supported in secondary engines";
  } else if (!has_secondary_engine_defined(lex)) {
    // We don't support secondary storage engine execution,
    // if at least one of the query tables have no secondary engine defined.
    err_msg =
        "No secondary engine defined for at least one of the query tables";
  } else {
    err_msg = find_secondary_engine_fail_reason(lex);
  }
  assert(!err_msg.empty());
  set_fail_reason_and_raise_error(lex, err_msg);
}

bool validate_use_secondary_engine(const LEX *lex) {
  if (lex->m_sql_cmd == nullptr) {
    return false;
  }
  THD *thd = lex->thd;
  const Sql_cmd *sql_cmd = lex->m_sql_cmd;
  // Ensure that all read columns are in the secondary engine.
  if (sql_cmd->using_secondary_storage_engine()) {
    if (reads_not_secondary_columns(lex)) {
      std::string_view err_msg = find_secondary_engine_fail_reason(lex);
      assert(!err_msg.empty());
      set_fail_reason_and_raise_error(lex, err_msg);
      return true;
    }
    return false;
  }

  if (thd->secondary_engine_optimization() ==
          Secondary_engine_optimization::SECONDARY &&
      thd->is_secondary_engine_forced()) {
    // Gather secondary-engine-specific error message.
    std::string_view offloadfail_reason = get_secondary_engine_fail_reason(lex);
    if (!offloadfail_reason.empty()) {
      if (thd->is_error()) {
        thd->clear_error();
      }
      my_error(ER_SECONDARY_ENGINE, MYF(0), std::data(offloadfail_reason));
      return true;
    }
    // If we haven't generated a specific error so far,
    // we try to generate one here.
    if (!thd->is_error()) {
      find_and_set_offload_fail_reason(lex);
      return true;
    }
  }

  return false;
}

bool Sql_cmd_dml::prepare(THD *thd) {
  DBUG_TRACE;

  bool error_handler_active = false;

  Ignore_error_handler ignore_handler;
  Strict_error_handler strict_handler;

  // @todo: Move this to constructor?
  lex = thd->lex;

  // Parser may have assigned a specific query result handler
  result = lex->result;

  assert(!is_prepared());

  assert(!lex->unit->is_prepared() && !lex->unit->is_optimized() &&
         !lex->unit->is_executed());
  /*
    Constant folding could cause warnings during preparation. Make
    sure they are promoted to errors when strict mode is enabled.
  */
  if (is_data_change_stmt() && needs_explicit_preparation()) {
    // Push ignore / strict error handler
    if (lex->is_ignore()) {
      thd->push_internal_handler(&ignore_handler);
      error_handler_active = true;
    } else if (thd->is_strict_mode()) {
      thd->push_internal_handler(&strict_handler);
      error_handler_active = true;
    }
  }

  // Perform a coarse statement-specific privilege check.
  if (precheck(thd)) goto err;

  // Trigger out_of_memory condition inside open_tables_for_query()
  DBUG_EXECUTE_IF("sql_cmd_dml_prepare__out_of_memory",
                  DBUG_SET("+d,simulate_out_of_memory"););
  /*
    Open tables and expand views.
    During prepare of query (not as part of an execute), acquire only
    S metadata locks instead of SW locks to be compatible with concurrent
    LOCK TABLES WRITE and global read lock.
  */
  if (open_tables_for_query(
          thd, lex->query_tables,
          needs_explicit_preparation() ? MYSQL_OPEN_FORCE_SHARED_MDL : 0)) {
    if (thd->is_error())  // @todo - dictionary code should be fixed
      goto err;
    if (error_handler_active) thd->pop_internal_handler();
    lex->cleanup(false);
    return true;
  }
  DEBUG_SYNC(thd, "after_open_tables");
#ifndef NDEBUG
  if (sql_command_code() == SQLCOM_SELECT) DEBUG_SYNC(thd, "after_table_open");
#endif

  lex->set_using_hypergraph_optimizer(
      thd->optimizer_switch_flag(OPTIMIZER_SWITCH_HYPERGRAPH_OPTIMIZER));

  if (thd->lex->validate_use_in_old_optimizer()) {
    return true;
  }

  if (lex->set_var_list.elements && resolve_var_assignments(thd, lex))
    goto err; /* purecov: inspected */

  {
    const Prepare_error_tracker tracker(thd);
    const Prepared_stmt_arena_holder ps_arena_holder(thd);
    const Enable_derived_merge_guard derived_merge_guard(
        thd, is_show_cmd_using_system_view(thd));

    if (prepare_inner(thd)) goto err;
    if (needs_explicit_preparation() && result != nullptr) {
      result->cleanup();
    }
    if (!is_regular()) {
      if (save_cmd_properties(thd)) goto err;
    }
    if (needs_explicit_preparation()) {
      lex->set_secondary_engine_execution_context(nullptr);
    }
    set_prepared();
  }

  // Pop ignore / strict error handler
  if (error_handler_active) thd->pop_internal_handler();

  // Revertable changes are not supported during preparation
  assert(thd->change_list.is_empty());

  return false;

err:
  assert(thd->is_error());
  DBUG_PRINT("info", ("report_error: %d", thd->is_error()));

  lex->set_secondary_engine_execution_context(nullptr);

  if (error_handler_active) thd->pop_internal_handler();

  if (result != nullptr) result->cleanup();

  lex->cleanup(false);

  return true;
}

bool Sql_cmd_select::accept(THD *thd, Select_lex_visitor *visitor) {
  return thd->lex->unit->accept(visitor);
}

const MYSQL_LEX_CSTRING *Sql_cmd_select::eligible_secondary_storage_engine(
    THD *thd) const {
  return get_eligible_secondary_engine(thd);
}

/**
  Prepare a SELECT statement.
*/

bool Sql_cmd_select::prepare_inner(THD *thd) {
  if (lex->is_explain()) {
    /*
      Always use Query_result_send for EXPLAIN, even if it's an EXPLAIN for
      SELECT ... INTO OUTFILE: a user application should be able to prepend
      EXPLAIN to any query and receive output for it, even if the query itself
      redirects the output.
    */
    result = new (thd->mem_root) Query_result_send();
    if (result == nullptr) return true; /* purecov: inspected */
  } else {
    if (result == nullptr) {
      if (sql_command_code() == SQLCOM_SELECT)
        result = new (thd->mem_root) Query_result_send();
      else if (sql_command_code() == SQLCOM_DO)
        result = new (thd->mem_root) Query_result_do();
      else  // Currently assumed to be a SHOW command
        result = new (thd->mem_root) Query_result_send();

      if (result == nullptr) return true; /* purecov: inspected */
    }
  }

  Query_expression *const unit = lex->unit;
  Query_block *parameters = unit->global_parameters();
  if (!parameters->has_limit()) {
    parameters->m_use_select_limit = true;
  }

  if (unit->is_simple()) {
    Query_block *const select = unit->first_query_block();
    select->context.resolve_in_select_list = true;
    select->set_query_result(result);
    unit->set_query_result(result);
    // Unlock the table as soon as possible, so don't set SELECT_NO_UNLOCK.
    select->make_active_options(0, 0);

    if (select->prepare(thd, nullptr)) return true;

    unit->set_prepared();
  } else {
    // If we have multiple query blocks, don't unlock and re-lock
    // tables between each each of them.
    if (unit->prepare(thd, result, nullptr, SELECT_NO_UNLOCK, 0)) return true;
  }

  return false;
}

bool has_external_table(const LEX *lex) {
  if (lex->m_sql_cmd == nullptr) {
    return false;
  }
  for (Table_ref *ref = lex->query_tables; ref != nullptr;
       ref = ref->next_global) {
    if (ref->is_external()) {
      return true;
    }
  }
  return false;
}

bool Sql_cmd_dml::execute(THD *thd) {
  DBUG_TRACE;

  lex = thd->lex;

  Query_expression *const unit = lex->unit;

  bool statement_timer_armed = false;
  bool error_handler_active = false;

  Ignore_error_handler ignore_handler;
  Strict_error_handler strict_handler;

  // If statement is preparable, it must be prepared
  assert(owner() == nullptr || is_prepared());
  // If statement is regular, it must be unprepared
  assert(!is_regular() || !is_prepared());
  // If statement is part of SP, it can be both prepared and unprepared.

  // If a timer is applicable to statement, then set it.
  if (is_timer_applicable_to_statement(thd))
    statement_timer_armed = set_statement_timer(thd);

  if (is_data_change_stmt()) {
    // Push ignore / strict error handler
    if (lex->is_ignore()) {
      thd->push_internal_handler(&ignore_handler);
      error_handler_active = true;
      /*
        UPDATE IGNORE can be unsafe. We therefore use row based
        logging if mixed or row based logging is available.
        TODO: Check if the order of the output of the select statement is
        deterministic. Waiting for BUG#42415
      */
      if (lex->sql_command == SQLCOM_UPDATE)
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_UPDATE_IGNORE);
    } else if (thd->is_strict_mode()) {
      thd->push_internal_handler(&strict_handler);
      error_handler_active = true;
    }
  }

  if (!is_prepared()) {
    if (prepare(thd)) goto err;
  } else {
    /*
      Prepared statement, open tables referenced in statement and check
      privileges for it.
    */
    cleanup(thd);
    if (open_tables_for_query(thd, lex->query_tables, 0)) goto err;
#ifndef NDEBUG
    if (sql_command_code() == SQLCOM_SELECT)
      DEBUG_SYNC(thd, "after_table_open");
#endif
    // Use the hypergraph optimizer for the SELECT statement, if enabled.
    const bool need_hypergraph_optimizer =
        thd->optimizer_switch_flag(OPTIMIZER_SWITCH_HYPERGRAPH_OPTIMIZER);

    if (need_hypergraph_optimizer != lex->using_hypergraph_optimizer() &&
        ask_to_reprepare(thd)) {
      goto err;
    }
    assert(need_hypergraph_optimizer == lex->using_hypergraph_optimizer());

    // Bind table and field information
    if (restore_cmd_properties(thd)) goto err;
    if (check_privileges(thd)) goto err;

    if (m_lazy_result) {
      const Prepared_stmt_arena_holder ps_arena_holder(thd);

      if (result->prepare(thd, *unit->get_unit_column_types(), unit)) goto err;
      m_lazy_result = false;
    }
  }

  if (validate_use_secondary_engine(lex)) goto err;

  lex->set_exec_started();

  DBUG_EXECUTE_IF("use_attachable_trx",
                  thd->begin_attachable_ro_transaction(););

  THD_STAGE_INFO(thd, stage_init);

  thd->clear_current_query_costs();

  // Replication may require extra check of data change statements
  if (is_data_change_stmt() && run_before_dml_hook(thd)) goto err;

  // Revertable changes are not supported during preparation
  assert(thd->change_list.is_empty());

  assert(!lex->is_query_tables_locked());
  /*
    Locking of tables is done after preparation but before optimization.
    This allows to do better partition pruning and avoid locking unused
    partitions. As a consequence, in such a case, prepare stage can rely only
    on metadata about tables used and not data from them.
  */
  if (!is_empty_query()) {
    if (lock_tables(thd, lex->query_tables, lex->table_count, 0)) goto err;
  }

  // Perform statement-specific execution
  if (execute_inner(thd)) goto err;

  // Count the number of statements offloaded to a secondary storage engine.
  if (using_secondary_storage_engine() && lex->unit->is_executed()) {
    ++thd->status_var.secondary_engine_execution_count;
    global_aggregated_stats.get_shard(thd->thread_id())
        .secondary_engine_execution_count++;
  }

  assert(!thd->is_error());

  // Pop ignore / strict error handler
  if (error_handler_active) thd->pop_internal_handler();

  THD_STAGE_INFO(thd, stage_end);

  // Do partial cleanup (preserve plans for EXPLAIN).
  lex->cleanup(false);
  lex->clear_values_map();
  lex->set_secondary_engine_execution_context(nullptr);

  // Perform statement-specific cleanup for Query_result
  if (result != nullptr) result->cleanup();

  thd->save_current_query_costs();

  thd->update_previous_found_rows();

  DBUG_EXECUTE_IF("use_attachable_trx", thd->end_attachable_transaction(););

  if (statement_timer_armed && thd->timer) reset_statement_timer(thd);

  /*
    This sync point is normally right before thd->query_plan is reset, so
    EXPLAIN FOR CONNECTION can catch the plan. It is copied here as
    after unprepare() EXPLAIN considers the query as "not ready".
    @todo remove in WL#6570 when unprepare() is gone.
  */
  DEBUG_SYNC(thd, "before_reset_query_plan");

  return false;

err:
  assert(thd->is_error() || thd->killed);
  DBUG_PRINT("info", ("report_error: %d", thd->is_error()));
  THD_STAGE_INFO(thd, stage_end);

  lex->cleanup(false);
  lex->clear_values_map();
  lex->set_secondary_engine_execution_context(nullptr);

  // Abort and cleanup the result set (if it has been prepared).
  if (result != nullptr) {
    result->abort_result_set(thd);
    result->cleanup();
  }
  if (error_handler_active) thd->pop_internal_handler();

  if (statement_timer_armed && thd->timer) reset_statement_timer(thd);

  /*
    There are situations where we want to know the cost of a query that
    has failed during execution, e.g because of a timeout.
  */
  thd->save_current_query_costs();

  DBUG_EXECUTE_IF("use_attachable_trx", thd->end_attachable_transaction(););

  return thd->is_error();
}

void accumulate_statement_cost(const LEX *lex) {
  Opt_trace_context *trace = &lex->thd->opt_trace;
  const Opt_trace_disable_I_S disable_trace(trace, true);

  double total_cost = 0.0;
  for (const Query_block *query_block = lex->all_query_blocks_list;
       query_block != nullptr;
       query_block = query_block->next_select_in_list()) {
    if (query_block->join == nullptr) continue;

    // Get the cost of this query block.
    double query_block_cost = query_block->join->best_read;

    // If it is a non-cacheable subquery, estimate how many times it
    // needs to be executed, and adjust the cost accordingly.
    const Item_subselect *item = query_block->master_query_expression()->item;
    if (item != nullptr && !query_block->is_cacheable())
      query_block_cost *= calculate_subquery_executions(item, trace);

    total_cost += query_block_cost;
  }
  lex->thd->m_current_query_cost = total_cost;
}

namespace {
/**
   Gets the secondary storage engine pre prepare hook function, if any. If no
   hook is found, this function returns false. If hook function is found, it
   returns the return value of the hook. Please refer to
   secondary_engine_pre_prepare_hook_t definition for description of its return
   value.
 */
bool SecondaryEngineCallPrePrepareHook(THD *thd,
                                       const LEX_CSTRING &secondary_engine) {
  handlerton *hton = nullptr;
  plugin_ref ref = ha_resolve_by_name(thd, &secondary_engine, false);
  if (ref != nullptr) {
    hton = plugin_data<handlerton *>(ref);
  }

  if (hton != nullptr) {
    secondary_engine_pre_prepare_hook_t secondary_engine_pre_prepare_hook =
        hton->secondary_engine_pre_prepare_hook;
    if (secondary_engine_pre_prepare_hook != nullptr) {
      return secondary_engine_pre_prepare_hook(thd);
    }
  }
  return false;
}
}  // namespace

/**
  Checks if a query should be retried using a secondary storage engine.

  @param thd      the current session

  @retval true   if the statement should be retried in a secondary engine
  @retval false  if the statement should not be retried
*/
static bool retry_with_secondary_engine(THD *thd) {
  // Only retry if the current statement is being tentatively
  // optimized for the primary engine.
  if (thd->secondary_engine_optimization() !=
      Secondary_engine_optimization::PRIMARY_TENTATIVELY)
    return false;

  Sql_cmd *const sql_cmd = thd->lex->m_sql_cmd;
  assert(!sql_cmd->using_secondary_storage_engine());

  // Don't retry if there is a property of the statement that prevents use of
  // secondary engines.
  const LEX_CSTRING *secondary_engine =
      thd->lex->m_sql_cmd->eligible_secondary_storage_engine(thd);

  if (secondary_engine == nullptr) {
    sql_cmd->disable_secondary_storage_engine();
    return false;
  }

  // Don't retry if it's already determined that the statement should not be
  // executed by a secondary engine.
  if (sql_cmd->secondary_storage_engine_disabled()) {
    return false;
  }

  // Don't retry if there is a property of the environment that prevents use of
  // secondary engines.
  if (!thd->is_secondary_storage_engine_eligible()) {
    return false;
  }

  // If the query cannot be executed in the PRIMARY engine, always attempt to
  // execute it in the secondary engine whenever possible.
  if (thd->lex->can_execute_only_in_secondary_engine()) {
    return true;
  }

  return SecondaryEngineCallPrePrepareHook(thd, *secondary_engine);
}

bool optimize_secondary_engine(THD *thd) {
  if (retry_with_secondary_engine(thd)) {
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-do-while)
    DBUG_EXECUTE_IF("emulate_user_query_kill", {
      thd->get_stmt_da()->set_error_status(thd, ER_QUERY_INTERRUPTED);
      return true;
    });
    thd->get_stmt_da()->reset_diagnostics_area();
    thd->get_stmt_da()->set_error_status(thd, ER_PREPARE_FOR_SECONDARY_ENGINE);

    return true;
  }

  if (thd->secondary_engine_optimization() ==
          Secondary_engine_optimization::PRIMARY_TENTATIVELY &&
      thd->lex->m_sql_cmd != nullptr &&
      thd->lex->m_sql_cmd->is_optional_transform_prepared()) {
    // A previous preparation did a secondary engine specific transform,
    // and this transform wasn't requested for the primary engine (in
    // 'optimizer_switch'), so in this new execution we need to reprepare for
    // the primary engine without the optional transform, for likely better
    // performance.
    thd->lex->m_sql_cmd->set_optional_transform_prepared(false);
    thd->get_stmt_da()->reset_diagnostics_area();
    thd->get_stmt_da()->set_error_status(thd, ER_PREPARE_FOR_PRIMARY_ENGINE);
    return true;
  }

  const handlerton *secondary_engine = thd->lex->m_sql_cmd->secondary_engine();

  /* When there is a secondary engine hook, return its return value,
   * otherwise return false (success). */
  return secondary_engine != nullptr &&
         secondary_engine->optimize_secondary_engine != nullptr &&
         secondary_engine->optimize_secondary_engine(thd, thd->lex);
}

void notify_plugins_after_select(THD *thd, const Sql_cmd *cmd) {
  /* Return if one of the 2 conditions is true:
   * 1. when secondary engine statement context is not present, query cost is
   * lower than the secondary than the engine threshold.
   * 2. When secondary engine statement context is present, primary engine
   * is the better execution engine for this query.
   * This prevents calling plugin_foreach for short queries, reducing the
   * overhead. */
  if (((thd->secondary_engine_statement_context() == nullptr) &&
       thd->m_current_query_cost <=
           thd->variables.secondary_engine_cost_threshold) ||
      ((thd->secondary_engine_statement_context() != nullptr) &&
       thd->secondary_engine_statement_context()
           ->is_primary_engine_optimal())) {
    return;
  }
  auto executed_in = (cmd != nullptr && cmd->using_secondary_storage_engine())
                         ? SelectExecutedIn::kSecondaryEngine
                         : SelectExecutedIn::kPrimaryEngine;

  plugin_foreach(
      thd,
      [](THD *t, plugin_ref plugin, void *arg) -> bool {
        handlerton *hton = plugin_data<handlerton *>(plugin);
        if (hton->notify_after_select != nullptr) {
          hton->notify_after_select(t, *(static_cast<SelectExecutedIn *>(arg)));
        }
        return false;
      },
      MYSQL_STORAGE_ENGINE_PLUGIN, &executed_in);
}

/**
  Execute a DML statement.
  This is the default implementation for a DML statement and uses a
  nested-loop join processor per outer-most query block.
  The implementation is split in two: One for query expressions containing
  a single query block and one for query expressions containing multiple
  query blocks combined with UNION.
*/

bool Sql_cmd_dml::execute_inner(THD *thd) {
  Query_expression *unit = lex->unit;

  if (unit->optimize(thd, /*materialize_destination=*/nullptr,
                     /*create_iterators=*/true, /*finalize_access_paths=*/true))
    return true;

  // Calculate the current statement cost.
  accumulate_statement_cost(lex);

  // Perform secondary engine optimizations, if needed.
  if (optimize_secondary_engine(thd)) return true;

  // We know by now that execution will complete (successful or with error)
  lex->set_exec_completed();
  if (lex->is_explain()) {
    for (Table_ref *ref = lex->query_tables; ref != nullptr;
         ref = ref->next_global) {
      if (ref->table != nullptr && ref->table->file != nullptr) {
        handlerton *hton = ref->table->file->ht;
        if (hton->external_engine_explain_check != nullptr) {
          if (hton->external_engine_explain_check(thd)) return true;
        }
      }
    }

    if (explain_query(thd, thd, unit)) return true; /* purecov: inspected */
  } else {
    if (unit->execute(thd)) return true;

    notify_plugins_after_select(thd, lex->m_sql_cmd);
  }

  return false;
}

bool Sql_cmd_dml::restore_cmd_properties(THD *thd) {
  lex->restore_cmd_properties();
  bind_fields(thd->stmt_arena->item_list());

  return false;
}

bool Sql_cmd_dml::save_cmd_properties(THD *thd) {
  return lex->save_cmd_properties(thd);
}

Query_result *Sql_cmd_dml::query_result() const {
  assert(is_prepared());
  return lex->unit->query_result() != nullptr
             ? lex->unit->query_result()
             : lex->unit->first_query_block()->query_result();
}

void Sql_cmd_dml::set_query_result(Query_result *result_arg) {
  result = result_arg;
  Query_expression *unit = lex->unit;
  unit->query_term()->query_block()->set_query_result(result);
  unit->set_query_result(result);
}

/**
  Performs access check for the locking clause, if present.

  @param thd Current session, used for checking access and raising error.

  @param tables Tables in the query's from clause.

  @retval true There was a locking clause and access was denied. An error has
  been raised.

  @retval false There was no locking clause or access was allowed to it. This
  is always returned in an embedded build.
*/
static bool check_locking_clause_access(THD *thd, Global_tables_list tables) {
  for (Table_ref *table_ref : tables)
    if (table_ref->lock_descriptor().type == TL_WRITE) {  // i.e. FOR UPDATE
      bool access_is_granted = false;
      /*
        If either of these privileges is present along with SELECT, access is
        granted.
      */
      for (Access_bitmask allowed_priv :
           {UPDATE_ACL, DELETE_ACL, LOCK_TABLES_ACL}) {
        Access_bitmask priv = SELECT_ACL | allowed_priv;
        if (!check_table_access(thd, priv, table_ref, false, 1, true)) {
          access_is_granted = true;
          // No need to check for other privileges for this table.
          // However, we still need to check privileges for other tables.
          break;
        }
      }

      if (!access_is_granted) {
        const Security_context *sctx = thd->security_context();

        my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0),
                 "SELECT with locking clause", sctx->priv_user().str,
                 sctx->host_or_ip().str, table_ref->get_table_name());

        return true;
      }
    }

  return false;
}

/**
  Perform an authorization precheck for an unprepared SELECT statement.

  This function will check that we have some privileges to all involved tables
  of the query (and possibly to other entities).
*/

bool Sql_cmd_select::precheck(THD *thd) {
  /*
    lex->exchange != NULL implies SELECT .. INTO OUTFILE and this
    requires FILE_ACL access.
  */
  const bool check_file_acl =
      (lex->result != nullptr && lex->result->needs_file_privilege());

  /*
    Check following,

    1) Check FILE privileges for current user who runs a query if needed.

    2) Check privileges for every user specified as a definer for a view or
       check privilege to access any DB in case a table wasn't specified.
       Although calling of check_access() when no tables are specified results
       in returning false value immediately, this call has important side
       effect: the counter 'stage/sql/checking permissions' in performance
       schema is incremented. Therefore, this function is called in order to
       save backward compatibility.

    3) Performs access check for the locking clause, if present.

  */
  Table_ref *tables = lex->query_tables;

  if (check_file_acl && check_global_access(thd, FILE_ACL)) return true;

  bool res;
  if (tables)
    res = check_table_access(thd, SELECT_ACL, tables, false, UINT_MAX, false);
  else
    res = check_access(thd, SELECT_ACL, any_db, nullptr, nullptr, false, false);

  return res || check_locking_clause_access(thd, Global_tables_list(tables));
}

/**
  Perform an authorization check for a prepared SELECT statement.
*/

bool Sql_cmd_select::check_privileges(THD *thd) {
  /*
    lex->exchange != nullptr implies SELECT .. INTO OUTFILE and this
    requires FILE_ACL access.
  */
  if (result->needs_file_privilege() &&
      check_access(thd, FILE_ACL, any_db, nullptr, nullptr, false, false))
    return true;

  if (check_all_table_privileges(thd)) return true;

  if (check_locking_clause_access(thd, Global_tables_list(lex->query_tables)))
    return true;

  Query_expression *const unit = lex->unit;

  for (auto qt : unit->query_terms<>())
    if (qt->query_block()->check_column_privileges(thd)) return true;

  return false;
}

bool Sql_cmd_dml::check_all_table_privileges(THD *thd) {
  // Check for all possible DML privileges

  const Table_ref *const first_not_own_table = thd->lex->first_not_own_table();
  for (Table_ref *tr = lex->query_tables; tr != first_not_own_table;
       tr = tr->next_global) {
    if (tr->is_internal())  // No privilege check required for internal tables
      continue;
    // Calculated wanted privilege based on how table/view is used:
    Access_bitmask want_privilege = 0;
    if (tr->is_inserted()) {
      want_privilege |= INSERT_ACL;
    }
    if (tr->is_updated()) {
      want_privilege |= UPDATE_ACL;
    }
    if (tr->is_deleted()) {
      want_privilege |= DELETE_ACL;
    }
    if (want_privilege == 0) {
      want_privilege = SELECT_ACL;
    }
    if (tr->referencing_view == nullptr) {
      // This is a base table
      if (check_single_table_access(thd, want_privilege, tr, false))
        return true;
    } else {
      // This is a view, set handler for transformation of errors
      Internal_error_handler_holder<View_error_handler, Table_ref> view_handler(
          thd, true, tr);
      for (Table_ref *t = tr; t->referencing_view; t = t->referencing_view) {
        if (check_single_table_access(thd, want_privilege, t, false))
          return true;
      }
    }
  }
  return false;
}

const MYSQL_LEX_CSTRING *Sql_cmd_dml::get_eligible_secondary_engine(
    THD *thd) const {
  return get_eligible_secondary_engine_from(thd->lex);
}

/*****************************************************************************
  Check fields, find best join, do the select and output fields.
  All tables must be opened.
*****************************************************************************/

/**
  @brief Check if two items are compatible wrt. materialization.

  @param outer Expression from outer query
  @param inner Expression from inner query

  @retval true   If subquery types allow materialization.
  @retval false  Otherwise.

  @note the purpose is similar to that of comparable_in_index().
*/

bool types_allow_materialization(Item *outer, Item *inner) {
  auto res_outer = outer->result_type();
  auto res_inner = inner->result_type();
  // Materialization of rows nested inside rows is not currently supported.
  if (res_outer == ROW_RESULT || res_inner == ROW_RESULT) return false;
  const bool num_outer = res_outer == INT_RESULT || res_outer == REAL_RESULT ||
                         res_outer == DECIMAL_RESULT;
  const bool num_inner = res_inner == INT_RESULT || res_inner == REAL_RESULT ||
                         res_inner == DECIMAL_RESULT;
  /*
    Materialization uses index lookup which implicitly converts the type of
    res_outer into that of res_inner.
    However, this can be done only if it respects rules in:
    https://dev.mysql.com/doc/refman/8.0/en/type-conversion.html
    https://dev.mysql.com/doc/refman/8.0/en/date-and-time-type-conversion.html
    Those rules say that, generally, if types differ, we convert them to
    REAL.
    So, looking up into a number is ok: outer will be converted to
    number. Collations don't matter.
    This covers e.g. looking up INT into DECIMAL, CHAR into INT, DECIMAL into
    BIT.
  */
  if (num_inner) return true;
  // Conversely, looking up one number into a non-number is not possible.
  if (num_outer) return false;
  /*
    Arguments are strings or temporal.
    Require same collation for correct comparison.
  */
  assert(res_outer == STRING_RESULT && res_inner == STRING_RESULT);
  if (outer->collation.collation != inner->collation.collation) return false;
  const bool temp_outer = outer->is_temporal();
  const bool temp_inner = inner->is_temporal();
  /*
    Same logic as for numbers.
    As explained in add_key_field(), IndexedTimeComparedToDate is not working;
    see also field_time_cmp_date().
    @todo unify all pieces of code which deal with this same problem.
  */
  if (temp_inner) {
    if (!inner->is_temporal_with_date())
      return temp_outer && !outer->is_temporal_with_date();
    return true;
  }
  if (temp_outer) return false;
  return true;
}

/*
  Check if the table's rowid is included in the temptable

  SYNOPSIS
    sj_table_is_included()
      join      The join
      join_tab  The table to be checked

  DESCRIPTION
    SemiJoinDuplicateElimination: check the table's rowid should be included
    in the temptable. This is so if

    1. The table is not embedded within some semi-join nest
    2. The has been pulled out of a semi-join nest, or

    3. The table is functionally dependent on some previous table

    [4. This is also true for constant tables that can't be
        NULL-complemented but this function is not called for such tables]

  RETURN
    true  - Include table's rowid
    false - Don't
*/

static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab) {
  if (join_tab->emb_sj_nest) return false;

  /* Check if this table is functionally dependent on the tables that
     are within the same outer join nest
  */
  Table_ref *embedding = join_tab->table_ref->embedding;
  if (join_tab->type() == JT_EQ_REF) {
    table_map depends_on = 0;

    for (uint kp = 0; kp < join_tab->ref().key_parts; kp++)
      depends_on |= join_tab->ref().items[kp]->used_tables();

    for (size_t idx : BitsSetIn(depends_on & ~PSEUDO_TABLE_BITS)) {
      JOIN_TAB *ref_tab = join->map2table[idx];
      if (embedding != ref_tab->table_ref->embedding) return true;
    }
    /* Ok, functionally dependent */
    return false;
  }
  /* Not functionally dependent => need to include*/
  return true;
}

SJ_TMP_TABLE *create_sj_tmp_table(THD *thd, JOIN *join,
                                  SJ_TMP_TABLE_TAB *first_tab,
                                  SJ_TMP_TABLE_TAB *last_tab) {
  uint jt_rowid_offset =
      0;                  // # tuple bytes are already occupied (w/o NULL bytes)
  uint jt_null_bits = 0;  // # null bits in tuple bytes
  for (SJ_TMP_TABLE_TAB *tab = first_tab; tab != last_tab; ++tab) {
    QEP_TAB *qep_tab = tab->qep_tab;
    tab->rowid_offset = jt_rowid_offset;
    jt_rowid_offset += qep_tab->table()->file->ref_length;
    if (qep_tab->table()->is_nullable()) {
      tab->null_byte = jt_null_bits / 8;
      tab->null_bit = jt_null_bits++;
    }
    qep_tab->table()->prepare_for_position();
  }

  SJ_TMP_TABLE *sjtbl;
  if (jt_rowid_offset) /* Temptable has at least one rowid */
  {
    sjtbl = new (thd->mem_root) SJ_TMP_TABLE;
    if (sjtbl == nullptr) return nullptr;
    sjtbl->tabs =
        thd->mem_root->ArrayAlloc<SJ_TMP_TABLE_TAB>(last_tab - first_tab);
    if (sjtbl->tabs == nullptr) return nullptr;
    sjtbl->tabs_end = std::uninitialized_copy(first_tab, last_tab, sjtbl->tabs);
    sjtbl->is_confluent = false;
    sjtbl->rowid_len = jt_rowid_offset;
    sjtbl->null_bits = jt_null_bits;
    sjtbl->null_bytes = (jt_null_bits + 7) / 8;
    sjtbl->tmp_table = create_duplicate_weedout_tmp_table(
        thd, sjtbl->rowid_len + sjtbl->null_bytes, sjtbl);
    if (sjtbl->tmp_table == nullptr) return nullptr;
    if (sjtbl->tmp_table->hash_field)
      sjtbl->tmp_table->file->ha_index_init(0, false);
    join->sj_tmp_tables.push_back(sjtbl->tmp_table);
  } else {
    /*
      This is confluent case where the entire subquery predicate does
      not depend on anything at all, ie this is
        WHERE const IN (uncorrelated select)
    */
    if (!(sjtbl = new (thd->mem_root) SJ_TMP_TABLE))
      return nullptr; /* purecov: inspected */
    sjtbl->tmp_table = nullptr;
    sjtbl->is_confluent = true;
    sjtbl->have_confluent_row = false;
  }
  return sjtbl;
}

/**
  Setup the strategies to eliminate semi-join duplicates.

  @param join           Join to process
  @param no_jbuf_after  Do not use join buffering after the table with this
                        number

  @retval false  OK
  @retval true   Out of memory error

    Setup the strategies to eliminate semi-join duplicates.
    At the moment there are 5 strategies:

    -# DuplicateWeedout (use of temptable to remove duplicates based on rowids
                         of row combinations)
    -# FirstMatch (pick only the 1st matching row combination of inner tables)
    -# LooseScan (scanning the sj-inner table in a way that groups duplicates
                  together and picking the 1st one)
    -# MaterializeLookup (Materialize inner tables, then setup a scan over
                          outer correlated tables, lookup in materialized table)
    -# MaterializeScan (Materialize inner tables, then setup a scan over
                        materialized tables, perform lookup in outer tables)

    The join order has "duplicate-generating ranges", and every range is
    served by one strategy or a combination of FirstMatch with with some
    other strategy.

    "Duplicate-generating range" is defined as a range within the join order
    that contains all of the inner tables of a semi-join. All ranges must be
    disjoint, if tables of several semi-joins are interleaved, then the ranges
    are joined together, which is equivalent to converting

     `SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )`

    to

      `SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)`.

    Applicability conditions are as follows:

    @par DuplicateWeedout strategy

    @code
      (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
      +------+  +=========================+  +---+
        (1)                 (2)               (3)
    @endcode

    -# Prefix of OuterTables (those that participate in IN-equality and/or are
       correlated with subquery) and outer Non-correlated tables.

    -# The handled range. The range starts with the first sj-inner table, and
       covers all sj-inner and outer tables Within the range, Inner, Outer,
       outer non-correlated tables may follow in any order.

    -# The suffix of outer non-correlated tables.

    @par FirstMatch strategy

    @code
      (ot|nt)*  [ it (it)* ]  (nt)*
      +------+  +==========+  +---+
        (1)          (2)        (3)

    @endcode
    -# Prefix of outer correlated and non-correlated tables

    -# The handled range, which may contain only inner tables.

    -# The suffix of outer non-correlated tables.

    @par LooseScan strategy

    @code
     (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
     +--------+   +===========+ +=============+   +------+
        (1)           (2)          (3)              (4)
    @endcode

    -# Prefix that may contain any outer tables. The prefix must contain all
       the non-trivially correlated outer tables. (non-trivially means that
       the correlation is not just through the IN-equality).

    -# Inner table for which the LooseScan scan is performed.  Notice that
       special requirements for existence of certain indexes apply to this
       table, @see class Loose_scan_opt.

    -# The remainder of the duplicate-generating range. It is served by
       application of FirstMatch strategy. Outer IN-correlated tables must be
       correlated to the LooseScan table but not to the inner tables in this
       range. (Currently, there can be no outer tables in this range because
       of implementation restrictions, @see
       Optimize_table_order::advance_sj_state()).

    -# The suffix of outer correlated and non-correlated tables.

    @par MaterializeLookup strategy

    @code
     (ot|nt)*  [ it (it)* ]  (nt)*
     +------+  +==========+  +---+
        (1)         (2)        (3)
    @endcode

    -# Prefix of outer correlated and non-correlated tables.

    -# The handled range, which may contain only inner tables.
            The inner tables are materialized in a temporary table that is
            later used as a lookup structure for the outer correlated tables.

    -# The suffix of outer non-correlated tables.

    @par MaterializeScan strategy

    @code
     (ot|nt)*  [ it (it)* ]  (ot|nt)*
     +------+  +==========+  +-----+
        (1)         (2)         (3)
    @endcode

    -# Prefix of outer correlated and non-correlated tables.

    -# The handled range, which may contain only inner tables.
            The inner tables are materialized in a temporary table which is
            later used to setup a scan.

    -# The suffix of outer correlated and non-correlated tables.

  Note that MaterializeLookup and MaterializeScan has overlap in their
  patterns. It may be possible to consolidate the materialization strategies
  into one.

  The choice between the strategies is made by the join optimizer (see
  advance_sj_state() and fix_semijoin_strategies()).  This function sets up
  all fields/structures/etc needed for execution except for
  setup/initialization of semi-join materialization which is done in
  setup_materialized_table().
*/

static bool setup_semijoin_dups_elimination(JOIN *join, uint no_jbuf_after) {
  uint tableno;
  THD *thd = join->thd;
  DBUG_TRACE;
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  if (join->query_block->sj_nests.empty()) return false;

  QEP_TAB *const qep_array = join->qep_tab;
  for (tableno = join->const_tables; tableno < join->primary_tables;) {
#ifndef NDEBUG
    const bool tab_in_sj_nest = join->best_ref[tableno]->emb_sj_nest != nullptr;
#endif
    QEP_TAB *const tab = &qep_array[tableno];
    POSITION *const pos = tab->position();

    if (pos->sj_strategy == SJ_OPT_NONE) {
      tableno++;  // nothing to do
      continue;
    }
    QEP_TAB *last_sj_tab = tab + pos->n_sj_tables - 1;
    switch (pos->sj_strategy) {
      case SJ_OPT_MATERIALIZE_LOOKUP:
      case SJ_OPT_MATERIALIZE_SCAN:
        assert(false);  // Should not occur among "primary" tables
        // Do nothing
        tableno += pos->n_sj_tables;
        break;
      case SJ_OPT_LOOSE_SCAN: {
        assert(tab_in_sj_nest);  // First table must be inner
        /* We jump from the last table to the first one */
        tab->match_tab = last_sj_tab->idx();

        /* For LooseScan, duplicate elimination is based on rows being sorted
           on key. We need to make sure that range select keeps the sorted index
           order. (When using MRR it may not.)

           Note: need_sorted_output() implementations for range select classes
           that do not support sorted output, will trigger an assert. This
           should not happen since LooseScan strategy is only picked if sorted
           output is supported.
        */
        if (tab->range_scan()) {
          assert(used_index(tab->range_scan()) == pos->loosescan_key);
          set_need_sorted_output(tab->range_scan());
        }

        const uint keyno = pos->loosescan_key;
        assert(tab->keys().is_set(keyno));
        tab->set_index(keyno);

        /* Calculate key length */
        uint keylen = 0;
        for (uint kp = 0; kp < pos->loosescan_parts; kp++)
          keylen += tab->table()->key_info[keyno].key_part[kp].store_length;
        tab->loosescan_key_len = keylen;

        if (pos->n_sj_tables > 1) {
          last_sj_tab->firstmatch_return = tab->idx();
          last_sj_tab->match_tab = last_sj_tab->idx();
        }
        tableno += pos->n_sj_tables;
        break;
      }
      case SJ_OPT_DUPS_WEEDOUT: {
        assert(tab_in_sj_nest);  // First table must be inner
        /*
          Consider a semijoin of one outer and one inner table, both
          with two rows. The inner table is assumed to be confluent
          (See sj_opt_materialize_lookup)

          If normal nested loop execution is used, we do not need to
          include semi-join outer table rowids in the duplicate
          weedout temp table since NL guarantees that outer table rows
          are encountered only consecutively and because all rows in
          the temp table are deleted for every new outer table
          combination (example is with a confluent inner table):

            ot1.row1|it1.row1
                 '-> temp table's have_confluent_row == false
                   |-> output ot1.row1
                   '-> set have_confluent_row= true
            ot1.row1|it1.row2
                 |-> temp table's have_confluent_row == true
                 | '-> do not output ot1.row1
                 '-> no more join matches - set have_confluent_row= false
            ot1.row2|it1.row1
                 '-> temp table's have_confluent_row == false
                   |-> output ot1.row2
                   '-> set have_confluent_row= true
              ...

          Note: not having outer table rowids in the temp table and
          then emptying the temp table when a new outer table row
          combinition is encountered is an optimization. Including
          outer table rowids in the temp table is not harmful but
          wastes memory.

          Now consider the join buffering algorithms (BNL/BKA). These
          algorithms join each inner row with outer rows in "reverse"
          order compared to NL. Effectively, this means that outer
          table rows may be encountered multiple times in a
          non-consecutive manner:

            NL:                 BNL/BKA:
            ot1.row1|it1.row1   ot1.row1|it1.row1
            ot1.row1|it1.row2   ot1.row2|it1.row1
            ot1.row2|it1.row1   ot1.row1|it1.row2
            ot1.row2|it1.row2   ot1.row2|it1.row2

          It is clear from the above that there is no place we can
          empty the temp table like we do in NL to avoid storing outer
          table rowids.

          Below we check if join buffering might be used. If so, set
          first_table to the first non-constant table so that outer
          table rowids are included in the temp table. Do not destroy
          other duplicate elimination methods.
        */
        uint first_table = tableno;
        for (uint sj_tableno = tableno; sj_tableno < tableno + pos->n_sj_tables;
             sj_tableno++) {
          if (join->best_ref[sj_tableno]->use_join_cache() &&
              sj_tableno <= no_jbuf_after) {
            /* Join buffering will probably be used */
            first_table = join->const_tables;
            break;
          }
        }

        QEP_TAB *const first_sj_tab = qep_array + first_table;
        if (last_sj_tab->first_inner() != NO_PLAN_IDX &&
            first_sj_tab->first_inner() != last_sj_tab->first_inner()) {
          /*
            The first duplicate weedout table is an outer table of an outer join
            and the last duplicate weedout table is one of the inner tables of
            the outer join.
            In this case, we must assure that all the inner tables of the
            outer join are part of the duplicate weedout operation.
            This is to assure that NULL-extension for inner tables of an
            outer join is performed before duplicate elimination is performed,
            otherwise we will have extra NULL-extended rows being output, which
            should have been eliminated as duplicates.
          */
          QEP_TAB *tab2 = &qep_array[last_sj_tab->first_inner()];
          /*
            First, locate the table that is the first inner table of the
            outer join operation that first_sj_tab is outer for.
          */
          while (tab2->first_upper() != NO_PLAN_IDX &&
                 tab2->first_upper() != first_sj_tab->first_inner())
            tab2 = qep_array + tab2->first_upper();
          // Then, extend the range with all inner tables of the join nest:
          if (qep_array[tab2->first_inner()].last_inner() > last_sj_tab->idx())
            last_sj_tab =
                &qep_array[qep_array[tab2->first_inner()].last_inner()];
        }

        SJ_TMP_TABLE_TAB sjtabs[MAX_TABLES];
        SJ_TMP_TABLE_TAB *last_tab = sjtabs;
        /*
          Walk through the range and remember
           - tables that need their rowids to be put into temptable
           - the last outer table
        */
        for (QEP_TAB *tab_in_range = qep_array + first_table;
             tab_in_range <= last_sj_tab; tab_in_range++) {
          if (sj_table_is_included(join, join->best_ref[tab_in_range->idx()])) {
            last_tab->qep_tab = tab_in_range;
            ++last_tab;
          }
        }

        SJ_TMP_TABLE *sjtbl = create_sj_tmp_table(thd, join, sjtabs, last_tab);
        if (sjtbl == nullptr) {
          return true;
        }

        qep_array[first_table].flush_weedout_table = sjtbl;
        last_sj_tab->check_weed_out_table = sjtbl;

        tableno += pos->n_sj_tables;
        break;
      }
      case SJ_OPT_FIRST_MATCH: {
        /*
          Setup a "jump" from the last table in the range of inner tables
          to the last outer table before the inner tables.
        */
        plan_idx jump_to = tab->idx() - 1;
        assert(tab_in_sj_nest);  // First table must be inner
        for (QEP_TAB *tab_in_range = tab; tab_in_range <= last_sj_tab;
             tab_in_range++) {
          if (!join->best_ref[tab_in_range->idx()]->emb_sj_nest) {
            /*
              Let last non-correlated table be jump target for
              subsequent inner tables.
            */
            assert(false);  // no "split jump" should exist.
            jump_to = tab_in_range->idx();
          } else {
            /*
              Assign jump target for last table in a consecutive range of
              inner tables.
            */
            if (tab_in_range == last_sj_tab ||
                !join->best_ref[tab_in_range->idx() + 1]->emb_sj_nest) {
              tab_in_range->firstmatch_return = jump_to;
              tab_in_range->match_tab = last_sj_tab->idx();
            }
          }
        }
        tableno += pos->n_sj_tables;
        break;
      }
    }
  }
  return false;
}

/*
  Destroy all temporary tables created by NL-semijoin runtime
*/

static void destroy_sj_tmp_tables(JOIN *join) {
  List_iterator<TABLE> it(join->sj_tmp_tables);
  TABLE *table;
  while ((table = it++)) {
    /*
      SJ-Materialization tables are initialized for either sequential reading
      or index lookup, DuplicateWeedout tables are not initialized for read
      (we only write to them), so need to call ha_index_or_rnd_end.
    */
    if (table->file != nullptr) {
      table->file->ha_index_or_rnd_end();
    }
    close_tmp_table(table);
    free_tmp_table(table);
  }
  join->sj_tmp_tables.clear();
}

/**
  Remove all rows from all temp tables used by NL-semijoin runtime

  All rows must be removed from all temporary tables before every join
  re-execution.
*/

bool JOIN::clear_sj_tmp_tables() {
  List_iterator<TABLE> it(sj_tmp_tables);
  TABLE *table;
  while ((table = it++)) {
    if (table->empty_result_table()) return true; /* purecov: inspected */
  }
  return false;
}

/// Empties all correlated materialized derived tables
bool JOIN::clear_corr_derived_tmp_tables() {
  for (uint i = const_tables; i < tables; i++) {
    auto tl = qep_tab[i].table_ref;
    if (tl && tl->is_derived() && !tl->common_table_expr() &&
        (tl->derived_query_expression()->uncacheable & UNCACHEABLE_DEPENDENT) &&
        tl->table) {
      /*
        Applied only to non-CTE derived tables, as CTEs are reset in
        Query_expression::clear_correlated_query_blocks()
      */
      if (tl->derived_query_expression()->query_result()->reset()) return true;
    }
  }
  return false;
}

/**
  Reset the state of this join object so that it is ready for a
  new execution.
*/

void JOIN::reset() {
  DBUG_TRACE;

  if (!executed) return;

  // clang-format off
  query_expression()->offset_limit_cnt = (ha_rows)(
      query_block->offset_limit ? query_block->offset_limit->val_uint() : 0ULL);
  // clang-format on

  group_sent = false;
  recursive_iteration_count = 0;
  executed = false;

  List_iterator<Window> li(query_block->m_windows);
  Window *w;
  while ((w = li++)) {
    w->reset_round();
  }

  if (tmp_tables) {
    for (uint tmp = primary_tables; tmp < primary_tables + tmp_tables; tmp++) {
      (void)qep_tab[tmp].table()->empty_result_table();
    }
  }
  clear_sj_tmp_tables();
  set_ref_item_slice(REF_SLICE_SAVED_BASE);

  if (qep_tab) {
    if (query_block->derived_table_count) clear_corr_derived_tmp_tables();
    /* need to reset ref access state (see EQRefIterator) */
    for (uint i = 0; i < tables; i++) {
      QEP_TAB *const tab = &qep_tab[i];
      /*
        If qep_tab==NULL, we may still have done ref access (to read a const
        table); const tables will not be re-read in the next execution of this
        subquery, so resetting key_err is not needed.
      */
      tab->ref().key_err = true;
    }
  }

  /* Reset of sum functions */
  if (sum_funcs) {
    Item_sum *func, **func_ptr = sum_funcs;
    while ((func = *(func_ptr++))) func->clear();
  }

  if (query_block->has_ft_funcs()) {
    /* TODO: move the code to JOIN::exec */
    (void)init_ftfuncs(thd, query_block);
  }
}

/**
  Prepare join result.

  @details Prepare join result prior to join execution or describing.
  Instantiate derived tables and get schema tables result if necessary.

  @return
    true  An error during derived or schema tables instantiation.
    false Ok
*/

bool JOIN::prepare_result() {
  DBUG_TRACE;

  error = 0;

  if (query_block->query_result()->start_execution(thd)) goto err;

  return false;

err:
  error = 1;
  return true;
}

/**
  Clean up and destroy join object.
*/

void JOIN::destroy() {
  cond_equal = nullptr;

  set_plan_state(NO_PLAN);

  if (qep_tab) {
    assert(!join_tab);
    for (uint i = 0; i < tables; i++) {
      TABLE *table = qep_tab[i].table();
      if (table != nullptr) {
        // These were owned by the root iterator, which we just destroyed.
        // Keep filesort_free_buffers() from trying to call CleanupAfterQuery()
        // on them.
        table->sorting_iterator = nullptr;
        table->duplicate_removal_iterator = nullptr;
      }
      qep_tab[i].cleanup();
    }
  } else if (thd->lex->using_hypergraph_optimizer()) {
    // Same, for hypergraph queries.
    for (Table_ref *tl = query_block->leaf_tables; tl; tl = tl->next_leaf) {
      TABLE *table = tl->table;
      if (table != nullptr) {
        // For prepared statements, a derived table's temp table handler
        // gets cleaned up at the end of prepare and it is setup again
        // during optimization. However, if optimization for a derived
        // table query block fails for some reason (E.g. Secondary engine
        // rejects all the plans), handler is not setup for the rest of
        // the derived tables. So we need to call set_keyread() only
        // when handler is initialized.
        // TODO(Chaithra): This should be moved to a more suitable place,
        // perhaps TableRowIterator's destructor ?
        if (table->file != nullptr) {
          table->set_keyread(false);
        }
        table->sorting_iterator = nullptr;
        table->duplicate_removal_iterator = nullptr;
      }
    }
    for (JOIN::TemporaryTableToCleanup cleanup : temp_tables) {
      if (cleanup.table != nullptr) {
        cleanup.table->sorting_iterator = nullptr;
        cleanup.table->duplicate_removal_iterator = nullptr;
      }
      close_tmp_table(cleanup.table);
      free_tmp_table(cleanup.table);
      ::destroy_at(cleanup.temp_table_param);
    }
    for (Filesort *filesort : filesorts_to_cleanup) {
      if (filesort != nullptr) ::destroy_at(filesort);
    }
    temp_tables.clear();
    filesorts_to_cleanup.clear();
  }
  if (join_tab || best_ref) {
    for (uint i = 0; i < tables; i++) {
      JOIN_TAB *const tab = join_tab ? &join_tab[i] : best_ref[i];
      tab->cleanup();
    }
  }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */

  // Run Cached_item DTORs!
  group_fields.destroy_elements();
  semijoin_deduplication_fields.destroy_elements();

  tmp_table_param.cleanup();

  /* Cleanup items referencing temporary table columns */
  if (tmp_fields != nullptr) {
    cleanup_item_list(tmp_fields[REF_SLICE_TMP1]);
    cleanup_item_list(tmp_fields[REF_SLICE_TMP2]);
    for (uint widx = 0; widx < m_windows.elements; widx++) {
      cleanup_item_list(tmp_fields[REF_SLICE_WIN_1 + widx]);
    }
  }

  destroy_sj_tmp_tables(this);

  List_iterator<Semijoin_mat_exec> sjm_list_it(sjm_exec_list);
  Semijoin_mat_exec *sjm;
  while ((sjm = sjm_list_it++) != nullptr) ::destroy_at(sjm);
  sjm_exec_list.clear();

  keyuse_array.clear();
  // Free memory for rollup arrays
  if (query_block->olap == ROLLUP_TYPE) {
    rollup_group_items.clear();
    rollup_group_items.shrink_to_fit();
    rollup_sums.clear();
    rollup_sums.shrink_to_fit();
  }
}

void JOIN::cleanup_item_list(const mem_root_deque<Item *> &items) const {
  for (Item *item : items) {
    item->cleanup();
  }
}

/**
  Optimize a query block and all inner query expressions

  @param thd    thread handler
  @param finalize_access_paths
                if true, finalize access paths, cf. FinalizePlanForQueryBlock
  @returns false if success, true if error
*/

bool Query_block::optimize(THD *thd, bool finalize_access_paths) {
  DBUG_TRACE;

  assert(join == nullptr);
  JOIN *const join_local = new (thd->mem_root) JOIN(thd, this);
  if (!join_local) return true; /* purecov: inspected */

  /*
    Updating Query_block::join requires acquiring THD::LOCK_query_plan
    to avoid races when EXPLAIN FOR CONNECTION is used.
  */
  thd->lock_query_plan();
  join = join_local;
  thd->unlock_query_plan();

  if (join->optimize(finalize_access_paths)) return true;

  if (join->zero_result_cause && !is_implicitly_grouped()) return false;

  for (Query_expression *query_expression = first_inner_query_expression();
       query_expression;
       query_expression = query_expression->next_query_expression()) {
    // Derived tables and const subqueries are already optimized
    if (!query_expression->is_optimized() &&
        query_expression->optimize(thd, /*materialize_destination=*/nullptr,
                                   /*create_iterators=*/false,
                                   /*finalize_access_paths=*/true))
      return true;
  }

  return false;
}

/**
  Check privileges for all columns referenced from this query block.
  Also check privileges for referenced subqueries.

  @param thd      thread handler

  @returns false if success, true if error (insufficient privileges)

  @todo - skip this if we have table SELECT privileges for all tables
*/
bool Query_block::check_column_privileges(THD *thd) {
  const Column_privilege_tracker tracker(thd, SELECT_ACL);

  for (Item *item : visible_fields()) {
    if (item->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                   pointer_cast<uchar *>(thd)))
      return true;
  }
  if (m_current_table_nest &&
      check_privileges_for_join(thd, m_current_table_nest))
    return true;
  if (where_cond() != nullptr &&
      where_cond()->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                         pointer_cast<uchar *>(thd)))
    return true;
  for (ORDER *group = group_list.first; group; group = group->next) {
    if ((*group->item)
            ->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                   pointer_cast<uchar *>(thd)))
      return true;
  }
  if (having_cond() != nullptr &&
      having_cond()->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                          pointer_cast<uchar *>(thd)))
    return true;
  List_iterator<Window> wi(m_windows);
  Window *w;
  while ((w = wi++)) {
    for (ORDER *wp = w->first_partition_by(); wp != nullptr; wp = wp->next)
      if ((*wp->item)->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                            pointer_cast<uchar *>(thd)))
        return true;

    for (ORDER *wo = w->first_order_by(); wo != nullptr; wo = wo->next)
      if ((*wo->item)->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                            pointer_cast<uchar *>(thd)))
        return true;
  }
  for (ORDER *order = order_list.first; order; order = order->next) {
    if ((*order->item)
            ->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                   pointer_cast<uchar *>(thd)))
      return true;
  }

  if (check_privileges_for_subqueries(thd)) return true;

  return false;
}

/**
  Check privileges for column references in a JOIN expression

  @param thd      thread handler
  @param tables   list of joined tables

  @returns false if success, true if error (insufficient privileges)
*/

bool check_privileges_for_join(THD *thd, mem_root_deque<Table_ref *> *tables) {
  thd->want_privilege = SELECT_ACL;

  for (Table_ref *table_ref : *tables) {
    if (table_ref->join_cond() != nullptr &&
        table_ref->join_cond()->walk(&Item::check_column_privileges,
                                     enum_walk::PREFIX,
                                     pointer_cast<uchar *>(thd)))
      return true;

    if (table_ref->nested_join != nullptr &&
        check_privileges_for_join(thd, &table_ref->nested_join->m_tables))
      return true;
  }

  return false;
}

/**
  Check privileges for column references in an item list

  @param thd      thread handler
  @param items    list of items
  @param privileges the required privileges

  @returns false if success, true if error (insufficient privileges)
*/
bool check_privileges_for_list(THD *thd, const mem_root_deque<Item *> &items,
                               Access_bitmask privileges) {
  thd->want_privilege = privileges;
  for (Item *item : items) {
    if (item->walk(&Item::check_column_privileges, enum_walk::PREFIX,
                   pointer_cast<uchar *>(thd)))
      return true;
  }
  return false;
}

/**
  Check privileges for column references in subqueries of a query block

  @param thd      thread handler

  @returns false if success, true if error (insufficient privileges)
*/

bool Query_block::check_privileges_for_subqueries(THD *thd) {
  for (Query_expression *query_expression = first_inner_query_expression();
       query_expression;
       query_expression = query_expression->next_query_expression()) {
    for (Query_block *sl = query_expression->first_query_block(); sl;
         sl = sl->next_query_block()) {
      if (sl->check_column_privileges(thd)) return true;
    }
  }
  return false;
}

/*****************************************************************************
  Go through all combinations of not marked tables and find the one
  which uses least records
*****************************************************************************/

/**
  Find how much space the previous read not const tables takes in cache.
*/

void calc_used_field_length(TABLE *table, bool needs_rowid,
                            uint *p_used_fieldlength) {
  uint null_fields, blobs, fields, rec_length;
  Field **f_ptr, *field;
  uint uneven_bit_fields;
  MY_BITMAP *read_set = table->read_set;

  uneven_bit_fields = null_fields = blobs = fields = rec_length = 0;
  for (f_ptr = table->field; (field = *f_ptr); f_ptr++) {
    if (bitmap_is_set(read_set, field->field_index())) {
      fields++;
      rec_length += field->pack_length();
      if (field->is_flag_set(BLOB_FLAG) || field->is_array()) blobs++;
      if (!field->is_flag_set(NOT_NULL_FLAG)) null_fields++;
      if (field->type() == MYSQL_TYPE_BIT && ((Field_bit *)field)->bit_len)
        uneven_bit_fields++;
    }
  }
  if (null_fields || uneven_bit_fields)
    rec_length += (table->s->null_fields + 7) / 8;
  if (table->is_nullable()) rec_length += sizeof(bool);
  if (blobs) {
    const uint blob_length = (uint)(table->file->stats.mean_rec_length -
                                    (table->s->reclength - rec_length));
    rec_length += max<uint>(4U, blob_length);
  }

  if (needs_rowid) {
    rec_length += table->file->ref_length;
    fields++;
  }

  *p_used_fieldlength = rec_length;
}

bool JOIN::init_ref_access() {
  DBUG_TRACE;
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  for (uint tableno = const_tables; tableno < tables; tableno++) {
    JOIN_TAB *const tab = best_ref[tableno];

    if (tab->type() == JT_REF)  // Here JT_REF means all kinds of ref access
    {
      assert(tab->position() && tab->position()->key);
      if (create_ref_for_key(this, tab, tab->position()->key,
                             tab->prefix_tables()))
        return true;
    }
  }

  return false;
}

/**
  Set the first_sj_inner_tab and last_sj_inner_tab fields for all tables
  inside the semijoin nests of the query.
*/
void JOIN::set_semijoin_info() {
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);
  if (query_block->sj_nests.empty()) return;

  for (uint tableno = const_tables; tableno < tables;) {
    JOIN_TAB *const tab = best_ref[tableno];
    const POSITION *const pos = tab->position();

    if (!pos) {
      tableno++;
      continue;
    }
    switch (pos->sj_strategy) {
      case SJ_OPT_NONE:
        tableno++;
        break;
      case SJ_OPT_MATERIALIZE_LOOKUP:
      case SJ_OPT_MATERIALIZE_SCAN:
      case SJ_OPT_LOOSE_SCAN:
      case SJ_OPT_DUPS_WEEDOUT:
      case SJ_OPT_FIRST_MATCH:
        /*
          Remember the first and last semijoin inner tables; this serves to tell
          a JOIN_TAB's semijoin strategy (like in setup_join_buffering()).
        */
        const plan_idx last_sj_tab = tableno + pos->n_sj_tables - 1;
        const plan_idx last_sj_inner = (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
                                           ?
                                           /* Range may end with non-inner table
                                              so cannot set last_sj_inner_tab */
                                           NO_PLAN_IDX
                                           : last_sj_tab;
        for (plan_idx tab_in_range = tableno; tab_in_range <= last_sj_tab;
             tab_in_range++) {
          best_ref[tab_in_range]->set_first_sj_inner(tableno);
          best_ref[tab_in_range]->set_last_sj_inner(last_sj_inner);
        }
        tableno += pos->n_sj_tables;
        break;
    }
  }
}

void calc_length_and_keyparts(Key_use *keyuse, JOIN_TAB *tab, const uint key,
                              table_map used_tables, Key_use **chosen_keyuses,
                              uint *length_out, uint *keyparts_out,
                              table_map *dep_map, bool *maybe_null) {
  assert(!dep_map || maybe_null);
  uint keyparts = 0, length = 0;
  uint found_part_ref_or_null = 0;
  KEY *const keyinfo = tab->table()->key_info + key;

  do {
    /*
      This Key_use is chosen if:
      - it involves a key part at the right place (if index is (a,b) we
      can have a search criterion on 'b' only if we also have a criterion
      on 'a'),
      - it references only tables earlier in the plan.
      Moreover, the execution layer is limited to maximum one ref_or_null
      keypart, as Index_lookup::null_ref_key is only one byte.
    */
    if (!(~used_tables & keyuse->used_tables) && keyparts == keyuse->keypart &&
        !(found_part_ref_or_null & keyuse->optimize)) {
      assert(keyparts <= MAX_REF_PARTS);
      if (chosen_keyuses) chosen_keyuses[keyparts] = keyuse;
      keyparts++;
      length += keyinfo->key_part[keyuse->keypart].store_length;
      found_part_ref_or_null |= keyuse->optimize;
      if (dep_map) {
        *dep_map |= keyuse->val->used_tables();
        *maybe_null |= keyinfo->key_part[keyuse->keypart].null_bit &&
                       (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL);
      }
    }
    keyuse++;
  } while (keyuse->table_ref == tab->table_ref && keyuse->key == key);
  if (keyparts <= 0) {
    assert(false);
    my_error(ER_INTERNAL_ERROR, MYF(0),
             "Key not found");  // In debug build we assert, but in release we
                                // guard against a potential server exit with an
                                // error
    return;
  }
  *length_out = length;
  *keyparts_out = keyparts;
}

bool init_ref(THD *thd, unsigned keyparts, unsigned length, unsigned keyno,
              Index_lookup *ref) {
  ref->key_parts = keyparts;
  ref->key_length = length;
  ref->key = keyno;
  if (!(ref->key_buff = thd->mem_root->ArrayAlloc<uchar>(ALIGN_SIZE(length))) ||
      !(ref->key_buff2 =
            thd->mem_root->ArrayAlloc<uchar>(ALIGN_SIZE(length))) ||
      !(ref->key_copy = thd->mem_root->ArrayAlloc<store_key *>(keyparts)) ||
      !(ref->items = thd->mem_root->ArrayAlloc<Item *>(keyparts)) ||
      !(ref->cond_guards = thd->mem_root->ArrayAlloc<bool *>(keyparts))) {
    return true;
  }
  ref->key_err = true;
  ref->null_rejecting = 0;
  ref->use_count = 0;
  ref->disable_cache = false;
  return false;
}

bool init_ref_part(THD *thd, unsigned part_no, Item *val, bool *cond_guard,
                   bool null_rejecting, table_map const_tables,
                   table_map used_tables, bool nullable,
                   const KEY_PART_INFO *key_part_info, uchar *key_buff,
                   Index_lookup *ref) {
  ref->items[part_no] = val;  // Save for cond removal
  ref->cond_guards[part_no] = cond_guard;
  // Set ref as "null rejecting" only if either side is really nullable:
  if (null_rejecting && (nullable || val->is_nullable()))
    ref->null_rejecting |= (key_part_map)1 << part_no;

  store_key *s_key = get_store_key(thd, val, used_tables, const_tables,
                                   key_part_info, key_buff, nullable);
  if (unlikely(!s_key || thd->is_error())) return true;

  if (used_tables & ~INNER_TABLE_BIT) {
    /* Comparing against a non-constant. */
    ref->key_copy[part_no] = s_key;
  } else {
    /*
      The outer reference is to a const table, so we copy the value
      straight from that table now (during optimization), instead of from
      the temporary table created during execution.

      TODO: Synchronize with the temporary table creation code, so that
      there is no need to create a column for this value.
    */
    bool dummy_value = false;
    val->walk(&Item::repoint_const_outer_ref, enum_walk::PREFIX,
              pointer_cast<uchar *>(&dummy_value));
    /*
      key is const, copy value now and possibly skip it while ::exec().

      Note:
        Result check of store_key::copy() is unnecessary,
        it could be an error returned by store_key::copy() method
        but stored value is not null and default value could be used
        in this case. Methods which used for storing the value
        should be responsible for proper null value setting
        in case of an error. Thus it's enough to check s_key->null_key
        value only.
    */
    (void)s_key->copy();
    /*
      It should be reevaluated in ::exec() if
      constant evaluated to NULL value which we might need to
      handle as a special case during JOIN::exec()
      (As in : 'Full scan on NULL key')
    */
    if (s_key->null_key)
      ref->key_copy[part_no] = s_key;  // Reevaluate in JOIN::exec()
    else
      ref->key_copy[part_no] = nullptr;
  }
  return false;
}

/**
  Setup a ref access for looking up rows via an index (a key).

  @param join          The join object being handled
  @param j             The join_tab which will have the ref access populated
  @param org_keyuse    First key part of (possibly multi-part) key
  @param used_tables   Bitmap of available tables

  @return False if success, True if error

  Given a Key_use structure that specifies the fields that can be used
  for index access, this function creates and set up the structure
  used for index look up via one of the access methods {JT_FT,
  JT_CONST, JT_REF_OR_NULL, JT_REF, JT_EQ_REF} for the plan operator
  'j'. Generally the function sets up the structure j->ref (of type
  Index_lookup), and the access method j->type.

  @note We cannot setup fields used for ref access before we have sorted
        the items within multiple equalities according to the final order of
        the tables involved in the join operation. Currently, this occurs in
        @see substitute_for_best_equal_field().
        The exception is ref access for const tables, which are fixed
        before the greedy search planner is invoked.
*/

bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
                        table_map used_tables) {
  DBUG_TRACE;

  const uint key = org_keyuse->key;
  const bool ftkey = (org_keyuse->keypart == FT_KEYPART);
  THD *const thd = join->thd;
  uint keyparts, length;
  TABLE *const table = j->table();
  KEY *const keyinfo = table->key_info + key;
  Key_use *chosen_keyuses[MAX_REF_PARTS];

  assert(j->keys().is_set(org_keyuse->key));

  /* Calculate the length of the used key. */
  if (ftkey) {
    Item_func_match *ifm = down_cast<Item_func_match *>(org_keyuse->val);

    length = 0;
    keyparts = 1;
    ifm->get_master()->score_from_index_scan = true;
  } else /* not ftkey */
    calc_length_and_keyparts(org_keyuse, j, key, used_tables, chosen_keyuses,
                             &length, &keyparts, nullptr, nullptr);
  if (thd->is_error()) {
    return true;
  }
  /* set up fieldref */
  if (init_ref(thd, keyparts, length, (int)key, &j->ref())) {
    return true;
  }

  uchar *key_buff = j->ref().key_buff;
  uchar *null_ref_key = nullptr;
  bool keyuse_uses_no_tables = true;
  bool null_rejecting_key = true;
  if (ftkey) {
    Key_use *keyuse = org_keyuse;
    j->ref().items[0] = ((Item_func *)(keyuse->val))->key_item();
    /* Predicates pushed down into subquery can't be used FT access */
    j->ref().cond_guards[0] = nullptr;
    // not supported yet. SerG
    assert(!(keyuse->used_tables & ~PSEUDO_TABLE_BITS));

    j->set_type(JT_FT);
    j->set_ft_func(down_cast<Item_func_match *>(keyuse->val));
    memset(j->ref().key_copy, 0, sizeof(j->ref().key_copy[0]) * keyparts);

    return false;
  }
  // Set up Index_lookup based on chosen Key_use-s.
  for (uint part_no = 0; part_no < keyparts; part_no++) {
    Key_use *keyuse = chosen_keyuses[part_no];
    const bool nullable = keyinfo->key_part[part_no].null_bit;

    if (keyuse->val->type() == Item::FIELD_ITEM) {
      // Look up the most appropriate field to base the ref access on.
      keyuse->val = get_best_field(down_cast<Item_field *>(keyuse->val),
                                   join->cond_equal);
      keyuse->used_tables = keyuse->val->used_tables();
    }

    if (init_ref_part(thd, part_no, keyuse->val, keyuse->cond_guard,
                      keyuse->null_rejecting, join->const_table_map,
                      keyuse->used_tables, nullable,
                      &keyinfo->key_part[part_no], key_buff, &j->ref())) {
      return true;
    }

    keyuse_uses_no_tables = keyuse_uses_no_tables && !keyuse->used_tables;

    /*
      Remember if we are going to use REF_OR_NULL
      But only if field _really_ can be null i.e. we force JT_REF
      instead of JT_REF_OR_NULL in case if field can't be null
    */
    if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && nullable) {
      assert(null_ref_key == nullptr);  // or we would overwrite it below
      null_ref_key = key_buff;
    }
    /*
      The selected key will reject matches on NULL values if:
       - the key field is nullable, and
       - predicate rejects NULL values (keyuse->null_rejecting is true), or
       - JT_REF_OR_NULL is not effective.
    */
    if ((keyinfo->key_part[part_no].field->is_nullable() ||
         table->is_nullable()) &&
        (!keyuse->null_rejecting || null_ref_key != nullptr)) {
      null_rejecting_key = false;
    }
    key_buff += keyinfo->key_part[part_no].store_length;
  }
  assert(j->type() != JT_FT);
  if (j->type() == JT_CONST)
    j->table()->const_table = true;
  else if (((actual_key_flags(keyinfo) & HA_NOSAME) == 0) ||
           ((actual_key_flags(keyinfo) & HA_NULL_PART_KEY) &&
            !null_rejecting_key) ||
           keyparts != actual_key_parts(keyinfo)) {
    /* Must read with repeat */
    j->set_type(null_ref_key ? JT_REF_OR_NULL : JT_REF);
    j->ref().null_ref_key = null_ref_key;
  } else if (keyuse_uses_no_tables &&
             !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE)) {
    /*
      This happen if we are using a constant expression in the ON part
      of an LEFT JOIN.
      SELECT * FROM a LEFT JOIN b ON b.key=30
      Here we should not mark the table as a 'const' as a field may
      have a 'normal' value or a NULL value.
    */
    j->set_type(JT_CONST);
    j->position()->rows_fetched = 1.0;
  } else {
    j->set_type(JT_EQ_REF);
    j->position()->rows_fetched = 1.0;
  }

  return thd->is_error();
}

namespace {

class store_key_const_item final : public store_key {
  int cached_result = -1;

 public:
  store_key_const_item(THD *thd, Field *to_field_arg, uchar *ptr,
                       uchar *null_ptr_arg, uint length, Item *item_arg)
      : store_key(thd, to_field_arg, ptr, null_ptr_arg, length, item_arg) {}
  const char *name() const override { return STORE_KEY_CONST_NAME; }

 protected:
  enum store_key_result copy_inner() override {
    if (cached_result == -1) {
      cached_result = store_key::copy_inner();
    }
    return static_cast<store_key_result>(cached_result);
  }
};

/*
  Class used for indexes over JSON expressions. The value to lookup is
  obtained from val_json() method and then converted according to field's
  result type and saved. This allows proper handling of temporal values.
*/
class store_key_json_item final : public store_key {
  /// Whether the key is constant.
  const bool m_const_key{false};
  /// Whether the key was already copied.
  bool m_inited{false};

 public:
  store_key_json_item(THD *thd, Field *to_field_arg, uchar *ptr,
                      uchar *null_ptr_arg, uint length, Item *item_arg,
                      bool const_key_arg)
      : store_key(thd, to_field_arg, ptr, null_ptr_arg, length, item_arg),
        m_const_key(const_key_arg) {}

  const char *name() const override {
    return m_const_key ? STORE_KEY_CONST_NAME : "func";
  }

 protected:
  enum store_key_result copy_inner() override;
};

}  // namespace

static store_key *get_store_key(THD *thd, Item *val, table_map used_tables,
                                table_map const_tables,
                                const KEY_PART_INFO *key_part, uchar *key_buff,
                                uint maybe_null) {
  if (key_part->field->is_array()) {
    return new (thd->mem_root)
        store_key_json_item(thd, key_part->field, key_buff + maybe_null,
                            maybe_null ? key_buff : nullptr, key_part->length,
                            val, (!((~const_tables) & used_tables)));
  }
  if (!((~const_tables) & used_tables))  // if const item
  {
    return new (thd->mem_root) store_key_const_item(
        thd, key_part->field, key_buff + maybe_null,
        maybe_null ? key_buff : nullptr, key_part->length, val);
  }
  return new (thd->mem_root)
      store_key(thd, key_part->field, key_buff + maybe_null,
                maybe_null ? key_buff : nullptr, key_part->length, val);
}

store_key::store_key(THD *thd, Field *field_arg, uchar *ptr, uchar *null,
                     uint length, Item *item_arg)
    : item(item_arg) {
  if (field_arg->type() == MYSQL_TYPE_BLOB ||
      field_arg->type() == MYSQL_TYPE_GEOMETRY) {
    /*
      Key segments are always packed with a 2 byte length prefix.
      See mi_rkey for details.
    */
    to_field = new (thd->mem_root) Field_varstring(
        ptr, length, 2, null, 1, Field::NONE, field_arg->field_name,
        field_arg->table->s, field_arg->charset());
    to_field->init(field_arg->table);
  } else
    to_field =
        field_arg->new_key_field(thd->mem_root, field_arg->table, ptr, null, 1);

  // If the item is nullable, but we cannot store null, make
  // to_field temporary nullable so that we can check in copy_inner()
  // if we end up with an illegal null value.
  if (!to_field->is_nullable() && item->is_nullable())
    to_field->set_tmp_nullable();
}

store_key::store_key_result store_key::copy() {
  enum store_key_result result;
  THD *thd = current_thd;
  const enum_check_fields saved_check_for_truncated_fields =
      thd->check_for_truncated_fields;
  const sql_mode_t sql_mode = thd->variables.sql_mode;
  thd->variables.sql_mode &= ~(MODE_NO_ZERO_IN_DATE | MODE_NO_ZERO_DATE);

  thd->check_for_truncated_fields = CHECK_FIELD_IGNORE;

  result = copy_inner();

  thd->check_for_truncated_fields = saved_check_for_truncated_fields;
  thd->variables.sql_mode = sql_mode;

  return result;
}

enum store_key::store_key_result store_key_hash_item::copy_inner() {
  const enum store_key_result res = store_key::copy_inner();
  if (res != STORE_KEY_FATAL) {
    // Convert to and from little endian, since that is what gets
    // stored in the hash field we are lookup up against.
    ulonglong h = uint8korr(pointer_cast<char *>(hash));
    h = calc_field_hash(to_field, &h);
    int8store(pointer_cast<char *>(hash), h);
  }
  return res;
}

namespace {

enum store_key::store_key_result store_key_json_item::copy_inner() {
  THD *thd = current_thd;
  TABLE *table = to_field->table;
  // Temporarily mark all table's fields writable to avoid assert.
  my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
  if (!m_inited) {
    Json_wrapper wr;
    String str_val, buf;

    const Functional_index_error_handler functional_index_error_handler(
        to_field, thd);
    // Get JSON value and store its value as the key. MEMBER OF is the only
    // function that can use this function
    if (get_json_atom_wrapper(&item, 0, "MEMBER OF", &str_val, &buf, &wr,
                              nullptr, true) ||
        save_json_to_field(thd, to_field, &wr, false))
      return STORE_KEY_FATAL;
    // Copy constant key only once
    if (m_const_key) m_inited = true;
  }

  dbug_tmp_restore_column_map(table->write_set, old_map);
  null_key = to_field->is_null() || item->null_value;
  assert(!thd->is_error());
  return STORE_KEY_OK;
}

}  // namespace

static store_key::store_key_result type_conversion_status_to_store_key(
    THD *thd, type_conversion_status ts) {
  switch (ts) {
    case TYPE_OK:
      return store_key::STORE_KEY_OK;
    case TYPE_NOTE_TRUNCATED:
    case TYPE_WARN_TRUNCATED:
    case TYPE_NOTE_TIME_TRUNCATED:
      if (thd->check_for_truncated_fields)
        return store_key::STORE_KEY_CONV;
      else
        return store_key::STORE_KEY_OK;
    case TYPE_WARN_OUT_OF_RANGE:
    case TYPE_WARN_INVALID_STRING:
    case TYPE_ERR_NULL_CONSTRAINT_VIOLATION:
    case TYPE_ERR_BAD_VALUE:
    case TYPE_ERR_OOM:
      return store_key::STORE_KEY_FATAL;
  }

  assert(false);  // not possible
  return store_key::STORE_KEY_FATAL;
}

enum store_key::store_key_result store_key::copy_inner() {
  THD *thd = current_thd;
  TABLE *table = to_field->table;
  my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
  const type_conversion_status save_res = item->save_in_field(to_field, true);
  store_key_result res;
  /*
    Item::save_in_field() may call Item::val_xxx(). And if this is a subquery
    we need to check for errors executing it and react accordingly.
  */
  if (save_res != TYPE_OK && thd->is_error())
    res = STORE_KEY_FATAL;
  else
    res = type_conversion_status_to_store_key(thd, save_res);
  dbug_tmp_restore_column_map(table->write_set, old_map);
  null_key = to_field->is_null() || item->null_value;
  return to_field->is_tmp_null() ? STORE_KEY_FATAL : res;
}

/**
  Extend e1 by AND'ing e2 to the condition e1 points to. The resulting
  condition is fixed. Requirement: the input Items must already have
  been fixed. This is a variant of and_items(); it is intended for use in
  the optimizer phase.

  @param[in,out]   e1 Pointer to condition that will be extended with e2
  @param           e2 Condition that will extend e1

  @retval true   if there was a memory allocation error, in which case
                 e1 remains unchanged
  @retval false  otherwise
*/

bool and_conditions(Item **e1, Item *e2) {
  assert(!(*e1) || (*e1)->fixed);
  assert(!e2 || e2->fixed);
  if (*e1) {
    if (!e2) return false;
    Item *res = new Item_cond_and(*e1, e2);
    if (unlikely(!res)) return true;

    *e1 = res;
    res->quick_fix_field();
    res->update_used_tables();

  } else
    *e1 = e2;
  return false;
}

/*
  Get a part of the condition that can be checked using only index fields

  SYNOPSIS
    make_cond_for_index()
      cond           The source condition
      table          The table that is partially available
      keyno          The index in the above table. Only fields covered by the
  index are available other_tbls_ok  true <=> Fields of other non-const tables
  are allowed

  DESCRIPTION
    Get a part of the condition that can be checked when for the given table
    we have values only of fields covered by some index. The condition may
    refer to other tables, it is assumed that we have values of all of their
    fields.

    Example:
      make_cond_for_index(
         "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND
  cond(t2.key2)", t2, keyno(t2.key1)) will return "cond(t1.field) AND
  cond(t2.key2)"

  RETURN
    Index condition, or NULL if no condition could be inferred.
*/

static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
                                 bool other_tbls_ok) {
  assert(cond != nullptr);

  if (cond->type() == Item::COND_ITEM) {
    uint n_marked = 0;
    if (((Item_cond *)cond)->functype() == Item_func::COND_AND_FUNC) {
      table_map used_tables = 0;
      Item_cond_and *new_cond = new Item_cond_and;
      if (!new_cond) return nullptr;
      List_iterator<Item> li(*((Item_cond *)cond)->argument_list());
      Item *item;
      while ((item = li++)) {
        Item *fix = make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (fix) {
          new_cond->argument_list()->push_back(fix);
          used_tables |= fix->used_tables();
        }
        n_marked += (item->marker == Item::MARKER_ICP_COND_USES_INDEX_ONLY);
      }
      if (n_marked == ((Item_cond *)cond)->argument_list()->elements)
        cond->marker = Item::MARKER_ICP_COND_USES_INDEX_ONLY;
      switch (new_cond->argument_list()->elements) {
        case 0:
          return nullptr;
        case 1:
          new_cond->set_used_tables(used_tables);
          return new_cond->argument_list()->head();
        default:
          new_cond->quick_fix_field();
          new_cond->set_used_tables(used_tables);
          return new_cond;
      }
    } else /* It's OR */
    {
      Item_cond_or *new_cond = new Item_cond_or;
      if (!new_cond) return nullptr;
      List_iterator<Item> li(*((Item_cond *)cond)->argument_list());
      Item *item;
      while ((item = li++)) {
        Item *fix = make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (!fix) return nullptr;
        new_cond->argument_list()->push_back(fix);
        n_marked += (item->marker == Item::MARKER_ICP_COND_USES_INDEX_ONLY);
      }
      if (n_marked == ((Item_cond *)cond)->argument_list()->elements)
        cond->marker = Item::MARKER_ICP_COND_USES_INDEX_ONLY;
      new_cond->quick_fix_field();
      new_cond->set_used_tables(cond->used_tables());
      new_cond->apply_is_true();
      return new_cond;
    }
  }

  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok)) {
    /*
      Reset marker since it might have the value
      MARKER_ICP_COND_USES_INDEX_ONLY if this condition is part of the select
      condition for multiple tables.
    */
    cond->marker = Item::MARKER_NONE;
    return nullptr;
  }
  cond->marker = Item::MARKER_ICP_COND_USES_INDEX_ONLY;
  return cond;
}

static Item *make_cond_remainder(Item *cond, bool exclude_index) {
  if (exclude_index && cond->marker == Item::MARKER_ICP_COND_USES_INDEX_ONLY)
    return nullptr; /* Already checked */

  if (cond->type() == Item::COND_ITEM) {
    table_map tbl_map = 0;
    if (((Item_cond *)cond)->functype() == Item_func::COND_AND_FUNC) {
      /* Create new top level AND item */
      Item_cond_and *new_cond = new Item_cond_and;
      if (!new_cond) return (Item *)nullptr;
      List_iterator<Item> li(*((Item_cond *)cond)->argument_list());
      Item *item;
      while ((item = li++)) {
        Item *fix = make_cond_remainder(item, exclude_index);
        if (fix) {
          new_cond->argument_list()->push_back(fix);
          tbl_map |= fix->used_tables();
        }
      }
      switch (new_cond->argument_list()->elements) {
        case 0:
          return (Item *)nullptr;
        case 1:
          return new_cond->argument_list()->head();
        default:
          new_cond->quick_fix_field();
          new_cond->set_used_tables(tbl_map);
          return new_cond;
      }
    } else /* It's OR */
    {
      Item_cond_or *new_cond = new Item_cond_or;
      if (!new_cond) return (Item *)nullptr;
      List_iterator<Item> li(*((Item_cond *)cond)->argument_list());
      Item *item;
      while ((item = li++)) {
        Item *fix = make_cond_remainder(item, false);
        if (!fix) return (Item *)nullptr;
        new_cond->argument_list()->push_back(fix);
        tbl_map |= fix->used_tables();
      }
      new_cond->quick_fix_field();
      new_cond->set_used_tables(tbl_map);
      new_cond->apply_is_true();
      return new_cond;
    }
  }
  return cond;
}

/**
  Try to extract and push the index condition down to table handler

  @param  join_tab       join_tab for table
  @param  keyno          Index for which extract and push the condition
  @param  trace_obj      trace object where information is to be added
*/
void QEP_TAB::push_index_cond(const JOIN_TAB *join_tab, uint keyno,
                              Opt_trace_object *trace_obj) {
  JOIN *const join_ = join();
  DBUG_TRACE;

  ASSERT_BEST_REF_IN_JOIN_ORDER(join_);
  assert(join_tab == join_->best_ref[idx()]);

  if (join_tab->reversed_access)  // @todo: historical limitation, lift it!
    return;

  TABLE *const tbl = table();

  // Disable ICP for Innodb intrinsic temp table because of performance
  if (tbl->s->db_type() == innodb_hton && tbl->s->tmp_table != NO_TMP_TABLE &&
      tbl->s->tmp_table != TRANSACTIONAL_TMP_TABLE)
    return;

  // TODO: Currently, index on virtual generated column doesn't support ICP
  if (tbl->vfield && tbl->index_contains_some_virtual_gcol(keyno)) return;

  /*
    Fields of other non-const tables aren't allowed in following cases:
       type is:
        (JT_ALL | JT_INDEX_SCAN | JT_RANGE | JT_INDEX_MERGE)
       and BNL is used.
    and allowed otherwise.
  */
  const bool other_tbls_ok =
      !((type() == JT_ALL || type() == JT_INDEX_SCAN || type() == JT_RANGE ||
         type() == JT_INDEX_MERGE) &&
        join_tab->use_join_cache() == JOIN_CACHE::ALG_BNL);

  /*
    We will only attempt to push down an index condition when the
    following criteria are true:
    0. The table has a select condition
    1. The storage engine supports ICP.
    2. The index_condition_pushdown switch is on and
       the use of ICP is not disabled by the NO_ICP hint.
    3. The query is not a multi-table update or delete statement. The reason
       for this requirement is that the same handler will be used
       both for doing the select/join and the update. The pushed index
       condition might then also be applied by the storage engine
       when doing the update part and result in either not finding
       the record to update or updating the wrong record.
    4. The JOIN_TAB is not part of a subquery that has guarded conditions
       that can be turned on or off during execution of a 'Full scan on NULL
       key'.
       @see Item_in_optimizer::val_int()
       @see subselect_iterator_engine::exec()
       @see Index_lookup::cond_guards
       @see setup_join_buffering
    5. The join type is not CONST or SYSTEM. The reason for excluding
       these join types, is that these are optimized to only read the
       record once from the storage engine and later re-use it. In a
       join where a pushed index condition evaluates fields from
       tables earlier in the join sequence, the pushed condition would
       only be evaluated the first time the record value was needed.
    6. The index is not a clustered index. The performance improvement
       of pushing an index condition on a clustered key is much lower
       than on a non-clustered key. This restriction should be
       re-evaluated when WL#6061 is implemented.
    7. The index on virtual generated columns is not supported for ICP.
  */
  if (condition() &&
      tbl->file->index_flags(keyno, 0, true) & HA_DO_INDEX_COND_PUSHDOWN &&
      hint_key_state(join_->thd, table_ref, keyno, ICP_HINT_ENUM,
                     OPTIMIZER_SWITCH_INDEX_CONDITION_PUSHDOWN) &&
      join_->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
      join_->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
      !has_guarded_conds() && type() != JT_CONST && type() != JT_SYSTEM &&
      !(keyno == tbl->s->primary_key &&
        tbl->file->primary_key_is_clustered())) {
    DBUG_EXECUTE("where", print_where(join_->thd, condition(), "full cond",
                                      QT_ORDINARY););
    Item *idx_cond =
        make_cond_for_index(condition(), tbl, keyno, other_tbls_ok);
    DBUG_EXECUTE("where",
                 print_where(join_->thd, idx_cond, "idx cond", QT_ORDINARY););
    if (idx_cond) {
      /*
        Check that the condition to push actually contains fields from
        the index. Without any fields from the index it is unlikely
        that it will filter out any records since the conditions on
        fields from other tables in most cases have already been
        evaluated.
      */
      idx_cond->update_used_tables();
      if ((idx_cond->used_tables() & table_ref->map()) == 0) {
        /*
          The following assert is to check that we only skip pushing the
          index condition for the following situations:
          1. We actually are allowed to generate an index condition on another
             table.
          2. The index condition is a constant item.
          3. The index condition contains an updatable user variable
             (test this by checking that the RAND_TABLE_BIT is set).
        */
        assert(other_tbls_ok ||                    // 1
               idx_cond->const_item() ||           // 2
               idx_cond->is_non_deterministic());  // 3
        return;
      }

      Item *idx_remainder_cond = nullptr;

      /*
        For BKA cache, we don't store the condition, because evaluation of the
        condition would require additional operations before the evaluation.
      */
      if (join_tab->use_join_cache() &&
          /*
            if cache is used then the value is true only
            for BKA cache (see setup_join_buffering() func).
            In this case other_tbls_ok is an equivalent of
            cache->is_key_access().
          */
          other_tbls_ok &&
          (idx_cond->used_tables() &
           ~(table_ref->map() | join_->const_table_map))) {
        idx_remainder_cond = idx_cond;
        trace_obj->add("not_pushed_due_to_BKA", true);
      } else {
        idx_remainder_cond = tbl->file->idx_cond_push(keyno, idx_cond);
        DBUG_EXECUTE("where",
                     print_where(join_->thd, tbl->file->pushed_idx_cond,
                                 "icp cond", QT_ORDINARY););
      }
      /*
        Disable eq_ref's "lookup cache" if we've pushed down an index
        condition.
        TODO: This check happens to work on current ICP implementations, but
        there may exist a compliant implementation that will not work
        correctly with it. Sort this out when we stabilize the condition
        pushdown APIs.
      */
      if (idx_remainder_cond != idx_cond) {
        ref().disable_cache = true;
        trace_obj->add("pushed_index_condition", idx_cond);
      }

      Item *row_cond = make_cond_remainder(condition(), true);
      DBUG_EXECUTE("where", print_where(join_->thd, row_cond, "remainder cond",
                                        QT_ORDINARY););

      if (row_cond) {
        and_conditions(&row_cond, idx_remainder_cond);
        idx_remainder_cond = row_cond;
      }
      set_condition(idx_remainder_cond);
      trace_obj->add("table_condition_attached", idx_remainder_cond);
    }
  }
}

/**
  Setup the materialized table for a semi-join nest

  @param tab       join_tab for the materialized semi-join table
  @param tableno   table number of materialized table
  @param inner_pos information about the first inner table of the subquery
  @param sjm_pos   information about the materialized semi-join table,
                   to be filled with data.

  @details
    Setup execution structures for one semi-join materialization nest:
    - Create the materialization temporary table, including Table_ref
  object.
    - Create a list of Item_field objects per column in the temporary table.
    - Create a keyuse array describing index lookups into the table
      (for MaterializeLookup)

  @return False if OK, True if error
*/

bool JOIN::setup_semijoin_materialized_table(JOIN_TAB *tab, uint tableno,
                                             POSITION *inner_pos,
                                             POSITION *sjm_pos) {
  DBUG_TRACE;
  Table_ref *const emb_sj_nest = inner_pos->table->emb_sj_nest;
  Semijoin_mat_optimize *const sjm_opt = &emb_sj_nest->nested_join->sjm;
  Semijoin_mat_exec *const sjm_exec = tab->sj_mat_exec();
  const uint field_count = emb_sj_nest->nested_join->sj_inner_exprs.size();

  assert(field_count > 0);
  assert(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
         inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);

  /*
    Set up the table to write to, do as
    Query_result_union::create_result_table does
  */
  sjm_exec->table_param = Temp_table_param();
  count_field_types(query_block, &sjm_exec->table_param,
                    emb_sj_nest->nested_join->sj_inner_exprs, false, true);
  sjm_exec->table_param.bit_fields_as_long = true;

  char buffer[NAME_LEN];
  const size_t len = snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
                              emb_sj_nest->nested_join->query_block_id);
  char *name = (char *)thd->mem_root->Alloc(len + 1);
  if (name == nullptr) return true; /* purecov: inspected */

  memcpy(name, buffer, len);
  name[len] = '\0';
  TABLE *table;
  if (!(table =
            create_tmp_table(thd, &sjm_exec->table_param,
                             emb_sj_nest->nested_join->sj_inner_exprs, nullptr,
                             true /* distinct */, true /* save_sum_fields */,
                             thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS,
                             HA_POS_ERROR /* rows_limit */, name)))
    return true; /* purecov: inspected */
  sjm_exec->table = table;
  map2table[tableno] = tab;
  table->file->ha_extra(HA_EXTRA_IGNORE_DUP_KEY);
  sj_tmp_tables.push_back(table);
  sjm_exec_list.push_back(sjm_exec);

  /*
    Hash_field is not applicable for MATERIALIZE_LOOKUP. If hash_field is
    created for temporary table, semijoin_types_allow_materialization must
    assure that MATERIALIZE_LOOKUP can't be chosen.
  */
  assert((inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP &&
          !table->hash_field) ||
         inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);

  auto tl = new (thd->mem_root) Table_ref("", name, TL_IGNORE);
  if (tl == nullptr) return true; /* purecov: inspected */
  tl->table = table;

  /*
    If the SJ nest is inside an outer join nest, this tmp table belongs to
    it. It's important for attachment of the semi-join ON condition with the
    proper guards, to this table. If it's an AJ nest it's an outer join
    nest too.
  */
  if (emb_sj_nest->is_aj_nest())
    tl->embedding = emb_sj_nest;
  else
    tl->embedding = emb_sj_nest->outer_join_nest();
  /*
    Above, we do not set tl->emb_sj_nest, neither first_sj_inner nor
    last_sj_inner; it's because there's no use to say that this table is part
    of the SJ nest; but it's necessary to say that it's part of any outer join
    nest. The antijoin nest is an outer join nest, but from the POV of the
    sj-tmp table it's only an outer join nest, so there is no need to set
    emb_sj_nest even in this case.
  */

  // Table is "nullable" if inner table of an outer_join
  if (tl->is_inner_table_of_outer_join()) table->set_nullable();

  tl->set_tableno(tableno);

  table->pos_in_table_list = tl;
  table->pos_in_table_list->query_block = query_block;

  if (!(sjm_opt->mat_fields = (Item_field **)thd->mem_root->Alloc(
            field_count * sizeof(Item_field **))))
    return true;

  for (uint fieldno = 0; fieldno < field_count; fieldno++) {
    if (!(sjm_opt->mat_fields[fieldno] =
              new Item_field(table->visible_field_ptr()[fieldno])))
      return true;
  }

  tab->table_ref = tl;
  tab->set_table(table);
  tab->set_position(sjm_pos);

  tab->worst_seeks = 1.0;
  tab->set_records((ha_rows)emb_sj_nest->nested_join->sjm.expected_rowcount);

  tab->found_records = tab->records();
  tab->read_time = emb_sj_nest->nested_join->sjm.scan_cost.total_cost();

  tab->init_join_cond_ref(tl);

  table->keys_in_use_for_query.set_all();
  sjm_pos->table = tab;
  sjm_pos->sj_strategy = SJ_OPT_NONE;

  sjm_pos->use_join_buffer = false;
  /*
    No need to recalculate filter_effect since there are no post-read
    conditions for materialized tables.
  */
  sjm_pos->filter_effect = 1.0;

  /*
    Key_use objects are required so that create_ref_for_key() can set up
    a proper ref access for this table.
  */
  Key_use_array *keyuse =
      create_keyuse_for_table(thd, field_count, sjm_opt->mat_fields,
                              emb_sj_nest->nested_join->sj_outer_exprs);
  if (!keyuse) return true;

  const double fanout =
      ((uint)tab->idx() == const_tables)
          ? 1.0
          : best_ref[tab->idx() - 1]->position()->prefix_rowcount;
  if (!sjm_exec->is_scan) {
    sjm_pos->key = keyuse->begin();  // MaterializeLookup will use the index
    sjm_pos->read_cost =
        emb_sj_nest->nested_join->sjm.lookup_cost.total_cost() * fanout;
    tab->set_keyuse(keyuse->begin());
    tab->keys().set_bit(0);  // There is one index - use it always
    tab->set_index(0);
    sjm_pos->rows_fetched = 1.0;
    tab->set_type(JT_REF);
  } else {
    sjm_pos->key = nullptr;  // No index use for MaterializeScan
    sjm_pos->read_cost = tab->read_time * fanout;
    sjm_pos->rows_fetched = static_cast<double>(tab->records());
    tab->set_type(JT_ALL);
  }
  sjm_pos->set_prefix_join_cost((tab - join_tab), cost_model());

  return false;
}

/**
  A helper function that sets the right op type for join cache (BNL/BKA).
*/

void QEP_TAB::init_join_cache(JOIN_TAB *join_tab) {
  assert(idx() > 0);
  ASSERT_BEST_REF_IN_JOIN_ORDER(join());
  assert(join_tab == join()->best_ref[idx()]);

  switch (join_tab->use_join_cache()) {
    case JOIN_CACHE::ALG_BNL:
      op_type = QEP_TAB::OT_BNL;
      break;
    case JOIN_CACHE::ALG_BKA:
      op_type = QEP_TAB::OT_BKA;
      break;
    default:
      assert(0);
  }
}

/**
  Plan refinement stage: do various setup things for the executor

  @param join          Join being processed
  @param no_jbuf_after Don't use join buffering after table with this number.

  @return false if successful, true if error (Out of memory)

  @details
    Plan refinement stage: do various set ups for the executioner
      - setup join buffering use
      - push index conditions
      - increment relevant counters
      - etc
*/

bool make_join_readinfo(JOIN *join, uint no_jbuf_after) {
  const bool statistics = !join->thd->lex->is_explain();
  const bool prep_for_pos = join->need_tmp_before_win ||
                            join->select_distinct ||
                            !join->group_list.empty() || !join->order.empty() ||
                            join->m_windows.elements > 0;

  DBUG_TRACE;
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  Opt_trace_context *const trace = &join->thd->opt_trace;
  const Opt_trace_object wrapper(trace);
  const Opt_trace_array trace_refine_plan(trace, "refine_plan");

  if (setup_semijoin_dups_elimination(join, no_jbuf_after))
    return true; /* purecov: inspected */

  for (uint i = join->const_tables; i < join->tables; i++) {
    QEP_TAB *const qep_tab = &join->qep_tab[i];
    if (!qep_tab->position()) continue;

    JOIN_TAB *const tab = join->best_ref[i];
    TABLE *const table = qep_tab->table();
    Table_ref *const table_ref = qep_tab->table_ref;
    /*
     Need to tell handlers that to play it safe, it should fetch all
     columns of the primary key of the tables: this is because MySQL may
     build row pointers for the rows, and for all columns of the primary key
     the read set has not necessarily been set by the server code.
    */
    if (prep_for_pos) table->prepare_for_position();

    Opt_trace_object trace_refine_table(trace);
    trace_refine_table.add_utf8_table(table_ref);

    if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
      qep_tab->init_join_cache(tab);

    switch (qep_tab->type()) {
      case JT_EQ_REF:
      case JT_REF_OR_NULL:
      case JT_REF:
      case JT_SYSTEM:
      case JT_CONST:
        if (table->covering_keys.is_set(qep_tab->ref().key) &&
            !table->no_keyread)
          table->set_keyread(true);
        else
          qep_tab->push_index_cond(tab, qep_tab->ref().key,
                                   &trace_refine_table);
        break;
      case JT_ALL:
        join->thd->set_status_no_index_used();
        qep_tab->using_dynamic_range = (tab->use_quick == QS_DYNAMIC_RANGE);
        [[fallthrough]];
      case JT_INDEX_SCAN:
        if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST &&
            !tab->sj_mat_exec()) {
          /*
            rows_w_const_cond is # of rows which will be read by the access
            method, minus those which will not pass the constant condition;
            that's how calculate_scan_cost() works. Such number is useful inside
            the planner, but obscure to the reader of EXPLAIN; so we put the
            real count of read rows into rows_fetched, and move the constant
            condition's filter to filter_effect.
          */
          const double rows_w_const_cond = qep_tab->position()->rows_fetched;
          table_ref->fetch_number_of_rows();
          tab->position()->rows_fetched =
              static_cast<double>(table->file->stats.records);
          if (tab->position()->filter_effect != COND_FILTER_STALE) {
            // Constant condition moves to filter_effect:
            if (tab->position()->rows_fetched == 0)  // avoid division by zero
              tab->position()->filter_effect = 0.0f;
            else
              tab->position()->filter_effect *= static_cast<float>(
                  rows_w_const_cond / tab->position()->rows_fetched);
          }
        }
        if (qep_tab->using_dynamic_range) {
          join->thd->set_status_no_good_index_used();
          if (statistics) join->thd->inc_status_select_range_check();
        } else {
          if (statistics) {
            if (i == join->const_tables)
              join->thd->inc_status_select_scan();
            else
              join->thd->inc_status_select_full_join();
          }
        }
        break;
      case JT_RANGE:
      case JT_INDEX_MERGE:
        qep_tab->using_dynamic_range = (tab->use_quick == QS_DYNAMIC_RANGE);
        if (statistics) {
          if (i == join->const_tables)
            join->thd->inc_status_select_range();
          else
            join->thd->inc_status_select_full_range_join();
        }
        if (!table->no_keyread && qep_tab->type() == JT_RANGE) {
          if (table->covering_keys.is_set(used_index(qep_tab->range_scan()))) {
            assert(used_index(qep_tab->range_scan()) != MAX_KEY);
            table->set_keyread(true);
          }
          if (!table->key_read)
            qep_tab->push_index_cond(tab, used_index(qep_tab->range_scan()),
                                     &trace_refine_table);
        }
        if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST) {
          const double rows_w_const_cond = qep_tab->position()->rows_fetched;
          qep_tab->position()->rows_fetched =
              tab->range_scan()->num_output_rows();
          if (tab->position()->filter_effect != COND_FILTER_STALE) {
            // Constant condition moves to filter_effect:
            if (tab->position()->rows_fetched == 0)  // avoid division by zero
              tab->position()->filter_effect = 0.0f;
            else
              tab->position()->filter_effect *= static_cast<float>(
                  rows_w_const_cond / tab->position()->rows_fetched);
          }
        }
        break;
      case JT_FT:
        if (tab->join()->fts_index_access(tab)) {
          table->set_keyread(true);
          table->covering_keys.set_bit(tab->ft_func()->key);
        }
        break;
      default:
        DBUG_PRINT("error", ("Table type %d found",
                             qep_tab->type())); /* purecov: deadcode */
        assert(0);
        break; /* purecov: deadcode */
    }

    if (tab->position()->filter_effect <= COND_FILTER_STALE) {
      /*
        Cost and rows produced needs to be updated to match the logic
        in test_if_skip_sort_order().
      */
      bool need_cost_update =
          join->primary_tables == 1 &&
          tab->position()->filter_effect == COND_FILTER_STALE_NO_CONST &&
          table->s->has_secondary_engine();
      /*
        Give a proper value for EXPLAIN.
        For performance reasons, we do not recalculate the filter for
        non-EXPLAIN queries; thus, EXPLAIN CONNECTION may show 100%
        for a query.

        Also calculate the proper value if max_join_size is in effect and there
        is a limit, since it's needed in order to calculate how many rows to
        read from the base table if rows are filtered before the limit is
        applied.
      */
      tab->position()->filter_effect =
          (join->thd->lex->is_explain() || need_cost_update ||
           (join->m_select_limit != HA_POS_ERROR &&
            !Overlaps(join->thd->variables.option_bits, OPTION_BIG_SELECTS)))
              ? calculate_condition_filter(
                    tab,
                    (tab->ref().key != -1) ? tab->position()->key : nullptr,
                    tab->prefix_tables() & ~table_ref->map(),
                    tab->position()->rows_fetched, false, false,
                    trace_refine_table)
              : COND_FILTER_ALLPASS;
      /*
        Update the cost/rows data accordingly for single table queries. Updating
        Multi-table queries here can lead to inconsistencies.
      */
      if (need_cost_update)
        tab->position()->set_prefix_join_cost(tab->idx(), join->cost_model());
    }

    assert(!table_ref->is_recursive_reference() || qep_tab->type() == JT_ALL);

    qep_tab->set_reversed_access(tab->reversed_access);

    // Materialize derived tables prior to accessing them.
    if (table_ref->is_table_function()) {
      qep_tab->materialize_table = QEP_TAB::MATERIALIZE_TABLE_FUNCTION;
      if (tab->dependent) qep_tab->rematerialize = true;
    } else if (table_ref->uses_materialization()) {
      qep_tab->materialize_table = QEP_TAB::MATERIALIZE_DERIVED;
    }

    if (qep_tab->sj_mat_exec())
      qep_tab->materialize_table = QEP_TAB::MATERIALIZE_SEMIJOIN;

    if (table_ref->is_derived() &&
        table_ref->derived_query_expression()->m_lateral_deps) {
      auto deps = table_ref->derived_query_expression()->m_lateral_deps;
      plan_idx last = NO_PLAN_IDX;
      for (JOIN_TAB **tab2 = join->map2table; deps; tab2++, deps >>= 1) {
        if (deps & 1) last = std::max(last, (*tab2)->idx());
      }
      /*
        We identified the last dependency of table_ref in the plan, and it's
        the table whose reading must trigger rematerialization of table_ref.
      */
      if (last != NO_PLAN_IDX) {
        QEP_TAB &t = join->qep_tab[last];
        t.lateral_derived_tables_depend_on_me |= TableBitmap(i);
        trace_refine_table.add_utf8("rematerialized_for_each_row_of",
                                    t.table()->alias);
      }
    }
  }

  return false;
}

void JOIN_TAB::set_table(TABLE *t) {
  if (t != nullptr) t->reginfo.join_tab = this;
  m_qs->set_table(t);
}

void JOIN_TAB::init_join_cond_ref(Table_ref *tl) {
  m_join_cond_ref = tl->join_cond_optim_ref();
}

/**
  Cleanup table of join operation.
*/

void JOIN_TAB::cleanup() {
  // Delete parts specific of JOIN_TAB:

  if (table()) table()->reginfo.join_tab = nullptr;

  // Delete shared parts:
  if (join()->qep_tab) {
    // deletion will be done by QEP_TAB
  } else
    qs_cleanup();
}

void QEP_TAB::cleanup() {
  // Delete parts specific of QEP_TAB:
  if (filesort != nullptr) ::destroy_at(filesort);
  filesort = nullptr;

  TABLE *const t = table();

  if (t != nullptr) {
    t->reginfo.qep_tab = nullptr;
    t->const_table = false;  // Note: Also done in TABLE::init()
  }

  // Delete shared parts:
  qs_cleanup();

  // Order of qs_cleanup() and this, matters:
  if (op_type == QEP_TAB::OT_MATERIALIZE ||
      op_type == QEP_TAB::OT_AGGREGATE_THEN_MATERIALIZE ||
      op_type == QEP_TAB::OT_AGGREGATE_INTO_TMP_TABLE ||
      op_type == QEP_TAB::OT_WINDOWING_FUNCTION) {
    if (t != nullptr)  // Check tmp table is not yet freed.
    {
      close_tmp_table(t);
      free_tmp_table(t);
    }
    ::destroy_at(tmp_table_param);
    tmp_table_param = nullptr;
  }
  if (table_ref != nullptr && table_ref->uses_materialization()) {
    assert(t == table_ref->table);
    t->merge_keys.clear_all();
    t->quick_keys.clear_all();
    t->covering_keys.clear_all();
    t->possible_quick_keys.clear_all();

    close_tmp_table(t);
  }
}

void QEP_shared_owner::qs_cleanup() {
  /* Skip non-existing derived tables/views result tables */
  if (table() &&
      (table()->s->tmp_table != INTERNAL_TMP_TABLE || table()->is_created())) {
    table()->set_keyread(false);
    table()->file->ha_index_or_rnd_end();
    free_io_cache(table());
    filesort_free_buffers(table(), true);
    Table_ref *const table_ref = table()->pos_in_table_list;
    if (table_ref) {
      table_ref->derived_keys_ready = false;
      table_ref->derived_key_list.clear();
    }
  }
  if (range_scan() != nullptr) ::destroy_at(range_scan());
}

uint QEP_TAB::sjm_query_block_id() const {
  assert(sj_is_materialize_strategy(get_sj_strategy()));
  for (uint i = 0; i < join()->primary_tables; ++i) {
    // Find the sj-mat tmp table whose sj nest contains us:
    Semijoin_mat_exec *const sjm = join()->qep_tab[i].sj_mat_exec();
    if (sjm && (uint)idx() >= sjm->inner_table_index &&
        (uint)idx() < sjm->inner_table_index + sjm->table_count)
      return sjm->sj_nest->nested_join->query_block_id;
  }
  assert(false);
  return 0;
}

/**
  Extend join_tab->cond by AND'ing add_cond to it

  @param add_cond    The condition to AND with the existing cond
                     for this JOIN_TAB

  @retval true   if there was a memory allocation error
  @retval false  otherwise
*/
bool QEP_shared_owner::and_with_condition(Item *add_cond) {
  Item *tmp = condition();
  if (and_conditions(&tmp, add_cond)) return true;
  set_condition(tmp);
  return false;
}

/**
  Partially cleanup JOIN after it has executed: close index or rnd read
  (table cursors), free quick selects.

    This function is called in the end of execution of a JOIN, before the used
    tables are unlocked and closed.

    For a join that is resolved using a temporary table, the first sweep is
    performed against actual tables and an intermediate result is inserted
    into the temporary table.
    The last sweep is performed against the temporary table. Therefore,
    the base tables and associated buffers used to fill the temporary table
    are no longer needed, and this function is called to free them.

    For a join that is performed without a temporary table, this function
    is called after all rows are sent, but before EOF packet is sent.

    For a simple SELECT with no subqueries this function performs a full
    cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
    tables.

    If a JOIN is executed for a subquery or if it has a subquery, we can't
    do the full cleanup and need to do a partial cleanup only.
    - If a JOIN is not the top level join, we must not unlock the tables
    because the outer select may not have been evaluated yet, and we
    can't unlock only selected tables of a query.
    - Additionally, if this JOIN corresponds to a correlated subquery, we
    should not free quick selects and join buffers because they will be
    needed for the next execution of the correlated subquery.
    - However, if this is a JOIN for a [sub]select, which is not
    a correlated subquery itself, but has subqueries, we can free it
    fully and also free JOINs of all its subqueries. The exception
    is a subquery in SELECT list, e.g:
    @code
    SELECT a, (select max(b) from t1) group by c
    @endcode
    This subquery will not be evaluated at first sweep and its value will
    not be inserted into the temporary table. Instead, it's evaluated
    when selecting from the temporary table. Therefore, it can't be freed
    here even though it's not correlated.

  @todo
    Unlock tables even if the join isn't top level select in the tree
*/

void JOIN::join_free() {
  Query_expression *tmp_query_expression;
  Query_block *sl;
  /*
    Optimization: if not EXPLAIN and we are done with the JOIN,
    free all tables.
  */
  const bool full = (!query_block->uncacheable && !thd->lex->is_explain());
  bool can_unlock = full;
  DBUG_TRACE;

  cleanup();

  for (tmp_query_expression = query_block->first_inner_query_expression();
       tmp_query_expression;
       tmp_query_expression = tmp_query_expression->next_query_expression())
    for (sl = tmp_query_expression->first_query_block(); sl;
         sl = sl->next_query_block()) {
      Item_subselect *subselect = sl->master_query_expression()->item;
      const bool full_local = full && (!subselect || subselect->is_evaluated());
      /*
        If this join is evaluated, we can partially clean it up and clean up
        all its underlying joins even if they are correlated, only query plan
        is left in case a user will run EXPLAIN FOR CONNECTION.
        If this join is not yet evaluated, we still must clean it up to
        close its table cursors -- it may never get evaluated, as in case of
        ... HAVING FALSE OR a IN (SELECT ...))
        but all table cursors must be closed before the unlock.
      */
      sl->cleanup_all_joins();
      /* Can't unlock if at least one JOIN is still needed */
      can_unlock = can_unlock && full_local;
    }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */
  if (can_unlock && lock && thd->lock && !thd->locked_tables_mode &&
      !(query_block->active_options() & SELECT_NO_UNLOCK) &&
      !query_block->subquery_in_having &&
      (query_block == thd->lex->unit->query_term()->query_block())) {
    /*
      TODO: unlock tables even if the join isn't top level select in the
      tree.
    */
    mysql_unlock_read_tables(thd, lock);  // Don't free join->lock
    DEBUG_SYNC(thd, "after_join_free_unlock");
    lock = nullptr;
  }
}

static void cleanup_table(TABLE *table) {
  if (table->is_created()) {
    table->file->ha_index_or_rnd_end();
  }
  free_io_cache(table);
  filesort_free_buffers(table, false);
}

/**
  Free resources of given join.

  @note
    With subquery this function definitely will be called several times,
    but even for simple query it can be called several times.
*/

void JOIN::cleanup() {
  DBUG_TRACE;

  assert(const_tables <= primary_tables && primary_tables <= tables);

  if (qep_tab || join_tab || best_ref) {
    for (uint i = 0; i < tables; i++) {
      QEP_TAB *qtab;
      TABLE *table;
      if (qep_tab) {
        assert(!join_tab);
        qtab = &qep_tab[i];
        table = qtab->table();
      } else {
        qtab = nullptr;
        table = (join_tab ? &join_tab[i] : best_ref[i])->table();
      }
      if (!table) continue;
      cleanup_table(table);
    }
  } else if (thd->lex->using_hypergraph_optimizer()) {
    for (Table_ref *tl = query_block->leaf_tables; tl; tl = tl->next_leaf) {
      cleanup_table(tl->table);
    }
    for (JOIN::TemporaryTableToCleanup cleanup : temp_tables) {
      cleanup_table(cleanup.table);
    }
  }
}

/**
  Filter out ORDER BY items that are equal to constants in WHERE condition

  This function is a limited version of remove_const() for use
  with non-JOIN statements (i.e. single-table UPDATE and DELETE).

  @param order            Linked list of ORDER BY arguments.
  @param where            Where condition.

  @return pointer to new filtered ORDER list or NULL if whole list eliminated

  @note
    This function overwrites input order list.
*/

ORDER *simple_remove_const(ORDER *order, Item *where) {
  if (order == nullptr || where == nullptr) return order;

  ORDER *first = nullptr, *prev = nullptr;
  for (; order; order = order->next) {
    assert(!order->item[0]->has_aggregation());  // should never happen
    if (!check_field_is_const(where, order->item[0])) {
      if (first == nullptr) first = order;
      if (prev != nullptr) prev->next = order;
      prev = order;
    }
  }
  if (prev != nullptr) prev->next = nullptr;
  return first;
}

bool equality_determines_uniqueness(const Item_func_comparison *func,
                                    const Item *v, const Item *c) {
  /*
    - The "c" argument must be a constant.
    - The result type of both arguments must be the same.
      However, since a temporal type is also classified as a string type,
      we do not allow a temporal constant to be considered equal to a
      variable character string.
    - If both arguments are strings, the comparison operator must have the same
      collation as the ordering operation applied to the variable expression.
  */
  return c->const_for_execution() && v->result_type() == c->result_type() &&
         (v->result_type() != STRING_RESULT ||
          (!(is_string_type(v->data_type()) &&
             is_temporal_type(c->data_type())) &&
           func->compare_collation() == v->collation.collation));
}

bool equality_has_no_implicit_casts(const Item_func_comparison *func,
                                    const Item *item1, const Item *item2) {
  // See equality_determines_uniqueness() for the logic around strings
  // and dates.
  if (item1->result_type() != item2->result_type()) {
    return false;
  }
  if (item1->result_type() == STRING_RESULT) {
    if (is_temporal_type(item1->data_type()) !=
        is_temporal_type(item2->data_type())) {
      return false;
    }
    if (is_string_type(item1->data_type()) !=
        is_string_type(item2->data_type())) {
      return false;
    }
    if (is_string_type(item1->data_type())) {
      if (func->compare_collation() != item1->collation.collation ||
          func->compare_collation() != item2->collation.collation) {
        return false;
      }
    }
  }
  return true;
}

/*
  Return true if i1 and i2 (if any) are equal items,
  or if i1 is a wrapper item around the f2 field.
*/

static bool equal(const Item *i1, const Item *i2, const Field *f2) {
  assert((i2 == nullptr) ^ (f2 == nullptr));

  if (i2 != nullptr)
    return i1->eq(i2, true);
  else if (i1->type() == Item::FIELD_ITEM)
    return f2->eq(down_cast<const Item_field *>(i1)->field);
  else
    return false;
}

/**
  Check if a field is equal to a constant value in a condition

  @param      cond        condition to search within
  @param      order_item  Item to find in condition (if order_field is NULL)
  @param      order_field Field to find in condition (if order_item is NULL)
  @param[out] const_item  Used in calculation with conjunctive predicates,
                          must be NULL in outer-most call.

  @returns true if the field is a constant value in condition, false otherwise
*/
bool check_field_is_const(Item *cond, const Item *order_item,
                          const Field *order_field, Item **const_item) {
  assert((order_item == nullptr) ^ (order_field == nullptr));

  Item *intermediate = nullptr;
  if (const_item == nullptr) const_item = &intermediate;

  if (cond->type() == Item::COND_ITEM) {
    Item_cond *const c = down_cast<Item_cond *>(cond);
    bool and_level = c->functype() == Item_func::COND_AND_FUNC;
    List_iterator_fast<Item> li(*c->argument_list());
    Item *item;
    while ((item = li++)) {
      if (check_field_is_const(item, order_item, order_field, const_item)) {
        if (and_level) return true;
      } else if (!and_level)
        return false;
    }
    return !and_level;
  }
  if (cond->type() != Item::FUNC_ITEM) return false;
  Item_func *const func = down_cast<Item_func *>(cond);
  if (func->functype() != Item_func::EQUAL_FUNC &&
      func->functype() != Item_func::EQ_FUNC)
    return false;
  Item_func_comparison *comp = down_cast<Item_func_comparison *>(func);
  Item *left = comp->arguments()[0];
  Item *right = comp->arguments()[1];
  if (equal(left, order_item, order_field)) {
    if (equality_determines_uniqueness(comp, left, right)) {
      if (*const_item != nullptr) return right->eq(*const_item, true);
      *const_item = right;
      return true;
    }
  } else if (equal(right, order_item, order_field)) {
    if (equality_determines_uniqueness(comp, right, left)) {
      if (*const_item != nullptr) return left->eq(*const_item, true);
      *const_item = left;
      return true;
    }
  }
  return false;
}

/**
  Update TMP_TABLE_PARAM with count of the different type of fields.

  This function counts the number of fields, functions and sum
  functions (items with type SUM_FUNC_ITEM) for use by
  create_tmp_table() and stores it in the Temp_table_param object. It
  also updates the allow_group_via_temp_table property if needed.

  @param query_block           Query_block of query
  @param param                Description of temp table
  @param fields               List of fields to count
  @param reset_with_sum_func  Whether to reset with_sum_func of func items
  @param save_sum_fields      Count in the way create_tmp_table() expects when
                              given the same parameter.
*/

void count_field_types(const Query_block *query_block, Temp_table_param *param,
                       const mem_root_deque<Item *> &fields,
                       bool reset_with_sum_func, bool save_sum_fields) {
  DBUG_TRACE;

  param->sum_func_count = 0;
  param->func_count = fields.size();
  param->hidden_field_count = 0;
  param->outer_sum_func_count = 0;
  /*
    Loose index scan guarantees that all grouping is done and MIN/MAX
    functions are computed, so create_tmp_table() treats this as if
    save_sum_fields is set.
  */
  save_sum_fields |= param->precomputed_group_by;

  for (Item *field : fields) {
    Item *real = field->real_item();
    Item::Type real_type = real->type();

    if (real_type == Item::SUM_FUNC_ITEM && !real->m_is_window_function) {
      if (!field->const_item()) {
        Item_sum *sum_item = down_cast<Item_sum *>(field->real_item());
        if (sum_item->aggr_query_block == query_block) {
          if (!sum_item->allow_group_via_temp_table)
            param->allow_group_via_temp_table = false;  // UDF SUM function
          param->sum_func_count++;

          // Add one column per argument.
          param->func_count += sum_item->argument_count();
        }
      } else if (save_sum_fields) {
        /*
          Count the way create_tmp_table() does if asked to preserve
          Item_sum_* functions in fields list.

          Item field is an Item_sum_* or a reference to such an
          item. We need to distinguish between these two cases since
          they are treated differently by create_tmp_table().
        */
        if (field->type() != Item::SUM_FUNC_ITEM) {
          // A reference to an Item_sum_*
          param->func_count++;  // TODO: Is this really needed?
          param->sum_func_count++;
        }
      }
    } else if (real_type == Item::SUM_FUNC_ITEM) {
      assert(real->m_is_window_function);

      Item_sum *window_item = down_cast<Item_sum *>(real);
      param->func_count += window_item->argument_count();
    } else {
      if (reset_with_sum_func) field->reset_aggregation();
      if (field->has_aggregation()) param->outer_sum_func_count++;
    }
  }
}

/**
  Return 1 if second is a subpart of first argument.

  If first parts has different direction, change it to second part
  (group is sorted like order)
*/

bool test_if_subpart(ORDER *a, ORDER *b) {
  ORDER *first = a;
  ORDER *second = b;
  for (; first && second; first = first->next, second = second->next) {
    if ((*first->item)->eq(*second->item, true))
      continue;
    else
      return false;
  }
  // If the second argument is not subpart of the first return false
  if (second) return false;
  // Else assign the direction of the second argument to the first
  for (; a && b; a = a->next, b = b->next) a->direction = b->direction;
  return true;
}

/**
  calc how big buffer we need for comparing group entries.
*/

void calc_group_buffer(JOIN *join, ORDER *group) {
  DBUG_TRACE;
  uint key_length = 0, parts = 0, null_parts = 0;

  if (group) join->grouped = true;
  for (; group; group = group->next) {
    Item *group_item = *group->item;
    Field *field = group_item->get_tmp_table_field();
    if (field) {
      enum_field_types type;
      if ((type = field->type()) == MYSQL_TYPE_BLOB)
        key_length += MAX_BLOB_WIDTH;  // Can't be used as a key
      else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
        key_length += field->field_length + HA_KEY_BLOB_LENGTH;
      else if (type == MYSQL_TYPE_BIT) {
        /* Bit is usually stored as a longlong key for group fields */
        key_length += 8;  // Big enough
      } else
        key_length += field->pack_length();
    } else {
      switch (group_item->result_type()) {
        case REAL_RESULT:
          key_length += sizeof(double);
          break;
        case INT_RESULT:
          key_length += sizeof(longlong);
          break;
        case DECIMAL_RESULT:
          key_length += my_decimal_get_binary_size(
              group_item->max_length - (group_item->decimals ? 1 : 0),
              group_item->decimals);
          break;
        case STRING_RESULT: {
          /*
            As items represented as DATE/TIME fields in the group buffer
            have STRING_RESULT result type, we increase the length
            by 8 as maximum pack length of such fields.
          */
          if (group_item->is_temporal()) {
            key_length += 8;
          } else if (group_item->data_type() == MYSQL_TYPE_BLOB)
            key_length += MAX_BLOB_WIDTH;  // Can't be used as a key
          else {
            /*
              Group strings are taken as varstrings and require an length field.
              A field is not yet created by create_tmp_field()
              and the sizes should match up.
            */
            key_length += group_item->max_length + HA_KEY_BLOB_LENGTH;
          }
          break;
        }
        default:
          /* This case should never be chosen */
          assert(0);
          my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
      }
    }
    parts++;
    if (group_item->is_nullable()) null_parts++;
  }
  join->tmp_table_param.group_length = key_length + null_parts;
  join->tmp_table_param.group_parts = parts;
  join->tmp_table_param.group_null_parts = null_parts;
}

/**
  Make an array of pointers to sum_functions to speed up
  sum_func calculation.

  @retval
    0	ok
  @retval
    1	Error
*/

bool JOIN::alloc_func_list() {
  uint func_count, group_parts;
  DBUG_TRACE;

  func_count = tmp_table_param.sum_func_count;
  /*
    If we are using rollup, we need a copy of the summary functions for
    each level
  */
  if (rollup_state != RollupState::NONE) func_count *= (send_group_parts + 1);

  group_parts = send_group_parts;
  /*
    If distinct, reserve memory for possible
    disctinct->group_by optimization
  */
  if (select_distinct) {
    group_parts += CountVisibleFields(*fields);
    /*
      If the ORDER clause is specified then it's possible that
      it also will be optimized, so reserve space for it too
    */
    if (!order.empty()) {
      ORDER *ord;
      for (ord = order.order; ord; ord = ord->next) group_parts++;
    }
  }

  /* This must use calloc() as rollup_make_fields depends on this */
  sum_funcs =
      (Item_sum **)thd->mem_calloc(sizeof(Item_sum **) * (func_count + 1) +
                                   sizeof(Item_sum ***) * (group_parts + 1));
  return sum_funcs == nullptr;
}

/**
  Initialize 'sum_funcs' array with all Item_sum objects.

  @param fields            All items
  @param before_group_by   Set to 1 if this is called before GROUP BY handling
  @param recompute         Set to true if sum_funcs must be recomputed

  @retval
    0  ok
  @retval
    1  error
*/

bool JOIN::make_sum_func_list(const mem_root_deque<Item *> &fields,
                              bool before_group_by, bool recompute) {
  DBUG_TRACE;

  if (*sum_funcs && !recompute)
    return false; /* We have already initialized sum_funcs. */

  Item_sum **func = sum_funcs;
  for (Item *item : fields) {
    if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
        down_cast<Item_sum *>(item)->aggr_query_block == query_block) {
      assert(!item->m_is_window_function);
      *func++ = down_cast<Item_sum *>(item);
    }
  }
  if (before_group_by && rollup_state == RollupState::INITED) {
    rollup_state = RollupState::READY;
  } else if (rollup_state == RollupState::READY)
    return false;   // Don't put end marker
  *func = nullptr;  // End marker
  return false;
}

/**
  Free joins of subselect of this select.

  @param select   pointer to Query_block which subselects joins we will free

  @todo when the final use of this function (from SET statements) is removed,
  this function can be deleted.
*/

void free_underlaid_joins(Query_block *select) {
  for (Query_expression *query_expression =
           select->first_inner_query_expression();
       query_expression;
       query_expression = query_expression->next_query_expression())
    query_expression->cleanup(false);
}

/**
  Change the Query_result object of the query block.

  If old_result is not used, forward the call to the current
  Query_result in case it is a wrapper around old_result.

  Call prepare() on the new Query_result if we decide to use it.

  @param thd        Thread handle
  @param new_result New Query_result object
  @param old_result Old Query_result object (NULL to force change)

  @retval false Success
  @retval true  Error
*/

bool Query_block::change_query_result(THD *thd,
                                      Query_result_interceptor *new_result,
                                      Query_result_interceptor *old_result) {
  DBUG_TRACE;
  if (old_result == nullptr || query_result() == old_result) {
    set_query_result(new_result);
    if (query_result()->prepare(thd, fields, master_query_expression()))
      return true; /* purecov: inspected */
    return false;
  } else {
    const bool ret = query_result()->change_query_result(thd, new_result);
    return ret;
  }
}

/**
  Add having condition as a filter condition, which is applied when reading
  from the temp table.

  @param    curr_tmp_table  Table number to which having conds are added.
  @returns  false if success, true if error.
*/

bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table) {
  having_cond->update_used_tables();
  QEP_TAB *const curr_table = &qep_tab[curr_tmp_table];
  table_map used_tables;
  Opt_trace_context *const trace = &thd->opt_trace;

  DBUG_TRACE;

  if (curr_table->table_ref)
    used_tables = curr_table->table_ref->map();
  else {
    /*
      Pushing parts of HAVING to an internal temporary table.
      Fields in HAVING condition may have been replaced with fields in an
      internal temporary table. This table has map=1.
    */
    assert(having_cond->has_subquery() ||
           !(having_cond->used_tables() & ~(1 | PSEUDO_TABLE_BITS)));
    used_tables = 1;
  }
  // Condition may contain outer references, const and non-deterministic exprs:
  used_tables |= PSEUDO_TABLE_BITS;

  /*
    All conditions which can be applied after reading from used_tables are
    added as filter conditions of curr_tmp_table. If condition's used_tables is
    not read yet for example subquery in having, then it will be kept as it is
    in original having_cond of join.
    If ROLLUP, having condition needs to be tested after writing rollup data.
    So do not move the having condition.
  */
  Item *sort_table_cond =
      (rollup_state == RollupState::NONE)
          ? make_cond_for_table(thd, having_cond, used_tables, table_map{0},
                                false)
          : nullptr;
  if (sort_table_cond) {
    if (!curr_table->condition())
      curr_table->set_condition(sort_table_cond);
    else {
      curr_table->set_condition(
          new Item_cond_and(curr_table->condition(), sort_table_cond));
      if (curr_table->condition()->fix_fields(thd, nullptr)) return true;
    }
    curr_table->condition()->apply_is_true();
    DBUG_EXECUTE("where", print_where(thd, curr_table->condition(),
                                      "select and having", QT_ORDINARY););

    having_cond = make_cond_for_table(thd, having_cond, ~table_map{0},
                                      ~used_tables, false);
    DBUG_EXECUTE("where", print_where(thd, having_cond, "having after sort",
                                      QT_ORDINARY););

    const Opt_trace_object trace_wrapper(trace);
    Opt_trace_object(trace, "sort_using_internal_table")
        .add("condition_for_sort", sort_table_cond)
        .add("having_after_sort", having_cond);
  }

  return false;
}

bool CreateFramebufferTable(
    THD *thd, const Temp_table_param &tmp_table_param,
    const Query_block &query_block, const mem_root_deque<Item *> &source_fields,
    const mem_root_deque<Item *> &window_output_fields,
    Func_ptr_array *mapping_from_source_to_window_output, Window *window) {
  /*
    Create the window frame buffer tmp table.  We create a
    temporary table with same contents as the output tmp table
    in the windowing pipeline (columns defined by
    curr_all_fields), but used for intermediate storage, saving
    the window's frame buffer now that we know the window needs
    buffering.
  */
  Temp_table_param *par =
      new (thd->mem_root) Temp_table_param(thd->mem_root, tmp_table_param);
  par->m_window_frame_buffer = true;

  // Don't include temporary fields that originally came from
  // a window function (or an expression containing a window function).
  // Window functions are not relevant to store in the framebuffer,
  // and in fact, trying to restore them would often overwrite
  // good data we shouldn't.
  //
  // Not that the regular filtering in create_tmp_table() cannot do this
  // for us, as it only sees the Item_field, not where it came from.
  mem_root_deque<Item *> fb_fields(window_output_fields);
  for (size_t i = 0; i < fb_fields.size(); ++i) {
    Item *orig_item = source_fields[i];
    if (orig_item->has_wf()) {
      fb_fields[i] = nullptr;
    }
  }
  fb_fields.erase(std::remove(fb_fields.begin(), fb_fields.end(), nullptr),
                  fb_fields.end());
  count_field_types(&query_block, par, fb_fields, false, false);

  TABLE *table =
      create_tmp_table(thd, par, fb_fields, nullptr, false, false,
                       query_block.active_options(), HA_POS_ERROR, "");
  if (table == nullptr) return true;

  window->set_frame_buffer_param(par);
  window->set_frame_buffer(table);

  // For window function expressions we are to evaluate after
  // framebuffering, we need to replace their arguments to point to the
  // output table instead of the input table (we could probably also have
  // used the framebuffer if we wanted). E.g., if our input is t1 and our
  // output is <temporary>, we need to rewrite 1 + SUM(t1.x) OVER w into
  // 1 + SUM(<temporary>.x) OVER w.
  for (Func_ptr &ptr : *mapping_from_source_to_window_output) {
    if (ptr.func()->has_wf()) {
      ReplaceMaterializedItems(thd, ptr.func(),
                               *mapping_from_source_to_window_output,
                               /*need_exact_match=*/false);
    }
  }
  return false;
}

/**
  Init tmp tables usage info.

  @details
  This function finalizes execution plan by taking following actions:
    .) tmp tables are created, but not instantiated (this is done during
       execution). QEP_TABs dedicated to tmp tables are filled appropriately.
       see JOIN::create_intermediate_table.
    .) prepare fields lists (fields, all_fields, ref_item_array slices) for
       each required stage of execution. These fields lists are set for
       tmp tables' tabs and for the tab of last table in the join.
    .) fill info for sorting/grouping/dups removal is prepared and saved to
       appropriate tabs. Here is an example:
        SELECT * from t1,t2 WHERE ... GROUP BY t1.f1 ORDER BY t2.f2, t1.f2
        and lets assume that the table order in the plan is t1,t2.
       In this case optimizer will sort for group only the first table as the
       second one isn't mentioned in GROUP BY. The result will be materialized
       in tmp table.  As filesort can't sort join optimizer will sort tmp table
       also. The first sorting (for group) is called simple as is doesn't
       require tmp table.  The Filesort object for it is created here - in
       JOIN::create_intermediate_table.  Filesort for the second case is
       created here, in JOIN::make_tmp_tables_info.

  @note
  This function may change tmp_table_param.precomputed_group_by. This
  affects how create_tmp_table() treats aggregation functions, so
  count_field_types() must be called again to make sure this is taken
  into consideration.

  @returns
  false - Ok
  true  - Error
*/

bool JOIN::make_tmp_tables_info() {
  assert(!join_tab);
  mem_root_deque<Item *> *curr_fields = fields;
  bool materialize_join = false;
  uint curr_tmp_table = const_tables;
  TABLE *exec_tmp_table = nullptr;

  auto cleanup_tmp_tables_on_error =
      create_scope_guard([this, &curr_tmp_table] {
        if (qep_tab == nullptr) {
          return;
        }
        for (unsigned table_idx = primary_tables; table_idx <= curr_tmp_table;
             ++table_idx) {
          TABLE *table = qep_tab[table_idx].table();
          if (table != nullptr) {
            close_tmp_table(table);
            free_tmp_table(table);
            qep_tab[table_idx].set_table(nullptr);
          }
        }
      });

  /*
    If the plan is constant, we will not do window tmp table processing
    cf. special code path for handling const plans.
  */
  m_windowing_steps = m_windows.elements > 0 && !plan_is_const() &&
                      !implicit_grouping && !group_optimized_away;
  const bool may_trace =  // just to avoid an empty trace block
      need_tmp_before_win || implicit_grouping || m_windowing_steps ||
      !group_list.empty() || !order.empty();

  Opt_trace_context *const trace = &thd->opt_trace;
  const Opt_trace_disable_I_S trace_disabled(trace, !may_trace);
  const Opt_trace_object wrapper(trace);
  const Opt_trace_array trace_tmp(trace, "considering_tmp_tables");

  DBUG_TRACE;

  /*
    In this function, we may change having_cond into a condition on a
    temporary sort/group table, so we have to assign having_for_explain now:
  */
  having_for_explain = having_cond;

  const bool has_group_by = this->grouped;

  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Currently loose index scan is only applicable for single table queries. The
    only exception is when a single table query becomes a multi-table query
    because of a semijoin transformation. We check the first join_tab element
    of the plan for its access method here, which holds good even for the
    multi-table query, but only when optimizer has picked nested loop joins.
    Skip scan is enabled only for the original table in the query which is the
    first table in the join order for a nested loop join. However, for hash
    joins it does not hold good. So, we see an additional de-duplication step
    when hash join is picked as it is not aware that de-duplication is taken
    care by the access method picked.

    TODO: Make optimize_distinct_group_order() understand that de-duplication
    is taken care by the chosen access method, so that we avoid the additional
    de-duplication step.
  */
  if (qep_tab && qep_tab[0].range_scan() &&
      is_loose_index_scan(qep_tab[0].range_scan()))
    tmp_table_param.precomputed_group_by =
        !is_agg_loose_index_scan(qep_tab[0].range_scan());

  /*
    Create the first temporary table if distinct elimination is requested or
    if the sort is too complicated to be evaluated as a filesort.
  */
  if (need_tmp_before_win) {
    curr_tmp_table = primary_tables;
    Opt_trace_object trace_this_outer(trace);
    trace_this_outer.add("adding_tmp_table_in_plan_at_position",
                         curr_tmp_table);
    tmp_tables++;

    /*
      Make a copy of the base slice in the save slice.
      This is needed because later steps will overwrite the base slice with
      another slice (1-3).
      After this slice has been used, overwrite the base slice again with
      the copy in the save slice.
    */
    if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;

    copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
    current_ref_item_slice = REF_SLICE_SAVED_BASE;

    /*
      Create temporary table for use in a single execution.
      (Will be reused if this is a subquery that is executed several times
       for one execution of the statement)
      Don't use tmp table grouping for json aggregate funcs as it's
      very ineffective.
    */
    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
      tmp_group = group_list;

    tmp_table_param.hidden_field_count = CountHiddenFields(*fields);

    if (create_intermediate_table(&qep_tab[curr_tmp_table], *fields, tmp_group,
                                  !group_list.empty() && simple_group))
      return true;
    exec_tmp_table = qep_tab[curr_tmp_table].table();

    if (exec_tmp_table->s->is_distinct) optimize_distinct();

    /*
      If there is no sorting or grouping, 'use_order'
      index result should not have been requested.
      Exception: LooseScan strategy for semijoin requires
      sorted access even if final result is not to be sorted.
    */
    assert(
        !(m_ordered_index_usage == ORDERED_INDEX_VOID && !plan_is_const() &&
          qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
          qep_tab[const_tables].use_order()));

    /*
      Allocate a slice of ref items that describe the items to be copied
      from the first temporary table.
    */
    if (alloc_ref_item_slice(thd, REF_SLICE_TMP1)) return true;

    // Change sum_fields reference to calculated fields in tmp_table
    if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group ||
        tmp_table_param.precomputed_group_by) {
      if (change_to_use_tmp_fields(fields, thd, ref_items[REF_SLICE_TMP1],
                                   &tmp_fields[REF_SLICE_TMP1],
                                   query_block->m_added_non_hidden_fields))
        return true;
    } else {
      if (change_to_use_tmp_fields_except_sums(
              fields, thd, query_block, ref_items[REF_SLICE_TMP1],
              &tmp_fields[REF_SLICE_TMP1],
              query_block->m_added_non_hidden_fields))
        return true;
    }
    curr_fields = &tmp_fields[REF_SLICE_TMP1];
    // Need to set them now for correct group_fields setup, reset at the end.
    set_ref_item_slice(REF_SLICE_TMP1);
    qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP1;
    setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_outer);

    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having_cond &&
        (streaming_aggregation ||
         (exec_tmp_table->s->is_distinct && group_list.empty()))) {
      /*
        If there is no select distinct or rollup, then move the having to table
        conds of tmp table.
        NOTE : We cannot apply having after distinct. If columns of having are
               not part of select distinct, then distinct may remove rows
               which can satisfy having.

        As this condition will read the tmp table, it is appropriate that
        REF_SLICE_TMP1 is in effect when we create it below.
      */
      if ((!select_distinct && rollup_state == RollupState::NONE) &&
          add_having_as_tmp_table_cond(curr_tmp_table))
        return true;

      /*
        Having condition which we are not able to add as tmp table conds are
        kept as before. And, this will be applied before storing the rows in
        tmp table.
      */
      qep_tab[curr_tmp_table].having = having_cond;
      having_cond = nullptr;  // Already done
    }

    tmp_table_param.func_count = 0;

    if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group) {
      tmp_table_param.func_count += tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count = 0;
    }

    if (exec_tmp_table->group) {  // Already grouped
                                  /*
                                    Check if group by has to respect ordering. If true, move group by to
                                    order by.
                                  */
      if (order.empty() && !skip_sort_order) {
        for (ORDER *group = group_list.order; group; group = group->next) {
          if (group->direction != ORDER_NOT_RELEVANT) {
            order = group_list; /* order by group */
            break;
          }
        }
      }
      group_list.clean();
    }
    /*
      If we have different sort & group then we must sort the data by group
      and copy it to a second temporary table.
      This code is also used if we are using distinct something
      we haven't been able to store in the temporary table yet
      like SEC_TO_TIME(SUM(...)) or when distinct is used with rollup.
    */

    if ((!group_list.empty() &&
         (!test_if_subpart(group_list.order, order.order) || select_distinct ||
          m_windowing_steps || rollup_state != RollupState::NONE)) ||
        (select_distinct && (tmp_table_param.using_outer_summary_function ||
                             rollup_state != RollupState::NONE))) {
      DBUG_PRINT("info", ("Creating group table"));

      calc_group_buffer(this, group_list.order);
      count_field_types(query_block, &tmp_table_param,
                        tmp_fields[REF_SLICE_TMP1],
                        select_distinct && group_list.empty(), false);
      tmp_table_param.hidden_field_count =
          CountHiddenFields(tmp_fields[REF_SLICE_TMP1]);
      streaming_aggregation = false;
      if (!exec_tmp_table->group && !exec_tmp_table->s->is_distinct) {
        // 1st tmp table were materializing join result
        materialize_join = true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;
      Opt_trace_object trace_this_tbl(trace);
      trace_this_tbl.add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
          .add_alnum("cause", "sorting_to_make_groups");

      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (qep_tab[0].range_scan() &&
          is_loose_index_scan(qep_tab[0].range_scan()))
        tmp_table_param.precomputed_group_by = true;

      ORDER_with_src dummy;  // TODO can use table->group here also

      if (create_intermediate_table(&qep_tab[curr_tmp_table], *curr_fields,
                                    dummy, true))
        return true;

      if (!group_list.empty()) {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())  // No need to sort a single row
        {
          if (add_sorting_to_table(curr_tmp_table - 1, &group_list,
                                   /*sort_before_group=*/true))
            return true;
        }

        if (make_group_fields(this, this)) return true;
      }

      // Setup sum funcs only when necessary, otherwise we might break info
      // for the first table
      if (!group_list.empty() || tmp_table_param.sum_func_count) {
        if (make_sum_func_list(*curr_fields, true, true)) return true;
        const bool need_distinct =
            !(qep_tab[0].range_scan() &&
              is_agg_loose_index_scan(qep_tab[0].range_scan()));
        if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
        group_list.clean();
        if (setup_sum_funcs(thd, sum_funcs)) return true;
      }

      /*
        Allocate a slice of ref items that describe the items to be copied
        from the second temporary table.
      */
      if (alloc_ref_item_slice(thd, REF_SLICE_TMP2)) return true;

      // No sum funcs anymore
      if (change_to_use_tmp_fields(&tmp_fields[REF_SLICE_TMP1], thd,
                                   ref_items[REF_SLICE_TMP2],
                                   &tmp_fields[REF_SLICE_TMP2],
                                   query_block->m_added_non_hidden_fields))
        return true;

      curr_fields = &tmp_fields[REF_SLICE_TMP2];
      set_ref_item_slice(REF_SLICE_TMP2);
      qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP2;
      setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl);
    }
    if (qep_tab[curr_tmp_table].table()->s->is_distinct)
      select_distinct = false; /* Each row is unique */

    if (select_distinct && group_list.empty() && !m_windowing_steps) {
      if (having_cond) {
        qep_tab[curr_tmp_table].having = having_cond;
        having_cond->update_used_tables();
        having_cond = nullptr;
      }
      qep_tab[curr_tmp_table].needs_duplicate_removal = true;
      trace_this_outer.add("reading_from_table_eliminates_duplicates", true);
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      select_distinct = false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.sum_func_count = tmp_table_param.func_count = 0;

    tmp_table_param.cleanup();
    streaming_aggregation = false;

    if (!group_optimized_away) {
      grouped = false;
    } else {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT or
        there is a windowing function that needs sorting).

        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      /*
         The temporary table was explicitly requested or there is a window
         function which needs sorting (check need_tmp_before_win in
         JOIN::optimize).
      */
      assert(query_block->active_options() & OPTION_BUFFER_RESULT ||
             m_windowing_steps);
      // the temporary table does not have a grouping expression
      assert(!qep_tab[curr_tmp_table].table()->group);
    }
    calc_group_buffer(this, group_list.order);
    count_field_types(query_block, &tmp_table_param, *curr_fields, false,
                      false);
  }

  /*
    Set up structures for a temporary table but do not actually create
    the temporary table if one of these conditions are true:
    - The query is implicitly grouped.
    - The query is explicitly grouped and
        + implemented as a simple grouping, or
        + LIMIT 1 is specified, or
        + ROLLUP is specified, or
        + <some unknown condition>.
  */

  if ((grouped || implicit_grouping) && !m_windowing_steps) {
    if (make_group_fields(this, this)) return true;

    if (make_sum_func_list(*curr_fields, true, true)) return true;
    const bool need_distinct =
        !(qep_tab && qep_tab[0].range_scan() &&
          is_agg_loose_index_scan(qep_tab[0].range_scan()));
    if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error()) return true;
  }

  if (qep_tab && (!group_list.empty() ||
                  (!order.empty() && !m_windowing_steps /* [1] */))) {
    /*
      [1] above: too early to do query ORDER BY if we have windowing; must
      wait till after window processing.
    */
    ASSERT_BEST_REF_IN_JOIN_ORDER(this);
    DBUG_PRINT("info", ("Sorting for send_result_set_metadata"));
    /*
      If we have already done the group, add HAVING to sorted table except
      when rollup is present
    */
    if (having_cond && group_list.empty() && !streaming_aggregation &&
        rollup_state == RollupState::NONE) {
      if (add_having_as_tmp_table_cond(curr_tmp_table)) return true;
    }

    if (grouped)
      m_select_limit = HA_POS_ERROR;
    else if (!need_tmp_before_win) {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
      for (uint i = const_tables + 1; i < primary_tables; i++) {
        QEP_TAB *const tab = qep_tab + i;
        if (tab->condition() ||  // 1
            (best_ref[tab->idx()]->keyuse() &&
             tab->first_inner() == NO_PLAN_IDX))  // 2
        {
          /* We have to sort all rows */
          m_select_limit = HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info", ("Sorting for order by/group by"));
    ORDER_with_src order_arg = group_list.empty() ? order : group_list;
    if (qep_tab &&
        m_ordered_index_usage != (group_list.empty()
                                      ? ORDERED_INDEX_ORDER_BY
                                      : ORDERED_INDEX_GROUP_BY) &&
        // Windowing will change order, so it's too early to sort here
        !m_windowing_steps) {
      // Sort either first non-const table or the last tmp table
      QEP_TAB *const sort_tab = &qep_tab[curr_tmp_table];
      if (need_tmp_before_win && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);

      if (add_sorting_to_table(curr_tmp_table, &order_arg,
                               /*sort_before_group=*/false))
        return true;
      /*
        filesort_limit:	 Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        query_expression->select_limit_cnt == 1 (we only need one row in the
        result set)
      */
      if (sort_tab->filesort)
        sort_tab->filesort->limit =
            (has_group_by || (primary_tables > curr_tmp_table + 1) ||
             calc_found_rows)
                ? m_select_limit
                : query_expression()->select_limit_cnt;
    }
  }

  if (qep_tab && m_windowing_steps) {
    for (uint wno = 0; wno < m_windows.elements; wno++) {
      tmp_table_param.m_window = m_windows[wno];

      if (!tmp_tables) {
        curr_tmp_table = primary_tables;
        tmp_tables++;

        if (ref_items[REF_SLICE_SAVED_BASE].is_null()) {
          /*
           Make a copy of the base slice in the save slice.
           This is needed because later steps will overwrite the base slice with
           another slice (1-3 or window slice).
           After this slice has been used, overwrite the base slice again with
           the copy in the save slice.
           */
          if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;

          copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
          current_ref_item_slice = REF_SLICE_SAVED_BASE;
        }
      } else {
        curr_tmp_table++;
        tmp_tables++;
      }

      ORDER_with_src dummy;

      tmp_table_param.hidden_field_count = CountHiddenFields(*curr_fields);

      /*
        Allocate a slice of ref items that describe the items to be copied
        from the next temporary table.
      */
      const uint widx = REF_SLICE_WIN_1 + wno;

      QEP_TAB *tab = &qep_tab[curr_tmp_table];
      mem_root_deque<Item *> *orig_fields = curr_fields;
      {
        Opt_trace_object trace_this_tbl(trace);
        trace_this_tbl
            .add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
            .add_alnum("cause", "output_for_window_functions")
            .add("with_buffer", m_windows[wno]->needs_buffering());

        if (create_intermediate_table(tab, *curr_fields, dummy, false))
          return true;

        if (alloc_ref_item_slice(thd, widx)) return true;

        if (change_to_use_tmp_fields(curr_fields, thd, ref_items[widx],
                                     &tmp_fields[widx],
                                     query_block->m_added_non_hidden_fields,
                                     /*windowing*/ true))
          return true;

        curr_fields = &tmp_fields[widx];
        set_ref_item_slice(widx);
        tab->ref_item_slice = widx;
        setup_tmptable_write_func(tab, &trace_this_tbl);
      }

      if (m_windows[wno]->needs_buffering()) {
        if (CreateFramebufferTable(
                thd, tmp_table_param, *query_block, *orig_fields, *curr_fields,
                tab->tmp_table_param->items_to_copy, m_windows[wno])) {
          return true;
        }
      }

      if (m_windows[wno]->make_special_rows_cache(thd, tab->table()))
        return true;

      ORDER_with_src w_partition(m_windows[wno]->sorting_order(),
                                 ESC_WINDOWING);

      if (w_partition.order != nullptr) {
        Opt_trace_object trace_pre_sort(trace, "adding_sort_to_previous_table");
        if (add_sorting_to_table(curr_tmp_table - 1, &w_partition,
                                 /*sort_before_group=*/false))
          return true;
      }

      if (m_windows[wno]->is_last()) {
        if (!order.empty() && m_ordered_index_usage != ORDERED_INDEX_ORDER_BY) {
          if (add_sorting_to_table(curr_tmp_table, &order,
                                   /*sort_before_group=*/false))
            return true;
        }
        if (!tab->filesort && !tab->table()->s->keys &&
            (!(query_block->active_options() & OPTION_BUFFER_RESULT) ||
             need_tmp_before_win || wno >= 1)) {
          /*
            Last tmp table of execution; no sort, no duplicate elimination, no
            buffering imposed by user (or it has already been implemented by
            a previous tmp table): hence any row needn't be written to
            tmp table's storage; send it out to query's result instead:
          */
          m_windows[wno]->set_short_circuit(true);
        }
      }

      if (having_cond != nullptr) {
        tab->having = having_cond;
        having_cond = nullptr;
      }
    }
  }

  // In the case of rollup (only): After the base slice list was made, we may
  // have modified the field list to add rollup group items and sum switchers.
  // Since there may be HAVING filters with refs that refer to the base slice,
  // we need to refresh that slice (and its copy, REF_SLICE_SAVED_BASE) so
  // that it includes the updated items.
  //
  // Note that we do this after we've made the TMP1 and TMP2 slices, since
  // there's a lot of logic that looks through the GROUP BY list, which refers
  // to the base slice and expects _not_ to find rollup items there.
  refresh_base_slice();

  fields = curr_fields;
  // Reset before execution
  set_ref_item_slice(REF_SLICE_SAVED_BASE);
  if (qep_tab) {
    qep_tab[primary_tables + tmp_tables].op_type = get_end_select_func();
  }
  grouped = has_group_by;

  unplug_join_tabs();

  /*
    Tmp tables are a layer between the nested loop and the derived table's
    result, WITH RECURSIVE cannot work with them. This should not happen, as a
    recursive query cannot have clauses which use a tmp table (GROUP BY,
    etc).
  */
  assert(!query_block->is_recursive() || !tmp_tables);
  cleanup_tmp_tables_on_error.release();
  return false;
}

void JOIN::refresh_base_slice() {
  const unsigned num_hidden_fields = CountHiddenFields(*fields);
  const size_t num_select_elements = fields->size() - num_hidden_fields;
  const size_t orig_num_select_elements =
      num_select_elements - query_block->m_added_non_hidden_fields;

  for (unsigned i = 0; i < fields->size(); ++i) {
    Item *item = (*fields)[i];
    size_t pos;
    // See change_to_use_tmp_fields_except_sums for an explanation of how
    // the visible fields, hidden fields and additional fields added by
    // transformations are organized in fields and ref_item_array.
    if (i < num_hidden_fields) {
      pos = fields->size() - i - 1 - query_block->m_added_non_hidden_fields;
    } else {
      pos = i - num_hidden_fields;
      if (pos >= orig_num_select_elements) pos += num_hidden_fields;
    }
    query_block->base_ref_items[pos] = item;
    if (!ref_items[REF_SLICE_SAVED_BASE].is_null()) {
      ref_items[REF_SLICE_SAVED_BASE][pos] = item;
    }
  }
}

void JOIN::unplug_join_tabs() {
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  map2table = nullptr;

  for (uint i = 0; i < tables; ++i) best_ref[i]->cleanup();

  best_ref = nullptr;
}

/**
  @brief Add Filesort object to the given table to sort if with filesort

  @param idx        JOIN_TAB's position in the qep_tab array. The
                    created Filesort object gets attached to this.

  @param sort_order List of expressions to sort the table by
  @param sort_before_group
                    If true, this sort happens before grouping is done
                    (potentially as a step of grouping itself),
                    so any wrapped rollup group items should be
                    unwrapped.

  @note This function moves tab->select, if any, to filesort->select

  @return false on success, true on OOM
*/

bool JOIN::add_sorting_to_table(uint idx, ORDER_with_src *sort_order,
                                bool sort_before_group) {
  DBUG_TRACE;
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);
  assert(!query_block->is_recursive());
  const enum join_type jt = qep_tab[idx].type();
  if (jt == JT_CONST || jt == JT_EQ_REF)
    return false;  // 1 single row: is already sorted

  // Weedout needs an underlying table to store refs from (it deduplicates
  // by row ID), so if this table is part of a weedout operation, we need
  // to force sorting by row IDs -- sorting rows with addon fields returns
  // rows that have no reference to the underlying table object.
  bool force_sort_rowids = false;
  for (plan_idx i = 0; i <= static_cast<plan_idx>(idx); ++i) {
    if (!qep_tab[i].starts_weedout()) {
      continue;
    }

    plan_idx weedout_end = NO_PLAN_IDX;  // Exclusive.
    for (uint j = i; j < primary_tables; ++j) {
      if (qep_tab[j].check_weed_out_table == qep_tab[i].flush_weedout_table) {
        weedout_end = j + 1;
        break;
      }
    }
    if (weedout_end != NO_PLAN_IDX &&
        weedout_end > static_cast<plan_idx>(idx)) {
      force_sort_rowids = true;
      break;
    }
  }

  explain_flags.set(sort_order->src, ESP_USING_FILESORT);
  QEP_TAB *const tab = &qep_tab[idx];
  const bool keep_buffers =
      qep_tab->join() != nullptr &&
      qep_tab->join()->query_block->master_query_expression()->item !=
          nullptr &&
      qep_tab->join()
          ->query_block->master_query_expression()
          ->item->is_uncacheable();

  {
    // Switch to the right slice if applicable, so that we fetch out the correct
    // items from order_arg.
    const Switch_ref_item_slice slice_switch(this, tab->ref_item_slice);
    tab->filesort = new (thd->mem_root)
        Filesort(thd, {tab->table()}, keep_buffers, sort_order->order,
                 HA_POS_ERROR, /*remove_duplicates=*/false, force_sort_rowids,
                 /*unwrap_rollup=*/sort_before_group);
    tab->filesort_pushed_order = sort_order->order;
  }
  if (!tab->filesort) return true;
  Opt_trace_object trace_tmp(&thd->opt_trace, "filesort");
  trace_tmp.add_alnum("adding_sort_to_table",
                      tab->table() ? tab->table()->alias : "");

  return false;
}

/**
  Find a cheaper access key than a given key.

  @param          tab                 NULL or JOIN_TAB of the accessed table
  @param          order               Linked list of ORDER BY arguments
  @param          table               Table if tab == NULL or tab->table()
  @param          usable_keys         Key map to find a cheaper key in
  @param          ref_key
                * 0 <= key < MAX_KEY   - key number (hint) to start the search
                * -1                   - no key number provided
  @param          select_limit        LIMIT value, or HA_POS_ERROR if no limit
  @param [out]    new_key             Key number if success, otherwise undefined
  @param [out]    new_key_direction   Return -1 (reverse) or +1 if success,
                                      otherwise undefined
  @param [out]    new_select_limit    Return adjusted LIMIT
  @param [out]    new_used_key_parts  NULL by default, otherwise return number
                                      of new_key prefix columns if success
                                      or undefined if the function fails
  @param [out]  saved_best_key_parts  NULL by default, otherwise preserve the
                                      value for further use in
                                      ReverseIndexRangeScanIterator
  @param [out]    new_read_time       NULL by default, otherwise return the
                                      cost of access using new_key if success
                                      or undefined if the function fails

  @note
    This function takes into account table->quick_condition_rows statistic
    (that is calculated by JOIN::make_join_plan()).
    However, single table procedures such as mysql_update() and mysql_delete()
    never call JOIN::make_join_plan(), so they have to update it manually
    (@see get_index_for_order()).
    This function resets bits in TABLE::quick_keys for indexes with mixed
    ASC/DESC keyparts as range scan doesn't support range reordering
    required for them.
*/

bool test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER_with_src *order,
                              TABLE *table, Key_map usable_keys, int ref_key,
                              ha_rows select_limit, int *new_key,
                              int *new_key_direction, ha_rows *new_select_limit,
                              uint *new_used_key_parts,
                              uint *saved_best_key_parts,
                              double *new_read_time) {
  DBUG_TRACE;
  /*
    Check whether there is an index compatible with the given order
    usage of which is cheaper than usage of the ref_key index (ref_key>=0)
    or a table scan.
    It may be the case if ORDER/GROUP BY is used with LIMIT.
  */
  ha_rows best_select_limit = HA_POS_ERROR;
  JOIN *join = tab ? tab->join() : nullptr;
  if (join) ASSERT_BEST_REF_IN_JOIN_ORDER(join);
  uint nr;
  uint best_key_parts = 0;
  int best_key_direction = 0;
  ha_rows best_records = 0;
  double best_read_time = 0;
  double read_time;
  int best_key = -1;
  bool is_best_covering = false;
  double fanout = 1;
  const ha_rows table_records = table->file->stats.records;
  const bool group = join && join->grouped && order == &join->group_list;
  double refkey_rows_estimate =
      static_cast<double>(table->quick_condition_rows);
  const bool has_limit = (select_limit != HA_POS_ERROR);
  const join_type cur_access_method = tab ? tab->type() : JT_ALL;

  if (join) {
    read_time = tab->position()->read_cost;
    for (uint jt = tab->idx() + 1; jt < join->primary_tables; jt++) {
      POSITION *pos = join->best_ref[jt]->position();
      fanout *= pos->rows_fetched * pos->filter_effect;
      if (fanout < 0) break;  // fanout became 'unknown'
    }
  } else
    read_time = table->file->table_scan_cost().total_cost();

  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->quick_condition_rows.
  */
  if (ref_key >= 0 && cur_access_method == JT_REF) {
    if (table->quick_keys.is_set(ref_key))
      refkey_rows_estimate = static_cast<double>(table->quick_rows[ref_key]);
    else {
      const KEY *ref_keyinfo = table->key_info + ref_key;
      if (ref_keyinfo->has_records_per_key(tab->ref().key_parts - 1))
        refkey_rows_estimate =
            ref_keyinfo->records_per_key(tab->ref().key_parts - 1);
      else
        refkey_rows_estimate = 1.0;  // No index statistics
    }
    assert(refkey_rows_estimate >= 1.0);
  }

  for (nr = 0; nr < table->s->keys; nr++) {
    int direction = 0;
    uint used_key_parts;
    bool skip_quick;

    if (usable_keys.is_set(nr) &&
        (direction = test_if_order_by_key(order, table, nr, &used_key_parts,
                                          &skip_quick))) {
      const bool is_covering = table->covering_keys.is_set(nr) ||
                               (nr == table->s->primary_key &&
                                table->file->primary_key_is_clustered());
      // Don't allow backward scans on indexes with mixed ASC/DESC key parts
      if (skip_quick) table->quick_keys.clear_bit(nr);

      /*
        Don't use an index scan with ORDER BY without limit.
        For GROUP BY without limit always use index scan
        if there is a suitable index.
        Why we hold to this asymmetry hardly can be explained
        rationally. It's easy to demonstrate that using
        temporary table + filesort could be cheaper for grouping
        queries too.
      */
      if (is_covering || select_limit != HA_POS_ERROR ||
          (ref_key < 0 && (group || table->force_index_order))) {
        rec_per_key_t rec_per_key;
        KEY *keyinfo = table->key_info + nr;
        if (select_limit == HA_POS_ERROR) select_limit = table_records;
        if (group) {
          /*
            Used_key_parts can be larger than keyinfo->key_parts
            when using a secondary index clustered with a primary
            key (e.g. as in Innodb).
            See Bug #28591 for details.
          */
          rec_per_key =
              used_key_parts && used_key_parts <= actual_key_parts(keyinfo)
                  ? keyinfo->records_per_key(used_key_parts - 1)
                  : 1.0f;
          rec_per_key = std::max(rec_per_key, 1.0f);
          /*
            With a grouping query each group containing on average
            rec_per_key records produces only one row that will
            be included into the result set.
          */
          if (select_limit > table_records / rec_per_key)
            select_limit = table_records;
          else
            select_limit = (ha_rows)(select_limit * rec_per_key);
        }
        /*
          If tab=tk is not the last joined table tn then to get first
          L records from the result set we can expect to retrieve
          only L/fanout(tk,tn) where fanout(tk,tn) says how many
          rows in the record set on average will match each row tk.
          Usually our estimates for fanouts are too pessimistic.
          So the estimate for L/fanout(tk,tn) will be too optimistic
          and as result we'll choose an index scan when using ref/range
          access + filesort will be cheaper.
        */
        if (fanout == 0) {              // Would have been a division-by-zero
          select_limit = HA_POS_ERROR;  // -> 'infinite'
        } else if (fanout >= 0) {       // 'fanout' not unknown
          const double new_limit = max(select_limit / fanout, 1.0);
          if (new_limit >= static_cast<double>(HA_POS_ERROR)) {
            select_limit = HA_POS_ERROR;
          } else {
            select_limit = new_limit;
          }
        }
        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
          N/selectivity(ref_key) index entries.
          selectivity(ref_key) = #scanned_records/#table_records =
          refkey_rows_estimate/table_records.
          In any case we can't select more than #table_records.
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
          Neither should it be possible to select more than #table_records
          rows from refkey_rows_estimate.
         */
        if (select_limit > refkey_rows_estimate)
          select_limit = table_records;
        else if (table_records >= refkey_rows_estimate)
          select_limit = (ha_rows)(select_limit * (double)table_records /
                                   refkey_rows_estimate);
        rec_per_key =
            keyinfo->records_per_key(keyinfo->user_defined_key_parts - 1);
        rec_per_key = std::max(rec_per_key, 1.0f);
        /*
          Here we take into account the fact that rows are
          accessed in sequences rec_per_key records in each.
          Rows in such a sequence are supposed to be ordered
          by rowid/primary key. When reading the data
          in a sequence we'll touch not more pages than the
          table file contains.
          TODO. Use the formula for a disk sweep sequential access
          to calculate the cost of accessing data rows for one
          index entry.
        */
        const Cost_estimate table_scan_time = table->file->table_scan_cost();
        const double index_scan_time =
            select_limit / rec_per_key *
            min<double>(table->file->page_read_cost(nr, rec_per_key),
                        table_scan_time.total_cost());

        /*
          Switch to index that gives order if its scan time is smaller than
          read_time of current chosen access method. In addition, if the
          current chosen access method is index scan or table scan, always
          switch to the index that gives order when it is covering or when
          force index order or group by is present.
        */
        if (((cur_access_method == JT_ALL ||
              cur_access_method == JT_INDEX_SCAN) &&
             (is_covering || group || table->force_index_order)) ||
            index_scan_time < read_time) {
          ha_rows quick_records = table_records;
          const ha_rows refkey_select_limit =
              (ref_key >= 0 && table->covering_keys.is_set(ref_key))
                  ? static_cast<ha_rows>(refkey_rows_estimate)
                  : HA_POS_ERROR;

          if ((is_best_covering && !is_covering) ||
              (is_covering && refkey_select_limit < select_limit))
            continue;
          if (table->quick_keys.is_set(nr))
            quick_records = table->quick_rows[nr];
          if (best_key < 0 ||
              (select_limit <= min(quick_records, best_records)
                   ? keyinfo->user_defined_key_parts < best_key_parts
                   : quick_records < best_records) ||
              // We assume forward scan is faster than backward even if the
              // key is longer. This should be taken into account in cost
              // calculation.
              direction > best_key_direction) {
            best_key = nr;
            best_key_parts = keyinfo->user_defined_key_parts;
            if (saved_best_key_parts) *saved_best_key_parts = used_key_parts;
            best_read_time = index_scan_time;
            best_records = quick_records;
            is_best_covering = is_covering;
            best_key_direction = direction;
            best_select_limit = select_limit;
          }
        }
      }
    }
  }

  if (best_key < 0 || best_key == ref_key) return false;

  *new_key = best_key;
  *new_key_direction = best_key_direction;
  *new_select_limit = has_limit ? best_select_limit : table_records;
  if (new_used_key_parts != nullptr) *new_used_key_parts = best_key_parts;
  if (new_read_time) *new_read_time = best_read_time;

  return true;
}

/**
  Find a key to apply single table UPDATE/DELETE by a given ORDER

  @param       order           Linked list of ORDER BY arguments
  @param       table           Table to find a key
  @param       limit           LIMIT clause parameter
  @param       range_scan      Range scan used for this table, if any
  @param [out] need_sort       true if filesort needed
  @param [out] reverse
    true if the key is reversed again given ORDER (undefined if key == MAX_KEY)

  @return
    - MAX_KEY if no key found                        (need_sort == true)
    - MAX_KEY if quick select result order is OK     (need_sort == false)
    - key number (either index scan or quick select) (need_sort == false)

  @note
    Side effects:
    - may deallocate or deallocate and replace select->quick;
    - may set table->quick_condition_rows and table->quick_rows[...]
      to table->file->stats.records.
*/

uint get_index_for_order(ORDER_with_src *order, TABLE *table, ha_rows limit,
                         AccessPath *range_scan, bool *need_sort,
                         bool *reverse) {
  if (range_scan &&
      unique_key_range(range_scan)) {  // Single row select (always
                                       // "ordered"): Ok to use with
                                       // key field UPDATE
    *need_sort = false;
    /*
      Returning of MAX_KEY here prevents updating of used_key_is_modified
      in mysql_update(). Use AccessPath "as is".
    */
    return MAX_KEY;
  }

  if (order->empty()) {
    *need_sort = false;
    if (range_scan)
      return used_index(range_scan);  // index or MAX_KEY, use AccessPath as is
    else
      return table->file
          ->key_used_on_scan;  // MAX_KEY or index for some engines
  }

  if (!is_simple_order(order->order))  // just to cut further expensive checks
  {
    *need_sort = true;
    return MAX_KEY;
  }

  if (range_scan) {
    if (used_index(range_scan) == MAX_KEY) {
      *need_sort = true;
      return MAX_KEY;
    }

    uint used_key_parts;
    bool skip_path;
    switch (test_if_order_by_key(order, table, used_index(range_scan),
                                 &used_key_parts, &skip_path)) {
      case 1:  // desired order
        *need_sort = false;
        return used_index(range_scan);
      case 0:  // unacceptable order
        *need_sort = true;
        return MAX_KEY;
      case -1:  // desired order, but opposite direction
      {
        if (!skip_path && !make_reverse(used_key_parts, range_scan)) {
          *need_sort = false;
          return used_index(range_scan);
        } else {
          *need_sort = true;
          return MAX_KEY;
        }
      }
    }
    assert(0);
  } else if (limit != HA_POS_ERROR) {  // check if some index scan & LIMIT is
                                       // more efficient than filesort

    /*
      Update quick_condition_rows since single table UPDATE/DELETE procedures
      don't call JOIN::make_join_plan() and leave this variable uninitialized.
    */
    table->quick_condition_rows = table->file->stats.records;

    int key, direction;
    if (test_if_cheaper_ordering(nullptr, order, table,
                                 table->keys_in_use_for_order_by, -1, limit,
                                 &key, &direction, &limit)) {
      *need_sort = false;
      *reverse = (direction < 0);
      return key;
    }
  }
  *need_sort = true;
  return MAX_KEY;
}

/**
  Returns number of key parts depending on
  OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.

  @param  key_info  pointer to KEY structure

  @return number of key parts.
*/

uint actual_key_parts(const KEY *key_info) {
  return current_thd->optimizer_switch_flag(
             OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS)
             ? key_info->actual_key_parts
             : key_info->user_defined_key_parts;
}

uint actual_key_flags(const KEY *key_info) {
  return current_thd->optimizer_switch_flag(
             OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS)
             ? key_info->actual_flags
             : key_info->flags;
}

join_type calc_join_type(AccessPath *path) {
  switch (path->type) {
    case AccessPath::INDEX_RANGE_SCAN:
    case AccessPath::INDEX_SKIP_SCAN:
    case AccessPath::GROUP_INDEX_SKIP_SCAN:
      return JT_RANGE;
    case AccessPath::INDEX_MERGE:
    case AccessPath::ROWID_INTERSECTION:
    case AccessPath::ROWID_UNION:
      return JT_INDEX_MERGE;
    default:
      assert(false);
      return JT_RANGE;
  }
}

/**
  @} (end of group Query_Optimizer)
*/