2195 lines
72 KiB
C++
2195 lines
72 KiB
C++
/* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License, version 2.0,
|
|
as published by the Free Software Foundation.
|
|
|
|
This program is designed to work with certain software (including
|
|
but not limited to OpenSSL) that is licensed under separate terms,
|
|
as designated in a particular file or component or in included license
|
|
documentation. The authors of MySQL hereby grant you an additional
|
|
permission to link the program and your derivative works with the
|
|
separately licensed software that they have either included with
|
|
the program or referenced in the documentation.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License, version 2.0, for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
#include "sql/range_optimizer/tree.h"
|
|
|
|
#include <algorithm>
|
|
#include <set>
|
|
#include <utility>
|
|
|
|
#include "m_string.h"
|
|
#include "memory_debugging.h"
|
|
#include "my_dbug.h"
|
|
#include "my_sqlcommand.h"
|
|
#include "mysql/components/services/log_builtins.h"
|
|
#include "mysql/strings/m_ctype.h"
|
|
#include "mysqld_error.h"
|
|
#include "sql/handler.h"
|
|
#include "sql/key.h"
|
|
#include "sql/key_spec.h"
|
|
#include "sql/range_optimizer/internal.h"
|
|
#include "sql/range_optimizer/range_opt_param.h"
|
|
#include "sql/range_optimizer/range_optimizer.h"
|
|
#include "sql/sql_class.h"
|
|
#include "sql/sql_lex.h"
|
|
#include "sql/system_variables.h"
|
|
#include "sql/table.h"
|
|
#include "sql_string.h"
|
|
|
|
using std::max;
|
|
using std::min;
|
|
|
|
// Note: tree1 and tree2 are not usable by themselves after tree_and() or
|
|
// tree_or().
|
|
SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2);
|
|
SEL_TREE *tree_or(RANGE_OPT_PARAM *param, bool remove_jump_scans,
|
|
SEL_TREE *tree1, SEL_TREE *tree2);
|
|
SEL_ROOT *key_or(RANGE_OPT_PARAM *param, SEL_ROOT *key1, SEL_ROOT *key2);
|
|
SEL_ROOT *key_and(RANGE_OPT_PARAM *param, SEL_ROOT *key1, SEL_ROOT *key2);
|
|
|
|
SEL_ARG *rb_delete_fixup(SEL_ARG *root, SEL_ARG *key, SEL_ARG *par);
|
|
#ifndef NDEBUG
|
|
int test_rb_tree(SEL_ARG *element, SEL_ARG *parent);
|
|
#endif
|
|
|
|
static bool eq_tree(const SEL_ROOT *a, const SEL_ROOT *b);
|
|
static bool eq_tree(const SEL_ARG *a, const SEL_ARG *b);
|
|
static bool get_range(SEL_ARG **e1, SEL_ARG **e2, const SEL_ROOT *root1);
|
|
|
|
using opt_range::null_element;
|
|
|
|
/*
|
|
Returns a number of keypart values appended to the key buffer
|
|
for min key and max key. This function is used by both Range
|
|
Analysis and Partition pruning. For partition pruning we have
|
|
to ensure that we don't store also subpartition fields. Thus
|
|
we have to stop at the last partition part and not step into
|
|
the subpartition fields. For Range Analysis we set last_part
|
|
to MAX_KEY which we should never reach.
|
|
|
|
Note: Caller of this function should take care of sending the
|
|
correct flags and correct key to be stored into. In case of
|
|
ascending indexes, store_min_key() gets called to store the
|
|
min_value to range start_key. In case of descending indexes, it's
|
|
called for storing min_value to range end_key.
|
|
*/
|
|
|
|
int SEL_ROOT::store_min_key(KEY_PART *key, uchar **range_key,
|
|
uint *range_key_flag, uint last_part,
|
|
bool start_key) {
|
|
SEL_ARG *key_tree = root->first();
|
|
uint res = key_tree->store_min_value(key[key_tree->part].store_length,
|
|
range_key, *range_key_flag);
|
|
// We've stored min_value, so append min_flag
|
|
*range_key_flag |= key_tree->min_flag;
|
|
if (key_tree->next_key_part &&
|
|
key_tree->next_key_part->type == SEL_ROOT::Type::KEY_RANGE &&
|
|
key_tree->part != last_part &&
|
|
key_tree->next_key_part->root->part == key_tree->part + 1 &&
|
|
!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN))) {
|
|
const bool asc = key_tree->next_key_part->root->is_ascending;
|
|
if ((start_key && asc) || (!start_key && !asc))
|
|
res += key_tree->next_key_part->store_min_key(
|
|
key, range_key, range_key_flag, last_part, start_key);
|
|
else {
|
|
uint tmp_flag = invert_min_flag(*range_key_flag);
|
|
res += key_tree->next_key_part->store_max_key(key, range_key, &tmp_flag,
|
|
last_part, start_key);
|
|
*range_key_flag = invert_max_flag(tmp_flag);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
Returns the number of keypart values appended to the key buffer.
|
|
|
|
Note: Caller of this function should take care of sending the
|
|
correct flags and correct key to be stored into. In case of
|
|
ascending indexes, store_max_key() gets called while storing the
|
|
max_value into range end_key. In case of descending indexes,
|
|
its max_value to range start_key.
|
|
*/
|
|
|
|
int SEL_ROOT::store_max_key(KEY_PART *key, uchar **range_key,
|
|
uint *range_key_flag, uint last_part,
|
|
bool start_key) {
|
|
SEL_ARG *key_tree = root->last();
|
|
uint res = key_tree->store_max_value(key[key_tree->part].store_length,
|
|
range_key, *range_key_flag);
|
|
// We've stored max value, so return max_flag
|
|
(*range_key_flag) |= key_tree->max_flag;
|
|
if (key_tree->next_key_part &&
|
|
key_tree->next_key_part->type == SEL_ROOT::Type::KEY_RANGE &&
|
|
key_tree->part != last_part &&
|
|
key_tree->next_key_part->root->part == key_tree->part + 1 &&
|
|
!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX))) {
|
|
const bool asc = key_tree->next_key_part->root->is_ascending;
|
|
if ((!start_key && asc) || (start_key && !asc))
|
|
res += key_tree->next_key_part->store_max_key(
|
|
key, range_key, range_key_flag, last_part, start_key);
|
|
else {
|
|
uint tmp_flag = invert_max_flag(*range_key_flag);
|
|
res += key_tree->next_key_part->store_min_key(key, range_key, &tmp_flag,
|
|
last_part, start_key);
|
|
*range_key_flag = invert_min_flag(tmp_flag);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
void SEL_ROOT::free_tree() {
|
|
if (use_count == 0) {
|
|
for (SEL_ARG *pos = root->first(); pos; pos = pos->next) {
|
|
SEL_ROOT *root = pos->release_next_key_part();
|
|
if (root) root->free_tree();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
Helper function to compare two SEL_ROOTs.
|
|
*/
|
|
static bool all_same(const SEL_ROOT *sa1, const SEL_ROOT *sa2) {
|
|
if (sa1 == nullptr && sa2 == nullptr) return true;
|
|
if ((sa1 != nullptr && sa2 == nullptr) || (sa1 == nullptr && sa2 != nullptr))
|
|
return false;
|
|
if (sa1->type == SEL_ROOT::Type::KEY_RANGE &&
|
|
sa2->type == SEL_ROOT::Type::KEY_RANGE) {
|
|
const SEL_ARG *sa1_arg = sa1->root->first();
|
|
const SEL_ARG *sa2_arg = sa2->root->first();
|
|
for (; sa1_arg && sa2_arg && sa1_arg->is_same(sa2_arg);
|
|
sa1_arg = sa1_arg->next, sa2_arg = sa2_arg->next)
|
|
;
|
|
if (sa1_arg || sa2_arg) return false;
|
|
return true;
|
|
} else
|
|
return sa1->type == sa2->type;
|
|
}
|
|
|
|
SEL_TREE::SEL_TREE(SEL_TREE *arg, RANGE_OPT_PARAM *param)
|
|
: keys(param->temp_mem_root, param->keys), n_ror_scans(0) {
|
|
keys_map = arg->keys_map;
|
|
type = arg->type;
|
|
for (uint idx = 0; idx < param->keys; idx++) {
|
|
if (arg->keys[idx]) {
|
|
set_key(idx, arg->keys[idx]->clone_tree(param));
|
|
if (!keys[idx]) break;
|
|
} else
|
|
set_key(idx, nullptr);
|
|
}
|
|
|
|
List_iterator<SEL_IMERGE> it(arg->merges);
|
|
for (SEL_IMERGE *el = it++; el; el = it++) {
|
|
SEL_IMERGE *merge = new (param->temp_mem_root) SEL_IMERGE(el, param);
|
|
if (!merge || merge->trees.empty() || param->has_errors()) {
|
|
merges.clear();
|
|
return;
|
|
}
|
|
merges.push_back(merge);
|
|
}
|
|
|
|
/*
|
|
SEL_TREEs are only created by get_mm_tree() (and functions called
|
|
by get_mm_tree()). Index intersection is checked after
|
|
get_mm_tree() has constructed all ranges. In other words, there
|
|
should not be any ROR scans to copy when this ctor is called.
|
|
*/
|
|
assert(n_ror_scans == 0);
|
|
}
|
|
|
|
/*
|
|
Perform AND operation on two index_merge lists and store result in *im1.
|
|
*/
|
|
|
|
inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) {
|
|
im1->concat(im2);
|
|
}
|
|
|
|
/*
|
|
Perform OR operation on 2 index_merge lists, storing result in first list.
|
|
|
|
NOTES
|
|
The following conversion is implemented:
|
|
(a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
|
|
=> (a_1||b_1).
|
|
|
|
i.e. all conjuncts except the first one are currently dropped.
|
|
This is done to avoid producing N*K ways to do index_merge.
|
|
|
|
If (a_1||b_1) produce a condition that is always true, NULL is returned
|
|
and index_merge is discarded (while it is actually possible to try
|
|
harder).
|
|
|
|
As a consequence of this, choice of keys to do index_merge read may depend
|
|
on the order of conditions in WHERE part of the query.
|
|
|
|
RETURN
|
|
0 OK, result is stored in *im1
|
|
other Error, both passed lists are unusable
|
|
*/
|
|
|
|
static int imerge_list_or_list(RANGE_OPT_PARAM *param, bool remove_jump_scans,
|
|
List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) {
|
|
SEL_IMERGE *imerge = im1->head();
|
|
im1->clear();
|
|
im1->push_back(imerge);
|
|
|
|
return imerge->or_sel_imerge_with_checks(param, remove_jump_scans,
|
|
im2->head());
|
|
}
|
|
|
|
/*
|
|
Perform OR operation on index_merge list and key tree.
|
|
|
|
RETURN
|
|
false OK, result is stored in *im1.
|
|
true Error
|
|
*/
|
|
|
|
static bool imerge_list_or_tree(RANGE_OPT_PARAM *param, bool remove_jump_scans,
|
|
List<SEL_IMERGE> *im1, SEL_TREE *tree) {
|
|
DBUG_TRACE;
|
|
SEL_IMERGE *imerge;
|
|
List_iterator<SEL_IMERGE> it(*im1);
|
|
|
|
uint remaining_trees = im1->elements;
|
|
while ((imerge = it++)) {
|
|
SEL_TREE *or_tree;
|
|
/*
|
|
Need to make a copy of 'tree' for all but the last OR operation
|
|
because or_sel_tree_with_checks() may change it.
|
|
*/
|
|
if (--remaining_trees == 0)
|
|
or_tree = tree;
|
|
else {
|
|
or_tree = new (param->temp_mem_root) SEL_TREE(tree, param);
|
|
if (!or_tree || param->has_errors()) return true;
|
|
if (or_tree->keys_map.is_clear_all() && or_tree->merges.is_empty())
|
|
return false;
|
|
}
|
|
|
|
int result_or =
|
|
imerge->or_sel_tree_with_checks(param, remove_jump_scans, or_tree);
|
|
if (result_or == 1)
|
|
it.remove();
|
|
else if (result_or == -1)
|
|
return true;
|
|
}
|
|
assert(remaining_trees == 0);
|
|
return im1->is_empty();
|
|
}
|
|
|
|
SEL_ARG::SEL_ARG(SEL_ARG &arg)
|
|
: min_flag(arg.min_flag),
|
|
max_flag(arg.max_flag),
|
|
maybe_flag(arg.maybe_flag),
|
|
part(arg.part),
|
|
rkey_func_flag(arg.rkey_func_flag),
|
|
field(arg.field),
|
|
min_value(arg.min_value),
|
|
max_value(arg.max_value),
|
|
left(null_element),
|
|
right(null_element),
|
|
next(nullptr),
|
|
prev(nullptr),
|
|
next_key_part(arg.next_key_part),
|
|
is_ascending(arg.is_ascending) {
|
|
if (next_key_part) ++next_key_part->use_count;
|
|
}
|
|
|
|
SEL_ARG::SEL_ARG(Field *f, const uchar *min_value_arg,
|
|
const uchar *max_value_arg, bool asc)
|
|
: part(0),
|
|
rkey_func_flag(HA_READ_INVALID),
|
|
field(f),
|
|
min_value(const_cast<uchar *>(min_value_arg)),
|
|
max_value(const_cast<uchar *>(max_value_arg)),
|
|
left(null_element),
|
|
right(null_element),
|
|
next(nullptr),
|
|
prev(nullptr),
|
|
parent(nullptr),
|
|
color(BLACK),
|
|
is_ascending(asc) {}
|
|
|
|
SEL_ARG::SEL_ARG(Field *field_, uint8 part_, uchar *min_value_,
|
|
uchar *max_value_, uint8 min_flag_, uint8 max_flag_,
|
|
bool maybe_flag_, bool asc, ha_rkey_function gis_flag)
|
|
: min_flag(min_flag_),
|
|
max_flag(max_flag_),
|
|
maybe_flag(maybe_flag_),
|
|
part(part_),
|
|
rkey_func_flag(gis_flag),
|
|
field(field_),
|
|
min_value(min_value_),
|
|
max_value(max_value_),
|
|
left(null_element),
|
|
right(null_element),
|
|
next(nullptr),
|
|
prev(nullptr),
|
|
parent(nullptr),
|
|
color(BLACK),
|
|
is_ascending(asc) {}
|
|
|
|
SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
|
|
SEL_ARG **next_arg) {
|
|
SEL_ARG *tmp;
|
|
|
|
if (param->has_errors()) return nullptr;
|
|
|
|
if (!(tmp = new (param->temp_mem_root)
|
|
SEL_ARG(field, part, min_value, max_value, min_flag, max_flag,
|
|
maybe_flag, is_ascending,
|
|
min_flag & GEOM_FLAG ? rkey_func_flag : HA_READ_INVALID)))
|
|
return nullptr; // OOM
|
|
tmp->parent = new_parent;
|
|
tmp->set_next_key_part(next_key_part);
|
|
if (left == null_element || left == nullptr) {
|
|
tmp->left = left;
|
|
} else {
|
|
if (!(tmp->left = left->clone(param, tmp, next_arg)))
|
|
return nullptr; // OOM
|
|
}
|
|
|
|
tmp->prev = *next_arg; // Link into next/prev chain
|
|
(*next_arg)->next = tmp;
|
|
(*next_arg) = tmp;
|
|
|
|
if (right == null_element || right == nullptr) {
|
|
tmp->right = right;
|
|
} else {
|
|
if (!(tmp->right = right->clone(param, tmp, next_arg)))
|
|
return nullptr; // OOM
|
|
}
|
|
tmp->color = color;
|
|
return tmp;
|
|
}
|
|
|
|
/**
|
|
This gives the first SEL_ARG in the interval list, and the minimal element
|
|
in the red-black tree
|
|
|
|
@return
|
|
SEL_ARG first SEL_ARG in the interval list
|
|
*/
|
|
SEL_ARG *SEL_ARG::first() {
|
|
SEL_ARG *next_arg = this;
|
|
if (!next_arg->left) return nullptr; // MAYBE_KEY
|
|
while (next_arg->left != null_element) next_arg = next_arg->left;
|
|
return next_arg;
|
|
}
|
|
|
|
const SEL_ARG *SEL_ARG::first() const {
|
|
return const_cast<SEL_ARG *>(this)->first();
|
|
}
|
|
|
|
SEL_ARG *SEL_ARG::last() {
|
|
SEL_ARG *next_arg = this;
|
|
if (!next_arg->right) return nullptr; // MAYBE_KEY
|
|
while (next_arg->right != null_element) next_arg = next_arg->right;
|
|
return next_arg;
|
|
}
|
|
|
|
/*
|
|
Check if a compare is ok, when one takes ranges in account
|
|
Returns -2 or 2 if the ranges where 'joined' like < 2 and >= 2
|
|
*/
|
|
|
|
int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag, uint8 b_flag) {
|
|
int cmp;
|
|
/* First check if there was a compare to a min or max element */
|
|
if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) {
|
|
if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
|
|
(b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
|
|
return 0;
|
|
return (a_flag & NO_MIN_RANGE) ? -1 : 1;
|
|
}
|
|
if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
|
|
return (b_flag & NO_MIN_RANGE) ? 1 : -1;
|
|
|
|
if (field->is_nullable()) // If null is part of key
|
|
{
|
|
if (*a != *b) {
|
|
return *a ? -1 : 1;
|
|
}
|
|
if (*a) goto end; // NULL where equal
|
|
a++;
|
|
b++; // Skip NULL marker
|
|
}
|
|
cmp = field->key_cmp(a, b);
|
|
if (cmp) return cmp < 0 ? -1 : 1; // The values differed
|
|
|
|
// Check if the compared equal arguments was defined with open/closed range
|
|
end:
|
|
if (a_flag & (NEAR_MIN | NEAR_MAX)) {
|
|
if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
|
|
return 0;
|
|
if (!(b_flag & (NEAR_MIN | NEAR_MAX))) return (a_flag & NEAR_MIN) ? 2 : -2;
|
|
return (a_flag & NEAR_MIN) ? 1 : -1;
|
|
}
|
|
if (b_flag & (NEAR_MIN | NEAR_MAX)) return (b_flag & NEAR_MIN) ? -2 : 2;
|
|
return 0; // The elements where equal
|
|
}
|
|
|
|
namespace {
|
|
|
|
size_t count_elements(const SEL_ARG *arg) {
|
|
size_t elements = 1;
|
|
assert(arg->left);
|
|
assert(arg->right);
|
|
if (arg->left && arg->left != null_element)
|
|
elements += count_elements(arg->left);
|
|
if (arg->right && arg->right != null_element)
|
|
elements += count_elements(arg->right);
|
|
return elements;
|
|
}
|
|
|
|
} // Namespace
|
|
|
|
SEL_ROOT::SEL_ROOT(SEL_ARG *root_arg)
|
|
: type(Type::KEY_RANGE),
|
|
root(root_arg),
|
|
elements(count_elements(root_arg)) {}
|
|
|
|
SEL_ROOT::SEL_ROOT(MEM_ROOT *mem_root, Type type_arg)
|
|
: type(type_arg), root(new(mem_root) SEL_ARG()), elements(1) {
|
|
assert(type_arg == Type::MAYBE_KEY || type_arg == Type::IMPOSSIBLE);
|
|
root->make_root();
|
|
if (type_arg == Type::MAYBE_KEY) {
|
|
// See todo for left/right pointers
|
|
root->left = root->right = nullptr;
|
|
}
|
|
}
|
|
|
|
SEL_ROOT *SEL_ROOT::clone_tree(RANGE_OPT_PARAM *param) const {
|
|
/*
|
|
Only SEL_ROOTs of type KEY_RANGE has any elements that need to be cloned.
|
|
For other types, just create a new SEL_ROOT object.
|
|
*/
|
|
if (type != Type::KEY_RANGE)
|
|
return new (param->temp_mem_root) SEL_ROOT(param->temp_mem_root, type);
|
|
|
|
SEL_ARG tmp_link, *next_arg, *new_root;
|
|
SEL_ROOT *new_tree;
|
|
next_arg = &tmp_link;
|
|
|
|
// Clone the underlying SEL_ARG tree, starting from the root node.
|
|
if (!(new_root = root->clone(param, (SEL_ARG *)nullptr, &next_arg)) ||
|
|
(param && param->has_errors()))
|
|
return nullptr;
|
|
|
|
// Make the SEL_ROOT itself.
|
|
if (!(new_tree = new (param->temp_mem_root) SEL_ROOT(new_root)))
|
|
return nullptr;
|
|
new_tree->elements = elements;
|
|
next_arg->next = nullptr; // Fix last link
|
|
tmp_link.next->prev = nullptr; // Fix first link
|
|
new_tree->use_count = 0;
|
|
return new_tree;
|
|
}
|
|
|
|
SEL_TREE *tree_and(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2) {
|
|
DBUG_TRACE;
|
|
|
|
if (param->has_errors()) return nullptr;
|
|
|
|
if (tree1 == nullptr) {
|
|
if (tree2 != nullptr) {
|
|
tree2->inexact = true;
|
|
}
|
|
return tree2;
|
|
}
|
|
if (tree2 == nullptr) {
|
|
if (tree1 != nullptr) {
|
|
tree1->inexact = true;
|
|
}
|
|
return tree1;
|
|
}
|
|
if (tree1->type == SEL_TREE::IMPOSSIBLE) {
|
|
return tree1;
|
|
}
|
|
if (tree2->type == SEL_TREE::IMPOSSIBLE) {
|
|
return tree2;
|
|
}
|
|
if (tree2->type == SEL_TREE::ALWAYS) {
|
|
tree1->inexact |= tree2->inexact;
|
|
return tree1;
|
|
}
|
|
if (tree1->type == SEL_TREE::ALWAYS) {
|
|
tree2->inexact |= tree1->inexact;
|
|
return tree2;
|
|
}
|
|
|
|
dbug_print_tree("tree1", tree1, param);
|
|
dbug_print_tree("tree2", tree2, param);
|
|
|
|
Key_map result_keys;
|
|
|
|
/* Join the trees key per key */
|
|
for (uint idx = 0; idx < param->keys; idx++) {
|
|
SEL_ROOT *key1 = tree1->release_key(idx);
|
|
SEL_ROOT *key2 = tree2->release_key(idx);
|
|
|
|
if (key1 != nullptr || key2 != nullptr) {
|
|
if (key1 == nullptr || key2 == nullptr) {
|
|
// If AND-ing two trees together, and one has an expression over a
|
|
// different index from the other, we cannot guarantee that the entire
|
|
// expression is exact if that index is chosen. (The only time this
|
|
// really matters is when there's an AND within an OR; only the
|
|
// hypergraph optimizer cares about the inexact flag, and it does its
|
|
// own splitting of top-level ANDs.)
|
|
tree1->inexact = true;
|
|
}
|
|
SEL_ROOT *new_key = key_and(param, key1, key2);
|
|
tree1->set_key(idx, new_key);
|
|
if (new_key) {
|
|
if (new_key->type == SEL_ROOT::Type::IMPOSSIBLE) {
|
|
tree1->type = SEL_TREE::IMPOSSIBLE;
|
|
return tree1;
|
|
}
|
|
result_keys.set_bit(idx);
|
|
#ifndef NDEBUG
|
|
/*
|
|
Do not test use_count if there is a large range tree created.
|
|
It takes too much time to traverse the tree.
|
|
*/
|
|
if (param->temp_mem_root->allocated_size() < 2097152)
|
|
new_key->test_use_count(new_key);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
tree1->keys_map = result_keys;
|
|
tree1->inexact |= tree2->inexact;
|
|
|
|
/* ok, both trees are index_merge trees */
|
|
imerge_list_and_list(&tree1->merges, &tree2->merges);
|
|
// An index merge is a union/OR, so it cannot exactly represent an
|
|
// intersection/AND.
|
|
tree1->inexact |= !tree1->merges.is_empty();
|
|
|
|
return tree1;
|
|
}
|
|
|
|
/*
|
|
Check if two SEL_TREES can be combined into one (i.e. a single key range
|
|
read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
|
|
using index_merge.
|
|
*/
|
|
|
|
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
|
|
RANGE_OPT_PARAM *param) {
|
|
Key_map common_keys = tree1->keys_map;
|
|
DBUG_TRACE;
|
|
common_keys.intersect(tree2->keys_map);
|
|
|
|
dbug_print_tree("tree1", tree1, param);
|
|
dbug_print_tree("tree2", tree2, param);
|
|
|
|
if (common_keys.is_clear_all()) return false;
|
|
|
|
/* trees have a common key, check if they refer to same key part */
|
|
for (uint key_no = 0; key_no < param->keys; key_no++) {
|
|
if (common_keys.is_set(key_no)) {
|
|
const SEL_ROOT *key1 = tree1->keys[key_no];
|
|
const SEL_ROOT *key2 = tree2->keys[key_no];
|
|
/* GIS_OPTIMIZER_FIXME: temp solution. key1 could be all nulls */
|
|
if (key1 && key2 && key1->root->part == key2->root->part) return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
Remove the trees that are not suitable for record retrieval.
|
|
SYNOPSIS
|
|
param Range analysis parameter
|
|
tree Tree to be processed, tree->type is KEY
|
|
|
|
DESCRIPTION
|
|
This function walks through tree->keys[] and removes the SEL_ARG* trees
|
|
that are not "maybe" trees (*) and cannot be used to construct quick range
|
|
selects.
|
|
(*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
|
|
these types here as well.
|
|
|
|
A SEL_ARG* tree cannot be used to construct quick select if it has
|
|
tree->part != 0. (e.g. it could represent "keypart2 < const").
|
|
|
|
WHY THIS FUNCTION IS NEEDED
|
|
|
|
Normally we allow construction of SEL_TREE objects that have SEL_ARG
|
|
trees that do not allow quick range select construction. For example for
|
|
" keypart1=1 AND keypart2=2 " the execution will proceed as follows:
|
|
tree1= SEL_TREE { SEL_ARG{keypart1=1} }
|
|
tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
|
|
from this
|
|
call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
|
|
tree.
|
|
|
|
There is an exception though: when we construct index_merge SEL_TREE,
|
|
any SEL_ARG* tree that cannot be used to construct quick range select can
|
|
be removed, because current range analysis code doesn't provide any way
|
|
that tree could be later combined with another tree.
|
|
Consider an example: we should not construct
|
|
st1 = SEL_TREE {
|
|
merges = SEL_IMERGE {
|
|
SEL_TREE(t.key1part1 = 1),
|
|
SEL_TREE(t.key2part2 = 2) -- (*)
|
|
}
|
|
};
|
|
because
|
|
- (*) cannot be used to construct quick range select,
|
|
- There is no execution path that would cause (*) to be converted to
|
|
a tree that could be used.
|
|
|
|
The latter is easy to verify: first, notice that the only way to convert
|
|
(*) into a usable tree is to call tree_and(something, (*)).
|
|
|
|
Second look at what tree_and/tree_or function would do when passed a
|
|
SEL_TREE that has the structure like st1 tree has, and conclude that
|
|
tree_and(something, (*)) will not be called.
|
|
|
|
RETURN
|
|
0 Ok, some suitable trees left
|
|
1 No tree->keys[] left.
|
|
*/
|
|
|
|
static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree) {
|
|
bool res = false;
|
|
for (uint i = 0; i < param->keys; i++) {
|
|
if (tree->keys[i]) {
|
|
if (tree->keys[i]->root->part) {
|
|
tree->keys[i] = nullptr;
|
|
tree->keys_map.clear_bit(i);
|
|
} else
|
|
res = true;
|
|
}
|
|
}
|
|
return !res;
|
|
}
|
|
|
|
SEL_TREE *tree_or(RANGE_OPT_PARAM *param, bool remove_jump_scans,
|
|
SEL_TREE *tree1, SEL_TREE *tree2) {
|
|
DBUG_TRACE;
|
|
|
|
if (param->has_errors()) return nullptr;
|
|
|
|
if (!tree1 || !tree2) return nullptr;
|
|
tree1->inexact = tree2->inexact = tree1->inexact | tree2->inexact;
|
|
if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
|
|
return tree2;
|
|
if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
|
|
return tree1;
|
|
|
|
/*
|
|
It is possible that a tree contains both
|
|
a) simple range predicates (in tree->keys[]) and
|
|
b) index merge range predicates (in tree->merges)
|
|
|
|
If a tree has both, they represent equally *valid* range
|
|
predicate alternatives; both will return all relevant rows from
|
|
the table but one may return more unnecessary rows than the
|
|
other (additional rows will be filtered later). However, doing
|
|
an OR operation on trees with both types of predicates is too
|
|
complex at the time. We therefore remove the index merge
|
|
predicates (if we have both types) before OR'ing the trees.
|
|
|
|
TODO: enable tree_or() for trees with both simple and index
|
|
merge range predicates.
|
|
*/
|
|
if (!tree1->merges.is_empty()) {
|
|
for (uint i = 0; i < param->keys; i++)
|
|
if (tree1->keys[i] != nullptr &&
|
|
tree1->keys[i]->type == SEL_ROOT::Type::KEY_RANGE) {
|
|
tree1->merges.clear();
|
|
break;
|
|
}
|
|
}
|
|
if (!tree2->merges.is_empty()) {
|
|
for (uint i = 0; i < param->keys; i++)
|
|
if (tree2->keys[i] != nullptr &&
|
|
tree2->keys[i]->type == SEL_ROOT::Type::KEY_RANGE) {
|
|
tree2->merges.clear();
|
|
break;
|
|
}
|
|
}
|
|
|
|
SEL_TREE *result = nullptr;
|
|
Key_map result_keys;
|
|
if (sel_trees_can_be_ored(tree1, tree2, param)) {
|
|
/* Join the trees key per key */
|
|
for (uint idx = 0; idx < param->keys; idx++) {
|
|
SEL_ROOT *key1 = tree1->release_key(idx);
|
|
SEL_ROOT *key2 = tree2->release_key(idx);
|
|
SEL_ROOT *new_key = key_or(param, key1, key2);
|
|
tree1->set_key(idx, new_key);
|
|
if (new_key) {
|
|
result = tree1; // Added to tree1
|
|
result_keys.set_bit(idx);
|
|
#ifndef NDEBUG
|
|
/*
|
|
Do not test use count if there is a large range tree created.
|
|
It takes too much time to traverse the tree.
|
|
*/
|
|
if (param->temp_mem_root->allocated_size() < 2097152)
|
|
new_key->test_use_count(new_key);
|
|
#endif
|
|
}
|
|
}
|
|
if (result) result->keys_map = result_keys;
|
|
} else {
|
|
/* ok, two trees have KEY type but cannot be used without index merge */
|
|
if (tree1->merges.is_empty() && tree2->merges.is_empty()) {
|
|
if (remove_jump_scans) {
|
|
bool no_trees = remove_nonrange_trees(param, tree1);
|
|
no_trees = no_trees || remove_nonrange_trees(param, tree2);
|
|
if (no_trees)
|
|
return new (param->temp_mem_root)
|
|
SEL_TREE(SEL_TREE::ALWAYS, param->temp_mem_root, param->keys);
|
|
}
|
|
SEL_IMERGE *merge;
|
|
/* both trees are "range" trees, produce new index merge structure */
|
|
if (!(result = new (param->temp_mem_root)
|
|
SEL_TREE(param->temp_mem_root, param->keys)) ||
|
|
!(merge =
|
|
new (param->temp_mem_root) SEL_IMERGE(param->temp_mem_root)) ||
|
|
result->merges.push_back(merge) || merge->or_sel_tree(tree1) ||
|
|
merge->or_sel_tree(tree2))
|
|
result = nullptr;
|
|
else
|
|
result->type = tree1->type;
|
|
} else if (!tree1->merges.is_empty() && !tree2->merges.is_empty()) {
|
|
if (imerge_list_or_list(param, remove_jump_scans, &tree1->merges,
|
|
&tree2->merges))
|
|
result = new (param->temp_mem_root)
|
|
SEL_TREE(SEL_TREE::ALWAYS, param->temp_mem_root, param->keys);
|
|
else
|
|
result = tree1;
|
|
} else {
|
|
/* one tree is index merge tree and another is range tree */
|
|
if (tree1->merges.is_empty()) std::swap(tree1, tree2);
|
|
|
|
if (remove_jump_scans && remove_nonrange_trees(param, tree2))
|
|
return new (param->temp_mem_root)
|
|
SEL_TREE(SEL_TREE::ALWAYS, param->temp_mem_root, param->keys);
|
|
/* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
|
|
if (imerge_list_or_tree(param, remove_jump_scans, &tree1->merges, tree2))
|
|
result = new (param->temp_mem_root)
|
|
SEL_TREE(SEL_TREE::ALWAYS, param->temp_mem_root, param->keys);
|
|
else
|
|
result = tree1;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
And key trees where key1->part < key2->part
|
|
|
|
key2 will be connected to every key in key1, and thus
|
|
have its use_count incremented many times. The returned node
|
|
will not have its use_count increased; you are supposed to do
|
|
that yourself when you connect it to a root.
|
|
|
|
@param param Range analysis context (needed to track if we have allocated
|
|
too many SEL_ARGs)
|
|
@param key1 Root of first tree to AND together
|
|
@param key2 Root of second tree to AND together
|
|
@return Root of (key1 AND key2)
|
|
*/
|
|
|
|
static SEL_ROOT *and_all_keys(RANGE_OPT_PARAM *param, SEL_ROOT *key1,
|
|
SEL_ROOT *key2) {
|
|
SEL_ARG *next;
|
|
|
|
// We will be modifying key1, so clone it if we need to.
|
|
if (key1->use_count > 0) {
|
|
if (!(key1 = key1->clone_tree(param))) return nullptr; // OOM
|
|
}
|
|
|
|
/*
|
|
We will be using key2 several times, so temporarily increase
|
|
its use_count artificially to keep key_and() below from modifying
|
|
it in-place.
|
|
|
|
Note that this makes test_use_count() fail since our use_count is
|
|
now higher than the actual number of references, but that is only ever
|
|
called from tree_and() and tree_or(), not from anything below this,
|
|
and we undo it below.
|
|
*/
|
|
++key2->use_count;
|
|
|
|
if (key1->type == SEL_ROOT::Type::MAYBE_KEY) {
|
|
// See todo for left/right pointers
|
|
assert(!key1->root->left);
|
|
assert(!key1->root->right);
|
|
key1->root->next = key1->root->prev = nullptr;
|
|
}
|
|
for (next = key1->root->first(); next; next = next->next) {
|
|
if (next->next_key_part) {
|
|
/*
|
|
The more complicated case; there's already another AND clause,
|
|
so we cannot connect key2 to key1 directly, but need to recurse.
|
|
*/
|
|
SEL_ROOT *tmp = key_and(param, next->release_next_key_part(), key2);
|
|
next->set_next_key_part(tmp);
|
|
if (tmp && tmp->type == SEL_ROOT::Type::IMPOSSIBLE) {
|
|
key1->tree_delete(next);
|
|
}
|
|
} else {
|
|
// The trivial case.
|
|
next->set_next_key_part(key2);
|
|
}
|
|
}
|
|
|
|
// Undo the temporary use_count modification above.
|
|
--key2->use_count;
|
|
|
|
return key1;
|
|
}
|
|
|
|
/*
|
|
Produce a SEL_ARG graph that represents "key1 AND key2"
|
|
|
|
SYNOPSIS
|
|
key_and()
|
|
param Range analysis context (needed to track if we have allocated
|
|
too many SEL_ARGs)
|
|
key1 First argument, root of its RB-tree
|
|
key2 Second argument, root of its RB-tree
|
|
|
|
key_and() does not modify key1 nor key2 if they are in use by other roots
|
|
(although typical use is that key1 has been disconnected from its root
|
|
and thus can be modified in-place). Thus, it does not change their use_count
|
|
in the typical case.
|
|
|
|
The returned node will not have its use_count increased; you are supposed
|
|
to do that yourself when you connect it to a root.
|
|
|
|
RETURN
|
|
RB-tree root of the resulting SEL_ARG graph.
|
|
NULL if the result of AND operation is an empty interval {0}.
|
|
*/
|
|
|
|
SEL_ROOT *key_and(RANGE_OPT_PARAM *param, SEL_ROOT *key1, SEL_ROOT *key2) {
|
|
if (param->has_errors()) return nullptr;
|
|
|
|
if (key1 == nullptr || key1->is_always()) {
|
|
if (key1) key1->free_tree();
|
|
return key2;
|
|
}
|
|
if (key2 == nullptr || key2->is_always()) {
|
|
if (key2) key2->free_tree();
|
|
return key1;
|
|
}
|
|
|
|
if (key1->root->part != key2->root->part) {
|
|
if (key1->root->part > key2->root->part) {
|
|
std::swap(key1, key2);
|
|
}
|
|
assert(key1->root->part < key2->root->part);
|
|
return and_all_keys(param, key1, key2);
|
|
}
|
|
|
|
if ((!key2->simple_key() && key1->simple_key() &&
|
|
key2->type != SEL_ROOT::Type::MAYBE_KEY) ||
|
|
key1->type == SEL_ROOT::Type::MAYBE_KEY) { // Put simple key in key2
|
|
std::swap(key1, key2);
|
|
}
|
|
|
|
/* If one of the key is MAYBE_KEY then the found region may be smaller */
|
|
if (key2->type == SEL_ROOT::Type::MAYBE_KEY) {
|
|
if (key1->use_count > 0) {
|
|
// We are going to modify key1, so we need to clone it.
|
|
if (!(key1 = key1->clone_tree(param))) return nullptr; // OOM
|
|
}
|
|
if (key1->type == SEL_ROOT::Type::MAYBE_KEY) { // Both are maybe key
|
|
SEL_ROOT *new_part = key_and(param, key1->root->release_next_key_part(),
|
|
key2->root->next_key_part);
|
|
key1->root->set_next_key_part(new_part);
|
|
return key1;
|
|
} else {
|
|
key1->root->maybe_smaller();
|
|
if (key2->root->next_key_part) {
|
|
return and_all_keys(param, key1, key2);
|
|
} else {
|
|
/*
|
|
key2 is MAYBE_KEY and nothing more; simply discard it,
|
|
since we've now moved that information into key1's maybe_flag.
|
|
*/
|
|
key2->free_tree();
|
|
return key1;
|
|
}
|
|
}
|
|
// Unreachable.
|
|
assert(false);
|
|
return nullptr;
|
|
}
|
|
|
|
if ((key1->root->min_flag | key2->root->min_flag) & GEOM_FLAG) {
|
|
/*
|
|
Cannot optimize geometry ranges. The next best thing is to keep
|
|
one of them.
|
|
*/
|
|
key2->free_tree();
|
|
return key1;
|
|
}
|
|
|
|
// Two non-overlapped key ranges for multi-valued index do not mean
|
|
// an always false condition.
|
|
// For example, "1 member of(f) AND 2 member of(f)" for f=[1, 2].
|
|
if (key1->root->field->is_array() || key2->root->field->is_array()) {
|
|
return and_all_keys(param, key1, key2);
|
|
}
|
|
|
|
SEL_ARG *e1 = key1->root->first(), *e2 = key2->root->first();
|
|
SEL_ROOT *new_tree = nullptr;
|
|
|
|
while (e1 && e2) {
|
|
int cmp = e1->cmp_min_to_min(e2);
|
|
if (cmp < 0) {
|
|
if (get_range(&e1, &e2, key1)) continue;
|
|
} else if (get_range(&e2, &e1, key2))
|
|
continue;
|
|
/*
|
|
NOTE: We don't destroy e1->next_key_part nor e2->next_key_part
|
|
(if used at all, the return value here goes into a brand new element;
|
|
it does not overwrite either of them), so we keep their use_counts
|
|
intact here.
|
|
*/
|
|
SEL_ROOT *next = key_and(param, e1->next_key_part, e2->next_key_part);
|
|
if (next && next->type == SEL_ROOT::Type::IMPOSSIBLE)
|
|
next->free_tree();
|
|
else {
|
|
SEL_ARG *new_arg = e1->clone_and(e2, param->temp_mem_root);
|
|
if (!new_arg) return nullptr; // End of memory
|
|
new_arg->set_next_key_part(next);
|
|
if (!new_tree) {
|
|
new_tree = new (param->temp_mem_root) SEL_ROOT(new_arg);
|
|
} else
|
|
new_tree->insert(new_arg);
|
|
}
|
|
if (e1->cmp_max_to_max(e2) < 0)
|
|
e1 = e1->next; // e1 can't overlap next e2
|
|
else
|
|
e2 = e2->next;
|
|
}
|
|
key1->free_tree();
|
|
key2->free_tree();
|
|
if (!new_tree)
|
|
// Impossible range
|
|
return new (param->temp_mem_root)
|
|
SEL_ROOT(param->temp_mem_root, SEL_ROOT::Type::IMPOSSIBLE);
|
|
return new_tree;
|
|
}
|
|
|
|
static bool get_range(SEL_ARG **e1, SEL_ARG **e2, const SEL_ROOT *root1) {
|
|
(*e1) = root1->find_range(*e2); // first e1->min < e2->min
|
|
if ((*e1)->cmp_max_to_min(*e2) < 0) {
|
|
if (!((*e1) = (*e1)->next)) return true;
|
|
if ((*e1)->cmp_min_to_max(*e2) > 0) {
|
|
(*e2) = (*e2)->next;
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
Combine two range expression under a common OR. On a logical level, the
|
|
transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
|
|
|
|
Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
|
|
the format is reminiscent of DNF, since an expression such as the following
|
|
|
|
( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
|
|
|
|
where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
|
|
and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
|
|
SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
|
|
the first range and ranges must not overlap. It follows that they are also
|
|
ordered by maximum endpoints. Thus
|
|
|
|
( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
|
|
|
|
Is a a valid SER_ARG expression for a key of at least 2 keyparts.
|
|
|
|
For simplicity, we will assume that expr2 is a single range predicate,
|
|
i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
|
|
disjunction of several predicates by subsequently call key_or for each
|
|
disjunct.
|
|
|
|
The algorithm iterates over each disjunct of expr1, and for each disjunct
|
|
where the first keypart's range overlaps with the first keypart's range in
|
|
expr2:
|
|
|
|
If the predicates are equal for the rest of the keyparts, or if there are
|
|
no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
|
|
node in expr2 is deallocated. If more ranges became connected in expr1, the
|
|
surplus is also dealocated. If they differ, two ranges are created.
|
|
|
|
- The range leading up to the overlap. Empty if endpoints are equal.
|
|
|
|
- The overlapping sub-range. May be the entire range if they are equal.
|
|
|
|
Finally, there may be one more range if expr2's first keypart's range has a
|
|
greater maximum endpoint than the last range in expr1.
|
|
|
|
For the overlapping sub-range, we recursively call key_or. Thus in order to
|
|
compute key_or of
|
|
|
|
(1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
|
|
|
|
(2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
|
|
|
|
We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
|
|
first one, we simply hook on the condition for the second keypart from (1)
|
|
: 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
|
|
< kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
|
|
the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
|
|
|
|
( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR
|
|
( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR
|
|
( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
|
|
|
|
key_or() does not modify key1 nor key2 if they are in use by other roots
|
|
(although typical use is that key1 has been disconnected from its root
|
|
and thus can be modified in-place). Thus, it does not change their
|
|
use_count.
|
|
|
|
The returned node will not have its use_count increased; you are supposed
|
|
to do that yourself when you connect it to a root.
|
|
|
|
@param param RANGE_OPT_PARAM from test_quick_select
|
|
@param key1 Root of RB-tree of SEL_ARGs to be ORed with key2
|
|
@param key2 Root of RB-tree of SEL_ARGs to be ORed with key1
|
|
*/
|
|
SEL_ROOT *key_or(RANGE_OPT_PARAM *param, SEL_ROOT *key1, SEL_ROOT *key2) {
|
|
if (param->has_errors()) return nullptr;
|
|
|
|
if (key1 == nullptr || key1->is_always()) {
|
|
if (key2) key2->free_tree();
|
|
return key1;
|
|
}
|
|
if (key2 == nullptr || key2->is_always())
|
|
// Case is symmetric to the one above, just flip parameters.
|
|
return key_or(param, key2, key1);
|
|
|
|
if (key1->root->part != key2->root->part ||
|
|
(key1->root->min_flag | key2->root->min_flag) & GEOM_FLAG) {
|
|
key1->free_tree();
|
|
key2->free_tree();
|
|
return nullptr; // Can't optimize this
|
|
}
|
|
|
|
// If one of the key is MAYBE_KEY then the found region may be bigger
|
|
if (key1->type == SEL_ROOT::Type::MAYBE_KEY) {
|
|
key2->free_tree();
|
|
return key1;
|
|
}
|
|
if (key2->type == SEL_ROOT::Type::MAYBE_KEY) {
|
|
key1->free_tree();
|
|
return key2;
|
|
}
|
|
|
|
// (cond) OR (IMPOSSIBLE) <=> (cond).
|
|
if (key1->type == SEL_ROOT::Type::IMPOSSIBLE) {
|
|
key1->free_tree();
|
|
return key2;
|
|
}
|
|
if (key2->type == SEL_ROOT::Type::IMPOSSIBLE) {
|
|
key2->free_tree();
|
|
return key1;
|
|
}
|
|
|
|
/*
|
|
We need to modify one of key1 or key2 (whichever we choose, we will
|
|
call it key1 afterwards). If either is used only by us (use_count == 0),
|
|
we can use that directly. If not, we need to clone one of them; we pick
|
|
the one with the fewest elements since that is the cheapest.
|
|
*/
|
|
if (key1->use_count > 0) {
|
|
if (key2->use_count == 0 || key1->elements > key2->elements) {
|
|
std::swap(key1, key2);
|
|
}
|
|
if (key1->use_count > 0 && (key1 = key1->clone_tree(param)) == nullptr)
|
|
return nullptr; // OOM
|
|
}
|
|
assert(key1->use_count == 0);
|
|
|
|
/*
|
|
Add tree at key2 to tree at key1. If key2 is used by nobody else,
|
|
we can cannibalize its nodes and add them directly into key1.
|
|
If not, we'll need to make copies of them.
|
|
*/
|
|
const bool key2_shared = (key2->use_count != 0);
|
|
key1->root->maybe_flag |= key2->root->maybe_flag;
|
|
|
|
/*
|
|
Notation for illustrations used in the rest of this function:
|
|
|
|
Range: [--------]
|
|
^ ^
|
|
start stop
|
|
|
|
Two overlapping ranges:
|
|
[-----] [----] [--]
|
|
[---] or [---] or [-------]
|
|
|
|
Ambiguity: ***
|
|
The range starts or stops somewhere in the "***" range.
|
|
Example: a starts before b and may end before/the same place/after b
|
|
a: [----***]
|
|
b: [---]
|
|
|
|
Adjacent ranges:
|
|
Ranges that meet but do not overlap. Example: a = "x < 3", b = "x >= 3"
|
|
a: ----]
|
|
b: [----
|
|
*/
|
|
|
|
SEL_ARG *cur_key2 = key2->root->first();
|
|
while (cur_key2) {
|
|
/*
|
|
key1 consists of one or more ranges. cur_key1 is the
|
|
range currently being handled.
|
|
|
|
initialize cur_key1 to the latest range in key1 that starts the
|
|
same place or before the range in cur_key2 starts
|
|
|
|
cur_key2: [------]
|
|
key1: [---] [-----] [----]
|
|
^
|
|
cur_key1
|
|
*/
|
|
SEL_ARG *cur_key1 = key1->find_range(cur_key2);
|
|
|
|
/*
|
|
Used to describe how two key values are positioned compared to
|
|
each other. Consider key_value_a.<cmp_func>(key_value_b):
|
|
|
|
-2: key_value_a is smaller than key_value_b, and they are adjacent
|
|
-1: key_value_a is smaller than key_value_b (not adjacent)
|
|
0: the key values are equal
|
|
1: key_value_a is bigger than key_value_b (not adjacent)
|
|
2: key_value_a is bigger than key_value_b, and they are adjacent
|
|
|
|
Example: "cmp= cur_key1->cmp_max_to_min(cur_key2)"
|
|
|
|
cur_key2: [-------- (10 <= x ... )
|
|
cur_key1: -----] ( ... x < 10) => cmp==-2
|
|
cur_key1: ----] ( ... x < 9) => cmp==-1
|
|
cur_key1: ------] ( ... x <= 10) => cmp== 0
|
|
cur_key1: --------] ( ... x <= 12) => cmp== 1
|
|
(cmp == 2 does not make sense for cmp_max_to_min())
|
|
*/
|
|
int cmp = 0;
|
|
|
|
if (!cur_key1) {
|
|
/*
|
|
The range in cur_key2 starts before the first range in key1. Use
|
|
the first range in key1 as cur_key1.
|
|
|
|
cur_key2: [--------]
|
|
key1: [****--] [----] [-------]
|
|
^
|
|
cur_key1
|
|
*/
|
|
cur_key1 = key1->root->first();
|
|
cmp = -1;
|
|
} else if ((cmp = cur_key1->cmp_max_to_min(cur_key2)) < 0) {
|
|
/*
|
|
This is the case:
|
|
cur_key2: [-------]
|
|
cur_key1: [----**]
|
|
*/
|
|
SEL_ARG *next_key1 = cur_key1->next;
|
|
if (cmp == -2 &&
|
|
eq_tree(cur_key1->next_key_part, cur_key2->next_key_part)) {
|
|
/*
|
|
Adjacent (cmp==-2) and equal next_key_parts => ranges can be merged
|
|
|
|
This is the case:
|
|
cur_key2: [-------]
|
|
cur_key1: [----]
|
|
|
|
Result:
|
|
cur_key2: [-------------] => inserted into key1 below
|
|
cur_key1: => deleted
|
|
*/
|
|
SEL_ARG *next_key2 = cur_key2->next;
|
|
if (key2_shared) {
|
|
if (!(cur_key2 = new (param->temp_mem_root) SEL_ARG(*cur_key2)))
|
|
return nullptr; // out of memory
|
|
cur_key2->next = next_key2; // New copy of cur_key2
|
|
}
|
|
|
|
if (cur_key2->copy_min(cur_key1)) {
|
|
// cur_key2 is full range: [-inf <= cur_key2 <= +inf]
|
|
key1->free_tree();
|
|
key2->free_tree();
|
|
if (key1->root->maybe_flag)
|
|
return new (param->temp_mem_root)
|
|
SEL_ROOT(param->temp_mem_root, SEL_ROOT::Type::MAYBE_KEY);
|
|
return nullptr;
|
|
}
|
|
|
|
key1->tree_delete(cur_key1);
|
|
if (key1->type == SEL_ROOT::Type::IMPOSSIBLE) {
|
|
/*
|
|
cur_key1 was the last range in key1; move the cur_key2
|
|
range that was merged above to key1
|
|
*/
|
|
key1->insert(cur_key2);
|
|
cur_key2 = next_key2;
|
|
break;
|
|
}
|
|
}
|
|
// Move to next range in key1. Now cur_key1.min > cur_key2.min
|
|
if (!(cur_key1 = next_key1))
|
|
break; // No more ranges in key1. Copy rest of key2
|
|
}
|
|
|
|
if (cmp < 0) {
|
|
/*
|
|
This is the case:
|
|
cur_key2: [--***]
|
|
cur_key1: [----]
|
|
*/
|
|
int cur_key1_cmp;
|
|
if ((cur_key1_cmp = cur_key1->cmp_min_to_max(cur_key2)) > 0) {
|
|
/*
|
|
This is the case:
|
|
cur_key2: [------**]
|
|
cur_key1: [----]
|
|
*/
|
|
if (cur_key1_cmp == 2 &&
|
|
eq_tree(cur_key1->next_key_part, cur_key2->next_key_part)) {
|
|
/*
|
|
Adjacent ranges with equal next_key_part. Merge like this:
|
|
|
|
This is the case:
|
|
cur_key2: [------]
|
|
cur_key1: [-----]
|
|
|
|
Result:
|
|
cur_key2: [------]
|
|
cur_key1: [-------------]
|
|
|
|
Then move on to next key2 range.
|
|
*/
|
|
cur_key1->copy_min_to_min(cur_key2);
|
|
key1->root->merge_flags(cur_key2); // should be cur_key1->merge...()
|
|
// ?
|
|
if (cur_key1->min_flag & NO_MIN_RANGE &&
|
|
cur_key1->max_flag & NO_MAX_RANGE) {
|
|
key1->free_tree();
|
|
key2->free_tree();
|
|
if (key1->root->maybe_flag)
|
|
return new (param->temp_mem_root)
|
|
SEL_ROOT(param->temp_mem_root, SEL_ROOT::Type::MAYBE_KEY);
|
|
return nullptr;
|
|
}
|
|
cur_key2->release_next_key_part(); // Free not used tree
|
|
cur_key2 = cur_key2->next;
|
|
continue;
|
|
} else {
|
|
/*
|
|
cur_key2 not adjacent to cur_key1 or has different next_key_part.
|
|
Insert into key1 and move to next range in key2
|
|
|
|
This is the case:
|
|
cur_key2: [------**]
|
|
cur_key1: [----]
|
|
|
|
Result:
|
|
key1: [------**][----]
|
|
^ ^
|
|
insert cur_key1
|
|
*/
|
|
SEL_ARG *next_key2 = cur_key2->next;
|
|
if (key2_shared) {
|
|
SEL_ARG *cpy = new (param->temp_mem_root)
|
|
SEL_ARG(*cur_key2); // Must make copy
|
|
if (!cpy) return nullptr; // OOM
|
|
key1->insert(cpy);
|
|
} else
|
|
key1->insert(cur_key2);
|
|
cur_key2 = next_key2;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
The ranges in cur_key1 and cur_key2 are overlapping:
|
|
|
|
cur_key2: [----------]
|
|
cur_key1: [*****-----*****]
|
|
|
|
Corollary: cur_key1.min <= cur_key2.max
|
|
*/
|
|
if (eq_tree(cur_key1->next_key_part, cur_key2->next_key_part)) {
|
|
// Merge overlapping ranges with equal next_key_part
|
|
if (cur_key1->is_same(cur_key2)) {
|
|
/*
|
|
cur_key1 covers exactly the same range as cur_key2
|
|
Use the relevant range in key1.
|
|
*/
|
|
cur_key1->merge_flags(cur_key2); // Copy maybe flags
|
|
cur_key2->release_next_key_part(); // Free not used tree
|
|
// Move to the next range in cur_key2
|
|
cur_key2 = cur_key2->next;
|
|
continue;
|
|
} else {
|
|
SEL_ARG *last = cur_key1;
|
|
SEL_ARG *first = cur_key1;
|
|
|
|
/*
|
|
Find the last range in key1 that overlaps cur_key2 and
|
|
where all ranges first...last have the same next_key_part as
|
|
cur_key2.
|
|
|
|
cur_key2: [****----------------------*******]
|
|
key1: [--] [----] [---] [-----] [xxxx]
|
|
^ ^ ^
|
|
first last different next_key_part
|
|
|
|
Since cur_key2 covers them, the ranges between first and last
|
|
are merged into one range by deleting first...last-1 from
|
|
the key1 tree. In the figure, this applies to first and the
|
|
two consecutive ranges. The range of last is then extended:
|
|
* last.min: Set to min(cur_key2.min, first.min)
|
|
* last.max: If there is a last->next that overlaps cur_key2
|
|
(i.e., last->next has a different next_key_part):
|
|
Set adjacent to last->next.min
|
|
Otherwise: Set to max(cur_key2.max, last.max)
|
|
|
|
Result:
|
|
cur_key2: [****----------------------*******]
|
|
[--] [----] [---] => deleted from key1
|
|
key1: [**------------------------***][xxxx]
|
|
^ ^
|
|
cur_key1=last different next_key_part
|
|
*/
|
|
while (last->next && last->next->cmp_min_to_max(cur_key2) <= 0 &&
|
|
eq_tree(last->next->next_key_part, cur_key2->next_key_part)) {
|
|
/*
|
|
last->next is covered by cur_key2 and has same next_key_part.
|
|
last can be deleted
|
|
*/
|
|
SEL_ARG *save = last;
|
|
last = last->next;
|
|
key1->tree_delete(save);
|
|
}
|
|
// Redirect cur_key1 to last which will cover the entire range
|
|
cur_key1 = last;
|
|
|
|
/*
|
|
Extend last to cover the entire range of
|
|
[min(first.min_value,cur_key2.min_value)...last.max_value].
|
|
If this forms a full range (the range covers all possible
|
|
values) we return no SEL_ARG RB-tree.
|
|
*/
|
|
bool full_range = last->copy_min(first);
|
|
if (!full_range) full_range = last->copy_min(cur_key2);
|
|
|
|
if (!full_range) {
|
|
if (last->next && cur_key2->cmp_max_to_min(last->next) >= 0) {
|
|
/*
|
|
This is the case:
|
|
cur_key2: [-------------]
|
|
key1: [***------] [xxxx]
|
|
^ ^
|
|
last different next_key_part
|
|
|
|
Extend range of last up to last->next:
|
|
cur_key2: [-------------]
|
|
key1: [***--------][xxxx]
|
|
*/
|
|
last->copy_min_to_max(last->next);
|
|
} else
|
|
/*
|
|
This is the case:
|
|
cur_key2: [--------*****]
|
|
key1: [***---------] [xxxx]
|
|
^ ^
|
|
last different next_key_part
|
|
|
|
Extend range of last up to max(last.max, cur_key2.max):
|
|
cur_key2: [--------*****]
|
|
key1: [***----------**] [xxxx]
|
|
*/
|
|
full_range = last->copy_max(cur_key2);
|
|
}
|
|
if (full_range) { // Full range
|
|
key1->free_tree();
|
|
cur_key2->release_next_key_part();
|
|
if (key1->root->maybe_flag)
|
|
return new (param->temp_mem_root)
|
|
SEL_ROOT(param->temp_mem_root, SEL_ROOT::Type::MAYBE_KEY);
|
|
return nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (cmp >= 0 && cur_key1->cmp_min_to_min(cur_key2) < 0) {
|
|
/*
|
|
This is the case ("cmp>=0" means that cur_key1.max >= cur_key2.min):
|
|
cur_key2: [-------]
|
|
cur_key1: [----------*******]
|
|
*/
|
|
|
|
if (!cur_key1->next_key_part) {
|
|
/*
|
|
cur_key1->next_key_part is empty: cut the range that
|
|
is covered by cur_key1 from cur_key2.
|
|
Reason: (cur_key2->next_key_part OR
|
|
cur_key1->next_key_part) will be empty and therefore
|
|
equal to cur_key1->next_key_part. Thus, this part of
|
|
the cur_key2 range is completely covered by cur_key1.
|
|
*/
|
|
if (cur_key1->cmp_max_to_max(cur_key2) >= 0) {
|
|
/*
|
|
cur_key1 covers the entire range in cur_key2.
|
|
cur_key2: [-------]
|
|
cur_key1: [-----------------]
|
|
|
|
Move on to next range in key2
|
|
*/
|
|
cur_key2 = cur_key2->next;
|
|
continue;
|
|
} else {
|
|
/*
|
|
This is the case:
|
|
cur_key2: [-------]
|
|
cur_key1: [---------]
|
|
|
|
Result:
|
|
cur_key2: [---]
|
|
cur_key1: [---------]
|
|
*/
|
|
cur_key2->copy_max_to_min(cur_key1);
|
|
// FIXME: what if key2_shared?
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/*
|
|
The ranges are overlapping but have not been merged because
|
|
next_key_part of cur_key1 and cur_key2 differ.
|
|
cur_key2: [----]
|
|
cur_key1: [------------*****]
|
|
|
|
Split cur_key1 in two where cur_key2 starts:
|
|
cur_key2: [----]
|
|
key1: [--------][--*****]
|
|
^ ^
|
|
insert cur_key1
|
|
*/
|
|
SEL_ARG *new_arg = cur_key1->clone_first(cur_key2, param->temp_mem_root);
|
|
if (!new_arg) return nullptr; // OOM
|
|
new_arg->set_next_key_part(cur_key1->next_key_part);
|
|
cur_key1->copy_min_to_min(cur_key2);
|
|
key1->insert(new_arg);
|
|
} // cur_key1.min >= cur_key2.min due to this if()
|
|
|
|
/*
|
|
Now cur_key2.min <= cur_key1.min <= cur_key2.max:
|
|
cur_key2: [---------]
|
|
cur_key1: [****---*****]
|
|
*/
|
|
|
|
/*
|
|
Get a copy we can modify. Note that this will keep an extra reference
|
|
to its next_key_part (if any), but the destructor will clean that up
|
|
when we exit from the function. key2_cpy is ephemeral and will not be
|
|
inserted in any tree, although copies of it might be.
|
|
*/
|
|
SEL_ARG key2_cpy(*cur_key2);
|
|
|
|
for (;;) {
|
|
if (cur_key1->cmp_min_to_min(&key2_cpy) > 0) {
|
|
/*
|
|
This is the case:
|
|
key2_cpy: [------------]
|
|
key1: [-*****]
|
|
^
|
|
cur_key1
|
|
|
|
Result:
|
|
key2_cpy: [---]
|
|
key1: [-------][-*****]
|
|
^ ^
|
|
insert cur_key1
|
|
*/
|
|
SEL_ARG *new_arg = key2_cpy.clone_first(cur_key1, param->temp_mem_root);
|
|
if (!new_arg) return nullptr; // OOM
|
|
new_arg->set_next_key_part(key2_cpy.next_key_part);
|
|
key1->insert(new_arg);
|
|
key2_cpy.copy_min_to_min(cur_key1);
|
|
}
|
|
// Now key2_cpy.min == cur_key1.min
|
|
|
|
if ((cmp = cur_key1->cmp_max_to_max(&key2_cpy)) <= 0) {
|
|
/*
|
|
cur_key1.max <= key2_cpy.max:
|
|
key2_cpy: a) [-------] or b) [----]
|
|
cur_key1: [----] [----]
|
|
|
|
Steps:
|
|
|
|
1) Update next_key_part of cur_key1: OR it with
|
|
key2_cpy->next_key_part.
|
|
2) If case a: Insert range [cur_key1.max, key2_cpy.max]
|
|
into key1 using next_key_part of key2_cpy
|
|
|
|
Result:
|
|
key1: a) [----][-] or b) [----]
|
|
*/
|
|
cur_key1->maybe_flag |= key2_cpy.maybe_flag;
|
|
cur_key1->set_next_key_part(key_or(
|
|
param, cur_key1->release_next_key_part(), key2_cpy.next_key_part));
|
|
|
|
if (!cmp) break; // case b: done with this key2 range
|
|
|
|
// Make key2_cpy the range [cur_key1.max, key2_cpy.max]
|
|
key2_cpy.copy_max_to_min(cur_key1);
|
|
if (!(cur_key1 = cur_key1->next)) {
|
|
/*
|
|
No more ranges in key1. Insert key2_cpy and go to "end"
|
|
label to insert remaining ranges in key2 if any.
|
|
*/
|
|
SEL_ARG *new_key1_range =
|
|
new (param->temp_mem_root) SEL_ARG(key2_cpy);
|
|
if (!new_key1_range) return nullptr; // OOM
|
|
key1->insert(new_key1_range);
|
|
cur_key2 = cur_key2->next;
|
|
goto end;
|
|
}
|
|
if (cur_key1->cmp_min_to_max(&key2_cpy) > 0) {
|
|
/*
|
|
The next range in key1 does not overlap with key2_cpy.
|
|
Insert this range into key1 and move on to the next range
|
|
in key2.
|
|
*/
|
|
SEL_ARG *new_key1_range =
|
|
new (param->temp_mem_root) SEL_ARG(key2_cpy);
|
|
if (!new_key1_range) return nullptr; // OOM
|
|
key1->insert(new_key1_range);
|
|
break;
|
|
}
|
|
/*
|
|
key2_cpy overlaps with the next range in key1 and the case
|
|
is now "cur_key2.min <= cur_key1.min <= cur_key2.max". Go back
|
|
to for(;;) to handle this situation.
|
|
*/
|
|
continue;
|
|
} else {
|
|
/*
|
|
This is the case:
|
|
key2_cpy: [-------]
|
|
cur_key1: [------------]
|
|
|
|
Result:
|
|
key1: [-------][---]
|
|
^ ^
|
|
new_arg cur_key1
|
|
Steps:
|
|
|
|
0) If cur_key1->next_key_part is empty: do nothing.
|
|
Reason: (key2_cpy->next_key_part OR
|
|
cur_key1->next_key_part) will be empty and
|
|
therefore equal to cur_key1->next_key_part. Thus,
|
|
the range in key2_cpy is completely covered by
|
|
cur_key1
|
|
1) Make new_arg with range [cur_key1.min, key2_cpy.max].
|
|
new_arg->next_key_part is OR between next_key_part of
|
|
cur_key1 and key2_cpy
|
|
2) Make cur_key1 the range [key2_cpy.max, cur_key1.max]
|
|
3) Insert new_arg into key1
|
|
*/
|
|
if (!cur_key1->next_key_part) // Step 0
|
|
{
|
|
key2_cpy.release_next_key_part(); // Free not used tree
|
|
break;
|
|
}
|
|
SEL_ARG *new_arg =
|
|
cur_key1->clone_last(&key2_cpy, param->temp_mem_root);
|
|
if (!new_arg) return nullptr; // OOM
|
|
cur_key1->copy_max_to_min(&key2_cpy);
|
|
|
|
new_arg->set_next_key_part(
|
|
key_or(param, cur_key1->next_key_part, key2_cpy.next_key_part));
|
|
key1->insert(new_arg);
|
|
break;
|
|
}
|
|
}
|
|
// Move on to next range in key2
|
|
cur_key2 = cur_key2->next;
|
|
}
|
|
|
|
end:
|
|
/*
|
|
Add key2 ranges that are non-overlapping with and higher than the
|
|
highest range in key1.
|
|
*/
|
|
while (cur_key2) {
|
|
SEL_ARG *next = cur_key2->next;
|
|
if (key2_shared) {
|
|
SEL_ARG *key2_cpy =
|
|
new (param->temp_mem_root) SEL_ARG(*cur_key2); // Must make copy
|
|
if (!key2_cpy) return nullptr;
|
|
key1->insert(key2_cpy);
|
|
} else
|
|
key1->insert(cur_key2);
|
|
cur_key2 = next;
|
|
}
|
|
|
|
/*
|
|
TODO: We should call key2->free_tree() here, since this might be the
|
|
last reference to the tree (if !key2_shared). However, the tree might
|
|
be in an invalid state since we may have inserted nodes into key1 without
|
|
taking them out of key2, so we need to clean that up first. As a temporary
|
|
measure, we TRASH() it to expose any bugs where people hold on to it
|
|
where we thought they wouldn't.
|
|
*/
|
|
#ifndef NDEBUG
|
|
if (!key2_shared) TRASH(key2, sizeof(*key2));
|
|
#endif
|
|
return key1;
|
|
}
|
|
|
|
/**
|
|
Compare if two trees are equal, recursively (not necessarily the same
|
|
elements, but in terms of structure and values in each leaf).
|
|
|
|
NOTE: The demand for the same structure means that some trees that are
|
|
equivalent could be deemed inequal by this function, depending on insertion
|
|
order.
|
|
|
|
@param a First tree to compare.
|
|
@param b Second tree to compare.
|
|
|
|
@return true iff they are equivalent.
|
|
*/
|
|
static bool eq_tree(const SEL_ROOT *a, const SEL_ROOT *b) {
|
|
if (a == b) return true;
|
|
if (!a || !b) return false;
|
|
if (a->type == SEL_ROOT::Type::KEY_RANGE &&
|
|
b->type == SEL_ROOT::Type::KEY_RANGE)
|
|
return eq_tree(a->root, b->root);
|
|
else
|
|
return a->type == b->type;
|
|
}
|
|
|
|
static bool eq_tree(const SEL_ARG *a, const SEL_ARG *b) {
|
|
if (a == b) return true;
|
|
if (!a || !b || !a->is_same(b)) return false;
|
|
if (a->left != null_element && b->left != null_element) {
|
|
if (!eq_tree(a->left, b->left)) return false;
|
|
} else if (a->left != null_element || b->left != null_element)
|
|
return false;
|
|
if (a->right != null_element && b->right != null_element) {
|
|
if (!eq_tree(a->right, b->right)) return false;
|
|
} else if (a->right != null_element || b->right != null_element)
|
|
return false;
|
|
if (a->next_key_part != b->next_key_part) { // Sub range
|
|
if (!a->next_key_part != !b->next_key_part ||
|
|
!eq_tree(a->next_key_part, b->next_key_part))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void SEL_ROOT::insert(SEL_ARG *key) {
|
|
SEL_ARG *element, **par = nullptr, *last_element = nullptr;
|
|
|
|
if (type == Type::IMPOSSIBLE) {
|
|
/*
|
|
Used to be impossible, but now gets a new range; remove the dummy node
|
|
that exists in that kind of tree, and set this one as the root
|
|
(and sole element) instead.
|
|
*/
|
|
root->release_next_key_part();
|
|
uint8 maybe_flag = root->maybe_flag;
|
|
root = key;
|
|
root->maybe_flag = maybe_flag;
|
|
root->make_root();
|
|
type = Type::KEY_RANGE;
|
|
return;
|
|
}
|
|
|
|
assert(type == Type::KEY_RANGE);
|
|
assert(root->parent == nullptr);
|
|
assert(root != null_element);
|
|
for (element = root; element != null_element;) {
|
|
last_element = element;
|
|
if (key->cmp_min_to_min(element) > 0) {
|
|
par = &element->right;
|
|
element = element->right;
|
|
} else {
|
|
par = &element->left;
|
|
element = element->left;
|
|
}
|
|
}
|
|
*par = key;
|
|
key->parent = last_element;
|
|
/* Link in list */
|
|
if (par == &last_element->left) {
|
|
key->next = last_element;
|
|
if ((key->prev = last_element->prev)) key->prev->next = key;
|
|
last_element->prev = key;
|
|
} else {
|
|
if ((key->next = last_element->next)) key->next->prev = key;
|
|
key->prev = last_element;
|
|
last_element->next = key;
|
|
}
|
|
key->left = key->right = null_element;
|
|
uint8 maybe_flag = root->maybe_flag;
|
|
root = root->rb_insert(key); // rebalance tree
|
|
root->maybe_flag = maybe_flag;
|
|
++elements;
|
|
}
|
|
|
|
SEL_ARG *SEL_ROOT::find_range(const SEL_ARG *key) const {
|
|
SEL_ARG *element = root, *found = nullptr;
|
|
|
|
for (;;) {
|
|
if (element == null_element) return found;
|
|
int cmp = element->cmp_min_to_min(key);
|
|
if (cmp == 0) return element;
|
|
if (cmp < 0) {
|
|
found = element;
|
|
element = element->right;
|
|
} else
|
|
element = element->left;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Remove a element from the tree
|
|
|
|
SYNOPSIS
|
|
tree_delete()
|
|
key Key that is to be deleted from tree (this)
|
|
|
|
NOTE
|
|
This also frees all sub trees that is used by the element
|
|
|
|
*/
|
|
|
|
void SEL_ROOT::tree_delete(SEL_ARG *key) {
|
|
enum SEL_ARG::leaf_color remove_color;
|
|
SEL_ARG *nod, **par, *fix_par;
|
|
DBUG_TRACE;
|
|
|
|
assert(this->type == Type::KEY_RANGE);
|
|
assert(this->root->parent == nullptr);
|
|
|
|
/*
|
|
If deleting the last element, we are now of type IMPOSSIBLE.
|
|
Keep the element around so that we have somewhere to store
|
|
next_key_part etc. if needed in the future.
|
|
*/
|
|
if (elements == 1) {
|
|
assert(key == root);
|
|
type = Type::IMPOSSIBLE;
|
|
key->release_next_key_part();
|
|
return;
|
|
}
|
|
|
|
/* Unlink from list */
|
|
if (key->prev) key->prev->next = key->next;
|
|
if (key->next) key->next->prev = key->prev;
|
|
if (key->next_key_part) --key->next_key_part->use_count;
|
|
if (!key->parent)
|
|
par = &root;
|
|
else
|
|
par = key->parent_ptr();
|
|
|
|
if (key->left == null_element) {
|
|
*par = nod = key->right;
|
|
fix_par = key->parent;
|
|
if (nod != null_element) nod->parent = fix_par;
|
|
remove_color = key->color;
|
|
} else if (key->right == null_element) {
|
|
*par = nod = key->left;
|
|
nod->parent = fix_par = key->parent;
|
|
remove_color = key->color;
|
|
} else {
|
|
SEL_ARG *tmp = key->next; // next bigger key (exist!)
|
|
nod = *tmp->parent_ptr() = tmp->right; // unlink tmp from tree
|
|
fix_par = tmp->parent;
|
|
if (nod != null_element) nod->parent = fix_par;
|
|
remove_color = tmp->color;
|
|
|
|
tmp->parent = key->parent; // Move node in place of key
|
|
(tmp->left = key->left)->parent = tmp;
|
|
if ((tmp->right = key->right) != null_element) tmp->right->parent = tmp;
|
|
tmp->color = key->color;
|
|
*par = tmp;
|
|
if (fix_par == key) // key->right == key->next
|
|
fix_par = tmp; // new parent of nod
|
|
}
|
|
|
|
--elements;
|
|
if (root == null_element) return; // Maybe root later
|
|
if (remove_color == SEL_ARG::BLACK) {
|
|
uint8 maybe_flag = root->maybe_flag;
|
|
root = rb_delete_fixup(root, nod, fix_par);
|
|
root->maybe_flag = maybe_flag;
|
|
}
|
|
#ifndef NDEBUG
|
|
test_rb_tree(root, root->parent);
|
|
#endif
|
|
}
|
|
|
|
/* Functions to fix up the tree after insert and delete */
|
|
|
|
static void left_rotate(SEL_ARG **root, SEL_ARG *leaf) {
|
|
SEL_ARG *y = leaf->right;
|
|
leaf->right = y->left;
|
|
if (y->left != null_element) y->left->parent = leaf;
|
|
if (!(y->parent = leaf->parent))
|
|
*root = y;
|
|
else
|
|
*leaf->parent_ptr() = y;
|
|
y->left = leaf;
|
|
leaf->parent = y;
|
|
}
|
|
|
|
static void right_rotate(SEL_ARG **root, SEL_ARG *leaf) {
|
|
SEL_ARG *y = leaf->left;
|
|
leaf->left = y->right;
|
|
if (y->right != null_element) y->right->parent = leaf;
|
|
if (!(y->parent = leaf->parent))
|
|
*root = y;
|
|
else
|
|
*leaf->parent_ptr() = y;
|
|
y->right = leaf;
|
|
leaf->parent = y;
|
|
}
|
|
|
|
SEL_ARG *SEL_ARG::rb_insert(SEL_ARG *leaf) {
|
|
SEL_ARG *y, *par, *par2, *root;
|
|
root = this;
|
|
assert(!root->parent);
|
|
assert(root->color == BLACK);
|
|
|
|
leaf->color = RED;
|
|
while (leaf != root && (par = leaf->parent)->color ==
|
|
RED) { // This can't be root or 1 level under
|
|
assert(leaf->parent->parent);
|
|
if (par == (par2 = leaf->parent->parent)->left) {
|
|
y = par2->right;
|
|
if (y->color == RED) {
|
|
par->color = BLACK;
|
|
y->color = BLACK;
|
|
leaf = par2;
|
|
leaf->color = RED; /* And the loop continues */
|
|
} else {
|
|
if (leaf == par->right) {
|
|
left_rotate(&root, leaf->parent);
|
|
par = leaf; /* leaf is now parent to old leaf */
|
|
}
|
|
par->color = BLACK;
|
|
par2->color = RED;
|
|
right_rotate(&root, par2);
|
|
break;
|
|
}
|
|
} else {
|
|
y = par2->left;
|
|
if (y->color == RED) {
|
|
par->color = BLACK;
|
|
y->color = BLACK;
|
|
leaf = par2;
|
|
leaf->color = RED; /* And the loop continues */
|
|
} else {
|
|
if (leaf == par->left) {
|
|
right_rotate(&root, par);
|
|
par = leaf;
|
|
}
|
|
par->color = BLACK;
|
|
par2->color = RED;
|
|
left_rotate(&root, par2);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
root->color = BLACK;
|
|
#ifndef NDEBUG
|
|
test_rb_tree(root, root->parent);
|
|
#endif
|
|
return root;
|
|
}
|
|
|
|
SEL_ARG *rb_delete_fixup(SEL_ARG *root, SEL_ARG *key, SEL_ARG *par) {
|
|
SEL_ARG *x, *w;
|
|
root->parent = nullptr;
|
|
|
|
x = key;
|
|
while (x != root && x->color == SEL_ARG::BLACK) {
|
|
if (x == par->left) {
|
|
w = par->right;
|
|
if (w->color == SEL_ARG::RED) {
|
|
w->color = SEL_ARG::BLACK;
|
|
par->color = SEL_ARG::RED;
|
|
left_rotate(&root, par);
|
|
w = par->right;
|
|
}
|
|
if (w->left->color == SEL_ARG::BLACK &&
|
|
w->right->color == SEL_ARG::BLACK) {
|
|
w->color = SEL_ARG::RED;
|
|
x = par;
|
|
} else {
|
|
if (w->right->color == SEL_ARG::BLACK) {
|
|
w->left->color = SEL_ARG::BLACK;
|
|
w->color = SEL_ARG::RED;
|
|
right_rotate(&root, w);
|
|
w = par->right;
|
|
}
|
|
w->color = par->color;
|
|
par->color = SEL_ARG::BLACK;
|
|
w->right->color = SEL_ARG::BLACK;
|
|
left_rotate(&root, par);
|
|
x = root;
|
|
break;
|
|
}
|
|
} else {
|
|
w = par->left;
|
|
if (w->color == SEL_ARG::RED) {
|
|
w->color = SEL_ARG::BLACK;
|
|
par->color = SEL_ARG::RED;
|
|
right_rotate(&root, par);
|
|
w = par->left;
|
|
}
|
|
if (w->right->color == SEL_ARG::BLACK &&
|
|
w->left->color == SEL_ARG::BLACK) {
|
|
w->color = SEL_ARG::RED;
|
|
x = par;
|
|
} else {
|
|
if (w->left->color == SEL_ARG::BLACK) {
|
|
w->right->color = SEL_ARG::BLACK;
|
|
w->color = SEL_ARG::RED;
|
|
left_rotate(&root, w);
|
|
w = par->left;
|
|
}
|
|
w->color = par->color;
|
|
par->color = SEL_ARG::BLACK;
|
|
w->left->color = SEL_ARG::BLACK;
|
|
right_rotate(&root, par);
|
|
x = root;
|
|
break;
|
|
}
|
|
}
|
|
par = x->parent;
|
|
}
|
|
x->color = SEL_ARG::BLACK;
|
|
return root;
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
/* Test that the properties for a red-black tree hold */
|
|
|
|
int test_rb_tree(SEL_ARG *element, SEL_ARG *parent) {
|
|
int count_l, count_r;
|
|
|
|
if (element == null_element) return 0; // Found end of tree
|
|
if (element->parent != parent) {
|
|
LogErr(ERROR_LEVEL, ER_TREE_CORRUPT_PARENT_SHOULD_POINT_AT_PARENT);
|
|
return -1;
|
|
}
|
|
if (!parent && element->color != SEL_ARG::BLACK) {
|
|
LogErr(ERROR_LEVEL, ER_TREE_CORRUPT_ROOT_SHOULD_BE_BLACK);
|
|
return -1;
|
|
}
|
|
if (element->color == SEL_ARG::RED &&
|
|
(element->left->color == SEL_ARG::RED ||
|
|
element->right->color == SEL_ARG::RED)) {
|
|
LogErr(ERROR_LEVEL, ER_TREE_CORRUPT_2_CONSECUTIVE_REDS);
|
|
return -1;
|
|
}
|
|
if (element->left == element->right &&
|
|
element->left != null_element) { // Dummy test
|
|
LogErr(ERROR_LEVEL, ER_TREE_CORRUPT_RIGHT_IS_LEFT);
|
|
return -1;
|
|
}
|
|
count_l = test_rb_tree(element->left, element);
|
|
count_r = test_rb_tree(element->right, element);
|
|
if (count_l >= 0 && count_r >= 0) {
|
|
if (count_l == count_r) return count_l + (element->color == SEL_ARG::BLACK);
|
|
LogErr(ERROR_LEVEL, ER_TREE_CORRUPT_INCORRECT_BLACK_COUNT, count_l,
|
|
count_r);
|
|
}
|
|
return -1; // Error, no more warnings
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
Count how many times SEL_ARG graph "root" refers to its part "key" via
|
|
transitive closure.
|
|
|
|
@param root An RB-Root node in a SEL_ARG graph.
|
|
@param key Another RB-Root node in that SEL_ARG graph.
|
|
@param seen Which SEL_ARGs we have already seen in this traversal.
|
|
Used for deduplication, so that we only count each
|
|
SEL_ARG once.
|
|
|
|
The passed "root" node may refer to "key" node via root->next_key_part,
|
|
root->next->n
|
|
|
|
This function counts how many times the node "key" is referred (via
|
|
SEL_ARG::next_key_part) by
|
|
- intervals of RB-tree pointed by "root",
|
|
- intervals of RB-trees that are pointed by SEL_ARG::next_key_part from
|
|
intervals of RB-tree pointed by "root",
|
|
- and so on.
|
|
|
|
Here is an example (horizontal links represent next_key_part pointers,
|
|
vertical links - next/prev prev pointers):
|
|
|
|
+----+ $
|
|
|root|-----------------+
|
|
+----+ $ |
|
|
| $ |
|
|
| $ |
|
|
+----+ +---+ $ | +---+ Here the return value
|
|
| |- ... -| |---$-+--+->|key| will be 4.
|
|
+----+ +---+ $ | | +---+
|
|
| $ | |
|
|
... $ | |
|
|
| $ | |
|
|
+----+ +---+ $ | |
|
|
| |---| |---------+ |
|
|
+----+ +---+ $ |
|
|
| | $ |
|
|
... +---+ $ |
|
|
| |------------+
|
|
+---+ $
|
|
@return
|
|
Number of links to "key" from nodes reachable from "root".
|
|
*/
|
|
|
|
static ulong count_key_part_usage(const SEL_ROOT *root, const SEL_ROOT *key,
|
|
std::set<const SEL_ROOT *> *seen) {
|
|
// Don't count paths from a given key twice.
|
|
if (seen->count(root)) return 0;
|
|
seen->insert(root);
|
|
ulong count = 0;
|
|
for (SEL_ARG *node = root->root->first(); node; node = node->next) {
|
|
if (node->next_key_part) {
|
|
if (node->next_key_part == key) count++;
|
|
if (node->next_key_part->root->part < key->root->part)
|
|
count += count_key_part_usage(node->next_key_part, key, seen);
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
bool SEL_ROOT::test_use_count(const SEL_ROOT *origin) const {
|
|
uint e_count = 0;
|
|
if (this == origin && use_count != 1) {
|
|
LogErr(INFORMATION_LEVEL, ER_WRONG_COUNT_FOR_ORIGIN, use_count, this);
|
|
assert(false);
|
|
return true;
|
|
}
|
|
if (type != SEL_ROOT::Type::KEY_RANGE) return false;
|
|
for (SEL_ARG *pos = root->first(); pos; pos = pos->next) {
|
|
e_count++;
|
|
if (pos->next_key_part) {
|
|
std::set<const SEL_ROOT *> seen;
|
|
ulong count = count_key_part_usage(origin, pos->next_key_part, &seen);
|
|
/*
|
|
This cannot be a strict equality test, since there might be
|
|
connections from the keys[] array that we don't see.
|
|
*/
|
|
if (count > pos->next_key_part->use_count) {
|
|
LogErr(INFORMATION_LEVEL, ER_WRONG_COUNT_FOR_KEY, pos->next_key_part,
|
|
pos->next_key_part->use_count, count);
|
|
assert(false);
|
|
return true;
|
|
}
|
|
pos->next_key_part->test_use_count(origin);
|
|
}
|
|
}
|
|
if (e_count != elements) {
|
|
LogErr(WARNING_LEVEL, ER_WRONG_COUNT_OF_ELEMENTS, e_count, elements, this);
|
|
assert(false);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool get_sel_root_for_keypart(uint key_part_num, SEL_ROOT *keypart_tree,
|
|
SEL_ROOT **cur_range) {
|
|
if (keypart_tree == nullptr) return false;
|
|
if (keypart_tree->type != SEL_ROOT::Type::KEY_RANGE) {
|
|
/*
|
|
A range predicate not usable by Loose Index Scan is found.
|
|
Predicates for keypart 'keypart_tree->root->part' and later keyparts
|
|
cannot be used.
|
|
*/
|
|
*cur_range = keypart_tree;
|
|
return false;
|
|
}
|
|
if (keypart_tree->root->part == key_part_num) {
|
|
*cur_range = keypart_tree;
|
|
return false;
|
|
}
|
|
|
|
SEL_ROOT *tree_first_range = nullptr;
|
|
SEL_ARG *first_kp = keypart_tree->root->first();
|
|
|
|
for (SEL_ARG *cur_kp = first_kp; cur_kp; cur_kp = cur_kp->next) {
|
|
SEL_ROOT *curr_tree = nullptr;
|
|
if (cur_kp->next_key_part) {
|
|
if (get_sel_root_for_keypart(key_part_num, cur_kp->next_key_part,
|
|
&curr_tree))
|
|
return true;
|
|
}
|
|
/**
|
|
Check if the SEL_ARG tree for 'field' is identical for all ranges in
|
|
'keypart_tree'.
|
|
*/
|
|
if (cur_kp == first_kp)
|
|
tree_first_range = curr_tree;
|
|
else if (!all_same(tree_first_range, curr_tree))
|
|
return true;
|
|
}
|
|
*cur_range = tree_first_range;
|
|
return false;
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
void print_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree, Key_map *tree_map,
|
|
const char *msg) {
|
|
char buff[1024];
|
|
DBUG_TRACE;
|
|
|
|
String tmp(buff, sizeof(buff), &my_charset_bin);
|
|
tmp.length(0);
|
|
for (uint idx = 0; idx < param->keys; idx++) {
|
|
if (tree_map->is_set(idx)) {
|
|
uint keynr = param->real_keynr[idx];
|
|
if (tmp.length()) tmp.append(',');
|
|
tmp.append(param->table->key_info[keynr].name);
|
|
}
|
|
}
|
|
if (!tmp.length()) tmp.append(STRING_WITH_LEN("(empty)"));
|
|
|
|
DBUG_PRINT("info", ("SEL_TREE: %p (%s) scans: %s", tree, msg, tmp.ptr()));
|
|
}
|
|
#endif
|