504 lines
18 KiB
C++
504 lines
18 KiB
C++
/* Copyright (c) 2016, 2024, Oracle and/or its affiliates.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License, version 2.0,
|
|
as published by the Free Software Foundation.
|
|
|
|
This program is designed to work with certain software (including
|
|
but not limited to OpenSSL) that is licensed under separate terms,
|
|
as designated in a particular file or component or in included license
|
|
documentation. The authors of MySQL hereby grant you an additional
|
|
permission to link the program and your derivative works with the
|
|
separately licensed software that they have either included with
|
|
the program or referenced in the documentation.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License, version 2.0, for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
#ifndef SORT_PARAM_INCLUDED
|
|
#define SORT_PARAM_INCLUDED
|
|
|
|
#include <assert.h>
|
|
#include <algorithm>
|
|
|
|
#include "field_types.h" // enum_field_types
|
|
#include "my_base.h" // ha_rows
|
|
#include "my_byteorder.h" // uint4korr
|
|
|
|
#include "my_inttypes.h"
|
|
#include "my_io.h" // mysql_com.h needs my_socket
|
|
#include "mysql_com.h" // Item_result
|
|
#include "sql/mem_root_array.h"
|
|
#include "sql/sql_array.h" // Bounds_checked_array
|
|
#include "sql/sql_const.h"
|
|
#include "sql/sql_sort.h" // Filesort_info
|
|
#include "sql/thr_malloc.h"
|
|
#include "sql_string.h"
|
|
|
|
class Field;
|
|
class Filesort;
|
|
class Item;
|
|
struct TABLE;
|
|
|
|
enum class Addon_fields_status {
|
|
unknown_status,
|
|
using_addon_fields,
|
|
|
|
// The remainder are reasons why we are _not_ using addon fields.
|
|
fulltext_searched,
|
|
keep_rowid,
|
|
row_not_packable,
|
|
row_contains_blob,
|
|
skip_heuristic,
|
|
using_priority_queue
|
|
};
|
|
|
|
inline const char *addon_fields_text(Addon_fields_status afs) {
|
|
switch (afs) {
|
|
default:
|
|
return "unknown";
|
|
case Addon_fields_status::using_addon_fields:
|
|
return "using_addon_fields";
|
|
case Addon_fields_status::fulltext_searched:
|
|
return "fulltext_searched";
|
|
case Addon_fields_status::keep_rowid:
|
|
return "keep_rowid";
|
|
case Addon_fields_status::row_not_packable:
|
|
return "row_not_packable";
|
|
case Addon_fields_status::row_contains_blob:
|
|
return "row_contains_blob";
|
|
case Addon_fields_status::skip_heuristic:
|
|
return "skip_heuristic";
|
|
case Addon_fields_status::using_priority_queue:
|
|
return "using_priority_queue";
|
|
}
|
|
}
|
|
|
|
/* Structs used when sorting */
|
|
|
|
/// Struct that holds information about a sort field.
|
|
struct st_sort_field {
|
|
Item *item; ///< Item to sort
|
|
|
|
/// Length of sort field. Beware, can be 0xFFFFFFFFu (infinite)!
|
|
uint length;
|
|
|
|
Item_result result_type; ///< Type of item
|
|
enum_field_types field_type; ///< Field type of the item
|
|
bool reverse; ///< if descending sort
|
|
bool is_varlen; ///< If key part has variable length
|
|
bool maybe_null; ///< If key part is nullable
|
|
};
|
|
|
|
/**
|
|
The structure Sort_addon_field describes the layout
|
|
for field values appended to sorted values in records to be sorted
|
|
in the sort buffer.
|
|
|
|
Null bit maps for the appended values is placed before the values
|
|
themselves. Offsets are from the last sorted field.
|
|
|
|
The structure is used to store values of the additional fields
|
|
in the sort buffer. It is used also when these values are read
|
|
from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
|
|
*/
|
|
|
|
struct Sort_addon_field { /* Sort addon packed field */
|
|
Field *field; /* Original field */
|
|
uint null_offset; /* Offset to to null bit from the last sorted field */
|
|
uint max_length; /* Maximum length in the sort buffer */
|
|
uint8 null_bit; /* Null bit mask for the field */
|
|
};
|
|
|
|
typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;
|
|
|
|
/**
|
|
This class wraps information about usage of addon fields.
|
|
An Addon_fields object is used both during packing of data in the filesort
|
|
buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
|
|
|
|
@see documentation for the Sort_addon_field struct.
|
|
@see documentation for get_addon_fields()
|
|
*/
|
|
class Addon_fields {
|
|
public:
|
|
Addon_fields(Addon_fields_array arr)
|
|
: m_field_descriptors(arr),
|
|
m_addon_buf(nullptr),
|
|
m_addon_buf_length(0),
|
|
m_using_packed_addons(false) {
|
|
assert(!arr.is_null());
|
|
}
|
|
|
|
Sort_addon_field *begin() { return m_field_descriptors.begin(); }
|
|
Sort_addon_field *end() { return m_field_descriptors.end(); }
|
|
size_t num_field_descriptors() const { return m_field_descriptors.size(); }
|
|
|
|
/// SortFileIterator needs an extra buffer when unpacking.
|
|
uchar *allocate_addon_buf(uint sz) {
|
|
if (using_packed_addons()) {
|
|
sz += Addon_fields::size_of_length_field;
|
|
} else {
|
|
// For fixed-size "addons" the size should not change.
|
|
assert(m_addon_buf == nullptr || m_addon_buf_length == sz);
|
|
}
|
|
/*
|
|
For subqueries we try to re-use the buffer.
|
|
With packed addons, the longest_addons may change, so we may have
|
|
to allocate a larger buffer below.
|
|
*/
|
|
if (m_addon_buf != nullptr && m_addon_buf_length >= sz) {
|
|
return m_addon_buf;
|
|
}
|
|
m_addon_buf = static_cast<uchar *>((*THR_MALLOC)->Alloc(sz));
|
|
if (m_addon_buf) m_addon_buf_length = sz;
|
|
return m_addon_buf;
|
|
}
|
|
|
|
uchar *get_addon_buf() { return m_addon_buf; }
|
|
uint get_addon_buf_length() const { return m_addon_buf_length; }
|
|
|
|
void set_using_packed_addons(bool val) { m_using_packed_addons = val; }
|
|
|
|
void set_first_addon_relative_offset(int offset) {
|
|
m_first_addon_relative_offset = offset;
|
|
}
|
|
int first_addon_offset() const {
|
|
return skip_bytes() + m_first_addon_relative_offset;
|
|
}
|
|
|
|
bool using_packed_addons() const { return m_using_packed_addons; }
|
|
|
|
/**
|
|
How many bytes to skip to get to the actual data; first NULL flags
|
|
(for tables and addon fields) and then the actual addons.
|
|
*/
|
|
size_t skip_bytes() const {
|
|
if (m_using_packed_addons) {
|
|
return Addon_fields::size_of_length_field;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
@returns Total number of bytes used for packed addon fields.
|
|
the size of the length field + size of null bits + sum of field sizes.
|
|
*/
|
|
static uint read_addon_length(uchar *p) {
|
|
return size_of_length_field + uint4korr(p);
|
|
}
|
|
|
|
/**
|
|
Stores the number of bytes used for packed addon fields.
|
|
*/
|
|
static void store_addon_length(uchar *p, uint sz) {
|
|
// We actually store the length of everything *after* the length field.
|
|
int4store(p, sz - size_of_length_field);
|
|
}
|
|
|
|
static const uint size_of_length_field = 4;
|
|
|
|
private:
|
|
Addon_fields_array m_field_descriptors;
|
|
|
|
uchar *m_addon_buf; ///< Buffer for unpacking addon fields.
|
|
uint m_addon_buf_length; ///< Length of the buffer.
|
|
bool m_using_packed_addons; ///< Are we packing the addon fields?
|
|
|
|
/// Number of bytes from after skip_bytes() to the beginning of the first
|
|
/// addon field.
|
|
int m_first_addon_relative_offset = 0;
|
|
};
|
|
|
|
/* clang-format off */
|
|
/**
|
|
There are several record formats for sorting:
|
|
@verbatim
|
|
|<key a><key b>... | ( <null row flag> | <rowid> | ) * num_tables
|
|
/ m_fixed_sort_length / ( 0 or 1 bytes | ref_len / )
|
|
@endverbatim
|
|
|
|
or with "addon fields"
|
|
@verbatim
|
|
|<key a><key b>... |<null bits>|<field a><field b>...|
|
|
/ m_fixed_sort_length / addon_length /
|
|
@endverbatim
|
|
|
|
The packed format for "addon fields"
|
|
@verbatim
|
|
|<key a><key b>... |<length>|<null bits>|<field a><field b>...|
|
|
/ m_fixed_sort_length / addon_length /
|
|
@endverbatim
|
|
|
|
For packed addon fields, fields are not stored if the table is nullable and
|
|
has its NULL bit set.
|
|
|
|
All the figures above are depicted for the case of fixed-size keys, with
|
|
appropriate padding. Fixed-size keys can be compared/sorted using memcmp().
|
|
|
|
The packed (variable length) format for keys:
|
|
@verbatim
|
|
|<keylen>|<varkey a><key b>...<hash>|<(null_row,rowid) * num_tables> or <addons> |
|
|
/ 4 bytes/ keylen bytes / (0/1 + ref_len) * num_tables or addon_length /
|
|
@endverbatim
|
|
|
|
Variable-size keys must be compared piece-by-piece, using type information
|
|
about each individual key part, @see cmp_varlen_keys.
|
|
|
|
All the record formats consist of a (possibly composite) key,
|
|
followed by a (possibly composite) payload.
|
|
The key is used for sorting data. Once sorting is done, the payload is
|
|
stored in some buffer, and read by some RowIterator.
|
|
|
|
<dl>
|
|
<dt>@<key@>
|
|
<dd> Fields are fixed-size, specially encoded with
|
|
Field::make_sort_key() so we can do byte-by-byte compare.
|
|
<dt>@<length@>
|
|
<dd> Contains the *actual* packed length (after packing) of
|
|
everything after the sort keys.
|
|
The size of the length field is 2 bytes,
|
|
which should cover most use cases: addon data <= 65535 bytes.
|
|
This is the same as max record size in MySQL.
|
|
<dt>@<null bits@>
|
|
<dd> One bit for each nullable table and field, indicating whether
|
|
the table/field is NULL or not. May have size zero if no fields
|
|
or rows are nullable. NULL bits for rows (on nullable tables),
|
|
if any, always come before NULL bits for fields.
|
|
|
|
<dt>@<field xx@>
|
|
<dd> Are stored with field->pack(), and retrieved with
|
|
field->unpack().
|
|
Addon fields within a record are stored consecutively, with no
|
|
"holes" or padding. They will have zero size for NULL values.
|
|
<dt>@<keylen@>
|
|
<dd> Contains the *actual* packed length of all the keys.
|
|
We may have an arbitrary mix of fixed and variable-sized keys.
|
|
<dt>@<hash@>
|
|
<dd> Optional 8 byte hash, used for GROUPing of JSON values.
|
|
<dt>@<varkey@>
|
|
<dd> Used for JSON and variable-length string values, the format is:
|
|
</dl>
|
|
@verbatim
|
|
|<null value>|<key length>|<sort key> |
|
|
/ 1 byte / 4 bytes / key length bytes /
|
|
@endverbatim
|
|
<dl>
|
|
<dt>@<null value@>
|
|
<dd> 0x00 for NULL. 0xff for NULL under DESC sort. 0x01 for NOT NULL.
|
|
<dt>@<key length@>
|
|
<dd> The length of the sort key, *including* the four bytes for the
|
|
key length. Does not exist if the field is NULL.
|
|
</dl>
|
|
*/
|
|
/* clang-format on */
|
|
class Sort_param {
|
|
uint m_fixed_rec_length{0}; ///< Maximum length of a record, see above.
|
|
uint m_fixed_sort_length{0}; ///< Maximum number of bytes used for sorting.
|
|
public:
|
|
uint sum_ref_length{0}; // Length of record ref.
|
|
uint m_addon_length{0}; // Length of added packed fields.
|
|
uint fixed_res_length{0}; // Length of records in final sorted file/buffer.
|
|
uint max_rows_per_buffer{0}; // Max (unpacked) rows / buffer.
|
|
ha_rows max_rows{0}; // Select limit, or HA_POS_ERROR if unlimited.
|
|
bool use_hash{false}; // Whether to use hash to distinguish cut JSON
|
|
bool m_remove_duplicates{
|
|
false}; ///< Whether we want to remove duplicate rows
|
|
|
|
/// If we are removing duplicate rows and merging, contains a buffer where we
|
|
/// can store the last key seen.
|
|
uchar *m_last_key_seen{nullptr};
|
|
|
|
/**
|
|
ORDER BY list with some precalculated info for filesort.
|
|
Array is created and owned by a Filesort instance.
|
|
*/
|
|
Bounds_checked_array<st_sort_field> local_sortorder;
|
|
|
|
Addon_fields *addon_fields{nullptr}; ///< Descriptors for addon fields.
|
|
bool using_pq{false};
|
|
StringBuffer<STRING_BUFFER_USUAL_SIZE> tmp_buffer;
|
|
|
|
/// Decide whether we are to use addon fields (sort rows instead of sorting
|
|
/// row IDs or not). See using_addon_fields().
|
|
///
|
|
/// Note that currently, this function must _not_ be called from the Filesort
|
|
/// constructor, as the read sets are not fully set up at that time
|
|
/// (see filter_virtual_gcol_base_cols(), which runs very late in
|
|
/// optimization). If we want to change this, we can probably have
|
|
/// make_sortkey() check the read set at runtime, at the cost of slightly less
|
|
/// precise estimation of packed row size.
|
|
void decide_addon_fields(Filesort *file_sort,
|
|
const Mem_root_array<TABLE *> &tables,
|
|
bool force_sort_rowids);
|
|
|
|
/// Reset the decision made in decide_addon_fields(). Only used in exceptional
|
|
/// circumstances (see NewWeedoutAccessPathForTables()).
|
|
void clear_addon_fields();
|
|
|
|
/**
|
|
Initialize this struct for filesort() usage.
|
|
@see description of record layout above
|
|
@param [in,out] file_sort sorting information which may be re-used on
|
|
subsequent invocations of filesort()
|
|
@param sf_array initialization value for local_sortorder
|
|
@param sortlen length of sorted columns
|
|
@param tables tables to be sorted
|
|
@param maxrows HA_POS_ERROR or possible LIMIT value
|
|
@param remove_duplicates if true, items with duplicate keys will be removed
|
|
*/
|
|
void init_for_filesort(Filesort *file_sort,
|
|
Bounds_checked_array<st_sort_field> sf_array,
|
|
uint sortlen, const Mem_root_array<TABLE *> &tables,
|
|
ha_rows maxrows, bool remove_duplicates);
|
|
|
|
/**
|
|
Initialize this struct for unit testing.
|
|
*/
|
|
void init_for_unittest(Bounds_checked_array<st_sort_field> sf_array) {
|
|
local_sortorder = sf_array;
|
|
m_num_varlen_keys = count_varlen_keys();
|
|
}
|
|
|
|
/// Enables the packing of addons if possible.
|
|
void try_to_pack_addons();
|
|
|
|
/// Are we packing the "addon fields"?
|
|
bool using_packed_addons() const {
|
|
assert(m_using_packed_addons ==
|
|
(addon_fields != nullptr && addon_fields->using_packed_addons()));
|
|
return m_using_packed_addons;
|
|
}
|
|
|
|
/// Are we using varlen key fields?
|
|
bool using_varlen_keys() const { return m_num_varlen_keys > 0; }
|
|
|
|
/// Are we using any JSON key fields?
|
|
bool using_json_keys() const { return m_num_json_keys > 0; }
|
|
|
|
/// Are we using "addon fields"? Note that decide_addon_fields() or
|
|
/// init_for_filesort() must be called before checking this.
|
|
bool using_addon_fields() const { return addon_fields != nullptr; }
|
|
|
|
/**
|
|
Stores key fields in *dst.
|
|
Then appends either *ref_pos (the @<rowid@>) or the "addon fields".
|
|
@param [out] dst Where to store the result
|
|
@param tables Tables to get @<rowid@> from
|
|
@param [in,out] longest_addons
|
|
The longest addon field row (sum of all addon fields for any single
|
|
given row) found.
|
|
@returns Number of bytes stored, or UINT_MAX if the result could not
|
|
provably fit within the destination buffer.
|
|
*/
|
|
uint make_sortkey(Bounds_checked_array<uchar> dst,
|
|
const Mem_root_array<TABLE *> &tables,
|
|
size_t *longest_addons);
|
|
|
|
// Adapter for Bounded_queue.
|
|
uint make_sortkey(uchar *dst, size_t dst_len,
|
|
const Mem_root_array<TABLE *> &tables) {
|
|
size_t longest_addons = 0; // Unused.
|
|
return make_sortkey(Bounds_checked_array<uchar>(dst, dst_len), tables,
|
|
&longest_addons);
|
|
}
|
|
|
|
/// Stores the length of a variable-sized key.
|
|
static void store_varlen_key_length(uchar *p, uint sz) { int4store(p, sz); }
|
|
|
|
/// Skips the key part, and returns address of payload.
|
|
uchar *get_start_of_payload(uchar *p) const {
|
|
size_t offset = using_varlen_keys() ? uint4korr(p) : max_compare_length();
|
|
if (!using_addon_fields() && !using_varlen_keys())
|
|
offset -= sum_ref_length; // The reference is also part of the sort key.
|
|
return p + offset;
|
|
}
|
|
|
|
/**
|
|
Skips the key part, and returns address of payload.
|
|
For SortBufferIterator, which does not have access to Sort_param.
|
|
*/
|
|
static uchar *get_start_of_payload(uint default_val, bool is_varlen,
|
|
uchar *p) {
|
|
const size_t offset = is_varlen ? uint4korr(p) : default_val;
|
|
return p + offset;
|
|
}
|
|
|
|
/// @returns The number of bytes used for sorting of fixed-size keys.
|
|
uint max_compare_length() const { return m_fixed_sort_length; }
|
|
|
|
void set_max_compare_length(uint len) { m_fixed_sort_length = len; }
|
|
|
|
/// @returns The actual size of a record (key + addons)
|
|
size_t get_record_length(uchar *p) const;
|
|
|
|
/// @returns The maximum size of a record (key + addons)
|
|
uint max_record_length() const { return m_fixed_rec_length; }
|
|
|
|
void set_max_record_length(uint len) { m_fixed_rec_length = len; }
|
|
|
|
/**
|
|
Getter for record length and result length.
|
|
@param record_start Pointer to record.
|
|
@param [out] recl Store record length here.
|
|
@param [out] resl Store result length here.
|
|
*/
|
|
void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl);
|
|
|
|
// NOTE: Even with FILESORT_ALG_STD_STABLE, we do not necessarily have a
|
|
// stable sort if spilling to disk; this is purely a performance option.
|
|
enum enum_sort_algorithm {
|
|
FILESORT_ALG_NONE,
|
|
FILESORT_ALG_STD_SORT,
|
|
FILESORT_ALG_STD_STABLE
|
|
};
|
|
enum_sort_algorithm m_sort_algorithm{FILESORT_ALG_NONE};
|
|
|
|
Addon_fields_status m_addon_fields_status{
|
|
Addon_fields_status::unknown_status};
|
|
|
|
static const uint size_of_varlength_field = 4;
|
|
|
|
private:
|
|
/// Counts number of varlen keys
|
|
int count_varlen_keys() const {
|
|
return std::count_if(local_sortorder.begin(), local_sortorder.end(),
|
|
[](const auto &sf) { return sf.is_varlen; });
|
|
}
|
|
|
|
/// Counts number of JSON keys
|
|
int count_json_keys() const;
|
|
|
|
/// total length of fields which have a packable type
|
|
uint m_packable_length{0};
|
|
/// caches the value of using_packed_addons()
|
|
bool m_using_packed_addons{false};
|
|
int m_num_varlen_keys{0}; ///< number of varlen keys
|
|
int m_num_json_keys{0}; ///< number of JSON keys
|
|
|
|
public:
|
|
Sort_param() = default;
|
|
// Not copyable.
|
|
Sort_param(const Sort_param &) = delete;
|
|
Sort_param &operator=(const Sort_param &) = delete;
|
|
};
|
|
|
|
inline uchar *get_start_of_payload(const Filesort_info *fsi, uchar *p) {
|
|
return Sort_param::get_start_of_payload(fsi->sort_length(),
|
|
fsi->using_varlen_keys(), p);
|
|
}
|
|
|
|
/// Are we using "packed addon fields"?
|
|
inline bool using_packed_addons(const Filesort_info *fsi) {
|
|
return fsi->addon_fields != nullptr &&
|
|
fsi->addon_fields->using_packed_addons();
|
|
}
|
|
|
|
#endif // SORT_PARAM_INCLUDED
|