mysql-server/mysql-test/t/group_by_hypergraph.test
2025-03-05 14:31:37 +07:00

357 lines
11 KiB
Text

--source include/have_hypergraph.inc
--source include/elide_costs.inc
--echo #
--echo # Bug#32980875: WL#14422: ASSERTION `FALSE' FAILED|SQL/BASIC_ROW_ITERATORS.H
--echo #
# This should have been a unit test in hypergraph_optimizer-t.cc, but
# the unit test framework does not currently allow creating temporary
# tables.
#
# We want to test that the hypergraph optimizer doesn't try to do
# sort-ahead on an aggregate.
#
# We expect the hypergraph optimizer to set up a plan like this:
#
# -> Remove duplicates from input grouped on t.x, `MIN(t2.x)`
# -> Stream results
# -> Group aggregate: min(t2.x)
# -> Sort: t2.x
# -> Inner hash join (t1.x = t2.x)
# -> Table scan on t1
# -> Hash
# -> Table scan on t2
#
# Before, it would set up this plan, which tried to sort on MIN(t2.x)
# before the aggregation had happened:
#
# -> Remove duplicates from input grouped on t1.x, min(t2.x)
# -> Stream results
# -> Group aggregate: min(t2.x)
# -> Sort: t1.x, min(t2.x)
# -> Inner hash join (t1.x = t2.x)
# -> Table scan on t1
# -> Hash
# -> Table scan on t2
#
# Note that because MIN(t2.x) is functionally dependent on the GROUP BY
# expression (which happens to be t2.x as well here), we avoid a sort
# in the final DISTINCT pass. We further shuffle the SELECT expressions
# around a bit (putting the MIN() first) to demonstrate that our sorting of
# expressions in an interesting grouping is robust.
#
# Of course, we should have been able to remove the entire DISTINCT operation,
# and if we wrote t2.x instead of t1.x, we would be able to do that. However,
# the interesting order framework does not track uniqueness (so can not do it),
# and the hard-coded DISTINCT removal, which runs before the join optimizer,
# does not take functional dependencies into account, so it does not know that
# t1.x (in the SELECT list) = t2.x (in the GROUP BY list).
CREATE TABLE t (x INTEGER);
INSERT INTO t VALUES (1), (2), (3);
ANALYZE TABLE t;
--replace_regex $elide_metrics
EXPLAIN FORMAT=TREE
SELECT DISTINCT MIN(t2.x), t1.x
FROM t t1 JOIN t t2 USING (x)
GROUP BY t2.x;
DROP TABLE t;
--echo #
--echo # Bug #34670701 Too many ROLLUP rows with hypergraph
--echo #
CREATE TABLE t1(
a INT,
b INT,
c INT,
d INT,
e INT,
PRIMARY KEY(a,b),
KEY ix1 (c,d)
);
INSERT INTO t1 VALUES (0,0,0,0,1), (1,0,1,0,1), (0,1,2,0,1), (2,0,2,0,1), (4,0,0,0,1);
ANALYZE TABLE t1;
# When we use ROLLUP, we can scan ix1 if the group-by terms form a (correctly ordered)
# prefix of [c,d,a,b] and the primary key for a prefix of [a,b]. Otherwise, we must sort.
# All of these need sort.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,d,sum(e) FROM t1 GROUP BY a,c,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,a,d,sum(e) FROM t1 GROUP BY c,a,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,b,sum(e) FROM t1 GROUP BY c,d,b WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,c,a,sum(e) FROM t1 GROUP BY d,c,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,sum(e) FROM t1 GROUP BY b WITH ROLLUP;
# For these queries, the group-by key form a prefix of [c,d,a,b]
# (in that order). Therefore they can scan ix1.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;
SELECT c,sum(e) FROM t1 GROUP BY c WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;
SELECT c,d,sum(e) FROM t1 GROUP BY c,d WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;
SELECT c,d,a,sum(e) FROM t1 GROUP BY c,d,a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;
SELECT c,d,a,b,sum(e) FROM t1 GROUP BY c,d,a,b WITH ROLLUP;
# For these queries, the group-by terms for a prefix of the primary index.
# So we can scan that.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;
SELECT a,sum(e) FROM t1 GROUP BY a WITH ROLLUP;
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;
SELECT a,b,sum(e) FROM t1 GROUP BY a,b WITH ROLLUP;
#No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;
SELECT d,a,c,sum(e) FROM t1 GROUP BY d,a,c;
#No ROLLUP. We scan ix1 since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;
SELECT a,d,c,sum(e) FROM t1 GROUP BY a,d,c;
#No ROLLUP. We scan the primary index since it covers the group-by fields.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT b,a,sum(e) FROM t1 GROUP BY b,a;
SELECT b,a,sum(e) FROM t1 GROUP BY b,a;
#No ROLLUP. No index covers the group-by fields, therefore we must sort.
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT a,c,e,sum(d) FROM t1 GROUP BY a,c,e;
DROP TABLE t1;
--echo #
--echo # Bug #33968442: Hypergraph gives too high row estimates for GROUP BY
--echo #
CREATE TABLE num10 (n INT);
INSERT INTO num10 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE VIEW num1000 AS
SELECT d1.n+d2.n*10+d3.n*100 n FROM num10 d1, num10 d2, num10 d3;
CREATE TABLE t1(
a INT,
b INT,
c INT,
d INT,
e INT,
f INT,
g INT,
h INT,
i INT,
j INT,
k INT,
l INT,
PRIMARY KEY(a,b),
KEY ix1 (c,d),
KEY ix2 (d,a,c),
KEY ix3 (g,h,i,j),
KEY ix4 (k,j,l),
KEY ix5 (k,l)
);
INSERT INTO t1
SELECT n/100,n%100,n%5,n%7,n%11,n%13,n%10,n%10,n%10,n%10,n%10,n%10
FROM num1000;
ANALYZE TABLE t1;
# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a;
# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c;
# Estimate result size using ix1.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,c;
# Estimate result size using ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a;
# Estimate result size using ix1 or ix2.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a;
# Estimate result size using:
# - ix1 or ix2 for d,c,a.
# - ix1 or ix2 for d,c
# - ix2 d
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY d,a,c WITH ROLLUP;
# Estimate result size using:
# - ix1 or ix2 for c,d,a.
# - ix1 for c,d
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,d,a WITH ROLLUP;
# Estimate result size using:
# - ix1 or ix2 for c,a,d.
# - ix1 and PRIMARY for c,a
# - ix1 for c
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a,d WITH ROLLUP;
# Estimate result size using ix1 and PRIMARY.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,a;
# Estimate result size using ix1 (for c) and t1 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c,b;
# Estimate result size using t1 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;
# Estimate result size using ix3 and ix5 (not one-field prefix of ix4).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY g,h,i,j,k,l;
ANALYZE TABLE t1 UPDATE HISTOGRAM ON a,b,c,d,e,f,g,h,i;
ANALYZE TABLE t1;
# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e;
# Estimate result size using histograms.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY e,f;
# Estimate result size using histograms, and then cap to input set size.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY e,f;
# Estimate result size using input row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY c+0,e+0;
# Estimate result size using input row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 WHERE b>95 GROUP BY c+0,e+0;
# Estimate result size using primary index (for a), histogram (for e)
# and t1 row count for 'c+0'.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1 GROUP BY a,e,c+0;
CREATE TABLE t2 (
c1 INT,
c2 INT,
c3 INT,
PRIMARY KEY(c1,c2)
);
INSERT INTO t2 SELECT n%5,n/5,n%3 FROM num10;
ANALYZE TABLE t2 UPDATE HISTOGRAM ON c3;
ANALYZE TABLE t2;
# Estimate result size using primary index.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c1;
# Estimate result size using t2 row count.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c2;
# Estimate result size using histogram.
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY c3;
# Estimate result size using primary index (a) and histogram (c3).
--replace_regex $elide_costs_and_time
EXPLAIN ANALYZE SELECT 1 FROM t1,t2 GROUP BY a,c3;
DROP VIEW num1000;
DROP TABLE num10, t1, t2;
--echo #
--echo # Bug#34844509: Assertion `receiver.HasSecondaryEngineCostHook()' failed
--echo #
CREATE TABLE t (
col1 INT, col2 INT, col3 INT, col4 INT, col5 INT, col6 INT,
col7 INT, col8 INT, col9 INT, col10 INT, col11 INT, col12 INT,
col13 INT, col14 INT, col15 INT, col16 INT, col17 INT, col18 INT,
col19 INT, col20 INT, col21 INT, col22 INT, col23 INT, col24 INT,
col25 INT, col26 INT, col27 INT, col28 INT, col29 INT, col30 INT,
col31 INT, col32 INT, col33 INT, col34 INT, col35 INT, col36 INT,
col37 INT, col38 INT, col39 INT, col40 INT, col41 INT, col42 INT,
col43 INT, col44 INT, col45 INT, col46 INT, col47 INT, col48 INT,
col49 INT, col50 INT, col51 INT, col52 INT, col53 INT, col54 INT,
col55 INT, col56 INT, col57 INT, col58 INT, col59 INT, col60 INT,
col61 INT, col62 INT, col63 INT, col64 INT,
KEY (col1, col2), KEY (col2, col3), KEY (col3), KEY (col4), KEY (col5),
KEY (col6), KEY (col7), KEY (col8), KEY (col9), KEY (col10),
KEY (col11), KEY (col12), KEY (col13), KEY (col14), KEY (col15),
KEY (col16), KEY (col17), KEY (col18), KEY (col19), KEY (col20),
KEY (col21), KEY (col22), KEY (col23), KEY (col24), KEY (col25),
KEY (col26), KEY (col27), KEY (col28), KEY (col29), KEY (col30),
KEY (col31), KEY (col32), KEY (col33), KEY (col34), KEY (col35),
KEY (col36), KEY (col37), KEY (col38), KEY (col39), KEY (col40),
KEY (col41), KEY (col42), KEY (col43), KEY (col44), KEY (col45),
KEY (col46), KEY (col47), KEY (col48), KEY (col49), KEY (col50),
KEY (col51), KEY (col52), KEY (col53), KEY (col54), KEY (col55),
KEY (col56), KEY (col57), KEY (col58), KEY (col59), KEY (col60),
KEY (col61), KEY (col62), KEY (col63), KEY (col64));
ANALYZE TABLE t;
--source include/turn_off_only_full_group_by.inc
--replace_regex $elide_costs
EXPLAIN FORMAT=TREE SELECT DISTINCT * FROM t GROUP BY col1 + col2;
--source include/restore_sql_mode_after_turn_off_only_full_group_by.inc
DROP TABLE t;