MDEV-21104 Wrong result (extra rows and wrong values) with incremental BNLH

This bug could affect multi-way join queries with embedded outer joins that
contained a conjunctive IS NULL predicate over a non-nullable column from
the inner table of an outer join. The predicate could occur in the WHERE
condition or in an ON condition. Due to this bug the query could return a
wrong result set. The bug manifested itself only when join buffers were
employed for join operations.

The problem appeared because of
- a bug in the function JOIN_CACHE::get_match_flag_by_pos, which did not
  always return proper match flags for embedding outer joins stored together
  with table rows put into a join buffer;
- a bug in the function JOIN_CACHE::join_matching_records, which did not
  always correctly determine that a row from the buffer could be skipped due
  to the applied 'not_exists' optimization.
Example:
  SELECT * FROM t1 LEFT JOIN ((t2 LEFT JOIN t3 ON c = d) JOIN t4) ON b = e
    WHERE e IS NULL;

The patch introduces a new function that finds the match flag for a record
from join buffer specifying the buffer where this flag has to be found.
The function is called JOIN_CACHE::get_match_flag_by_pos_from_join_buffer().
Now this function rather than JOIN_CACHE::get_match_flag_by_pos() is used
in JOIN_CACHE::skip_if_matched() to check whether a record from the join
buffer must be ignored when extending the record by null complements.
Also the code of the function JOIN_CACHE::skip_if_not_needed_match() has
been changed. The function checks whether a record from the join buffer
still may produce some useful extensions.
Also some clarifying comments have been added.

Approved by monty@mariadb.com.
This commit is contained in:
Igor Babaev 2021-03-10 17:26:43 -08:00
commit 90780bb5a9
4 changed files with 177 additions and 11 deletions

View file

@ -6054,4 +6054,57 @@ select f2 from t2,t1 where f2 = 0;
f2
drop table t1, t2;
set join_buffer_size=@save_join_buffer_size;
#
# MDEV-21104: BNLH used for multi-join query with embedded outer join
# and possible 'not exists' optimization
#
set join_cache_level=4;
CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2);
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
INSERT INTO t2 VALUES (1,2),(2,4);
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
INSERT INTO t3 VALUES (1),(2);
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
INSERT INTO t4 VALUES (1),(2);
ANALYZE TABLE t1,t2,t3,t4;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
test.t3 analyze status OK
test.t4 analyze status OK
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
b c d
1 2 2
2 4 NULL
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
b c d e
1 2 2 1
2 4 NULL 1
1 2 2 2
2 4 NULL 2
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using index; Using join buffer (incremental, BNLH join)
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
a b c d e
1 1 2 2 1
2 1 2 2 1
1 2 4 NULL 2
2 2 4 NULL 2
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using where; Using index; Not exists; Using join buffer (incremental, BNLH join)
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
a b c d e
DROP TABLE t1,t2,t3,t4;
set join_cache_level=@save_join_cache_level;
set @@optimizer_switch=@save_optimizer_switch;

View file

@ -4014,5 +4014,41 @@ select f2 from t2,t1 where f2 = 0;
drop table t1, t2;
set join_buffer_size=@save_join_buffer_size;
# Regression test for MDEV-21104: a multi-way join with an embedded outer
# join and an IS NULL predicate over a non-nullable inner-table column.
# The recorded plans for the queries below use incremental BNLH join buffers.
--echo #
--echo # MDEV-21104: BNLH used for multi-join query with embedded outer join
--echo # and possible 'not exists' optimization
--echo #
set join_cache_level=4;
# t1 is the outermost table; its column is not referenced by any join
# condition below.
CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2);
# t2.c joins to t3.d (inner left join); t2.b joins to t4.e (outer ON clause).
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
INSERT INTO t2 VALUES (1,2),(2,4);
# Only c = 2 has a match in t3, so the t2 row (2,4) is null-complemented.
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
INSERT INTO t3 VALUES (1),(2);
# e is a primary key and therefore non-nullable: "e IS NULL" can hold only
# for rows null-complemented by the outer join.
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
INSERT INTO t4 VALUES (1),(2);
# NOTE(review): presumably run so the EXPLAIN row estimates are stable - confirm.
ANALYZE TABLE t1,t2,t3,t4;
# Building blocks of the tested join, executed on their own for reference.
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
# q1: the nested outer join without the IS NULL filter (baseline plan and rows).
let $q1=
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
eval EXPLAIN $q1;
eval $q1;
# q2: the same join filtered by "e IS NULL". q1 shows that every t1 row has
# matches on b = e, so no null-complemented rows exist and the expected result
# is empty. The recorded plan shows "Not exists" for t4.
let $q2=
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
eval EXPLAIN $q2;
eval $q2;
DROP TABLE t1,t2,t3,t4;
# Restore the setting changed above; @save_join_cache_level is assigned
# outside this excerpt.
set join_cache_level=@save_join_cache_level;
# The following command must be the last one in the file
set @@optimizer_switch=@save_optimizer_switch;