From b917fb63a638fd117e1e52d3bc29b57a81124fea Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 14 Sep 2012 11:26:01 +0300 Subject: [PATCH] Fix bug lp:1009187, mdev-373, mysql bug#58628 Analysis: The queries in question use the [unique | index]_subquery execution methods. These methods reuse the ref keys constructed by create_ref_for_key(). The way create_ref_for_key() works is that it doesn't store in ref.key_copy[] store_key elements that represent constants. In particular it doesn't store the store_key for NULL constants. The execution of [unique | index]_subquery calls subselect_uniquesubquery_engine::copy_ref_key, which in addition to copy the left IN argument into a index lookup key, is supposed to detect if the left IN argument contains NULLs. Since the store_key for the NULL constant is not copied into the key array, the null is not detected, and execution erroneously proceeds as if it should look for a complete match. Solution: The solution (unlike MySQL) is to reuse already computed information about NULL presence. Item_in_optimizer::val_int already finds out if the left IN operand contains NULLs. The fix propagates this to the execution methods subselect_[unique | index]subquery_engine::exec so it knows if there were NULL values independent of the presence of keys. In addition the patch siplifies copy_ref_key() and the logic that hanldes the case of NULLs in the left IN operand. --- mysql-test/r/subselect4.result | 81 +++++++++++++++++++ mysql-test/t/subselect4.test | 44 +++++++++++ sql/item_subselect.cc | 139 +++++++++++++-------------------- sql/item_subselect.h | 2 +- 4 files changed, 180 insertions(+), 86 deletions(-) diff --git a/mysql-test/r/subselect4.result b/mysql-test/r/subselect4.result index 8385ee6f4c0..952717c5c37 100644 --- a/mysql-test/r/subselect4.result +++ b/mysql-test/r/subselect4.result @@ -197,5 +197,86 @@ default(a) aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa DROP TABLE t; # +# LP BUG#1009187, MDEV-373, MYSQL bug#58628 +# Wrong result for a query with [NOT] IN subquery predicate if +# the left part of the predicate is explicit NULL +# +CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL); +INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40); +CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk)); +INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3); +CREATE TABLE t2b (pk INT, i INT); +INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3); +CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL); +INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3); +create index it2c on t2c (i,pk); +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t2a unique_subquery PRIMARY PRIMARY 8 const,test.t1.pk 1 Using index; Using where; Full scan on NULL key +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +pk i +SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +pk i +SELECT * FROM t1 WHERE NULL IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN; +pk i +0 10 +1 20 +2 30 +3 40 +SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) FROM t1; +pk NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) +0 NULL +1 NULL +2 NULL +3 NULL +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t2b ALL NULL NULL NULL NULL 4 Using where +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk); +pk i +SELECT * FROM t1 WHERE NULL IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN; +pk i +0 10 +1 20 +2 30 +3 40 +SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) FROM t1; +pk NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) +0 NULL +1 NULL +2 NULL +3 NULL +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t2c index_subquery it2c it2c 8 const,test.t1.pk 2 Using index; Using where; Full scan on NULL key +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk); +pk i +SELECT * FROM t1 WHERE NULL IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN; +pk i +0 10 +1 20 +2 30 +3 40 +SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) FROM t1; +pk NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) +0 NULL +1 NULL +2 NULL +3 NULL +EXPLAIN +SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where +2 DEPENDENT SUBQUERY t2a eq_ref PRIMARY PRIMARY 8 const,test.t1.pk 2 Using where; Using index; Full scan on NULL key +SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk); +pk i +drop table t1, t2a, t2b, t2c; +# # End of 5.1 tests. # diff --git a/mysql-test/t/subselect4.test b/mysql-test/t/subselect4.test index bb97f246488..3de2dfc394e 100644 --- a/mysql-test/t/subselect4.test +++ b/mysql-test/t/subselect4.test @@ -170,6 +170,50 @@ INSERT INTO t VALUES (''),(''),(''),(''),(''),(''),(''),(''),(''),(''),(''); SELECT * FROM (SELECT default(a) FROM t GROUP BY a) d; DROP TABLE t; +--echo # +--echo # LP BUG#1009187, MDEV-373, MYSQL bug#58628 +--echo # Wrong result for a query with [NOT] IN subquery predicate if +--echo # the left part of the predicate is explicit NULL +--echo # + +CREATE TABLE t1 (pk INT NOT NULL, i INT NOT NULL); +INSERT INTO t1 VALUES (0,10), (1,20), (2,30), (3,40); + +CREATE TABLE t2a (pk INT NOT NULL, i INT NOT NULL, PRIMARY KEY(i,pk)); +INSERT INTO t2a VALUES (0,0), (1,1), (2,2), (3,3); + +CREATE TABLE t2b (pk INT, i INT); +INSERT INTO t2b VALUES (0,0), (1,1), (2,2), (3,3); + +CREATE TABLE t2c (pk INT NOT NULL, i INT NOT NULL); +INSERT INTO t2c VALUES (0,0), (1,1), (2,2), (3,3); +create index it2c on t2c (i,pk); + +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +SELECT * FROM t1 WHERE 1+NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk); +SELECT * FROM t1 WHERE NULL IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) IS UNKNOWN; +SELECT t1.pk, NULL NOT IN (SELECT t2a.i FROM t2a WHERE t2a.pk = t1.pk) FROM t1; + +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk); +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk); +SELECT * FROM t1 WHERE NULL IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) IS UNKNOWN; +SELECT t1.pk, NULL NOT IN (SELECT t2b.i FROM t2b WHERE t2b.pk = t1.pk) FROM t1; + +EXPLAIN +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk); +SELECT * FROM t1 WHERE NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk); +SELECT * FROM t1 WHERE NULL IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) IS UNKNOWN; +SELECT t1.pk, NULL NOT IN (SELECT t2c.i FROM t2c WHERE t2c.pk = t1.pk) FROM t1; + +EXPLAIN +SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk); +SELECT * FROM t1 WHERE (NULL, 1) NOT IN (SELECT t2a.i, t2a.pk FROM t2a WHERE t2a.pk = t1.pk); + +drop table t1, t2a, t2b, t2c; + --echo # --echo # End of 5.1 tests. --echo # diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index fc214a584ee..888c346c199 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -1686,6 +1686,12 @@ bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref) } +inline bool Item_in_subselect::left_expr_has_null() +{ + return (*(optimizer->get_cache()))->null_value; +} + + Item_subselect::trans_res Item_allany_subselect::select_transformer(JOIN *join) { @@ -2131,39 +2137,19 @@ int subselect_uniquesubquery_engine::scan_table() subselect_uniquesubquery_engine::copy_ref_key() DESCRIPTION - Copy ref key and check for null parts in it. - Depending on the nullability and conversion problems this function - recognizes and processes the following states : - 1. Partial match on top level. This means IN has a value of FALSE - regardless of the data in the subquery table. - Detected by finding a NULL in the left IN operand of a top level - expression. - We may actually skip reading the subquery, so return TRUE to skip - the table scan in subselect_uniquesubquery_engine::exec and make - the value of the IN predicate a NULL (that is equal to FALSE on - top level). - 2. No exact match when IN is nested inside another predicate. - Detected by finding a NULL in the left IN operand when IN is not - a top level predicate. - We cannot have an exact match. But we must proceed further with a - table scan to find out if it's a partial match (and IN has a value - of NULL) or no match (and IN has a value of FALSE). - So we return FALSE to continue with the scan and see if there are - any record that would constitute a partial match (as we cannot - determine that from the index). - 3. Error converting the left IN operand to the column type of the - right IN operand. This counts as no match (and IN has the value of - FALSE). We mark the subquery table cursor as having no more rows - (to ensure that the processing that follows will not find a match) - and return FALSE, so IN is not treated as returning NULL. + Copy ref key and check for conversion problems. + If there is an error converting the left IN operand to the column type of + the right IN operand count it as no match. In this case IN has the value of + FALSE. We mark the subquery table cursor as having no more rows (to ensure + that the processing that follows will not find a match) and return FALSE, + so IN is not treated as returning NULL. RETURN FALSE - The value of the IN predicate is not known. Proceed to find the - value of the IN predicate using the determined values of - null_keypart and table->status. - TRUE - IN predicate has a value of NULL. Stop the processing right there - and return NULL to the outer predicates. + value of the IN predicate using the copied key. + TRUE - IN predicate has a value of FALSE. Stop the processing right there + and tell the outer predicate it is FALSE. */ bool subselect_uniquesubquery_engine::copy_ref_key() @@ -2176,30 +2162,6 @@ bool subselect_uniquesubquery_engine::copy_ref_key() continue; tab->ref.key_err= (*copy)->copy(); - /* - When there is a NULL part in the key we don't need to make index - lookup for such key thus we don't need to copy whole key. - If we later should do a sequential scan return OK. Fail otherwise. - - See also the comment for the subselect_uniquesubquery_engine::exec() - function. - */ - null_keypart= (*copy)->null_key; - if (null_keypart) - { - bool top_level= ((Item_in_subselect *) item)->is_top_level_item(); - if (top_level) - { - /* Partial match on top level */ - DBUG_RETURN(1); - } - else - { - /* No exact match when IN is nested inside another predicate */ - break; - } - } - /* Check if the error is equal to STORE_KEY_FATAL. This is not expressed using the store_key::store_key_result enum because ref.key_err is a @@ -2215,10 +2177,10 @@ bool subselect_uniquesubquery_engine::copy_ref_key() IN operand. */ tab->table->status= STATUS_NOT_FOUND; - break; + DBUG_RETURN(true); } } - DBUG_RETURN(0); + DBUG_RETURN(false); } @@ -2248,8 +2210,9 @@ bool subselect_uniquesubquery_engine::copy_ref_key() NOTE RETURN - FALSE - ok - TRUE - an error occured while scanning + 0 - ok + 1 - notify caller to call Item_subselect::reset(), + in most cases reset() sets the result to NULL */ int subselect_uniquesubquery_engine::exec() @@ -2259,23 +2222,27 @@ int subselect_uniquesubquery_engine::exec() TABLE *table= tab->table; empty_result_set= TRUE; table->status= 0; + Item_in_subselect *in_subs= (Item_in_subselect *) item; - /* TODO: change to use of 'full_scan' here? */ - if (copy_ref_key()) - DBUG_RETURN(1); - if (table->status) + if (in_subs->left_expr_has_null()) { - /* - We know that there will be no rows even if we scan. - Can be set in copy_ref_key. + /* + The case when all values in left_expr are NULL is handled by + Item_in_optimizer::val_int(). */ - ((Item_in_subselect *) item)->value= 0; + if (in_subs->is_top_level_item()) + DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */ + else + DBUG_RETURN(scan_table()); + } + + if (copy_ref_key()) + { + /* We know that there will be no rows even if we scan. */ + in_subs->value= 0; DBUG_RETURN(0); } - if (null_keypart) - DBUG_RETURN(scan_table()); - if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 0); error= table->file->ha_index_read_map(table->record[0], @@ -2357,9 +2324,9 @@ subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine() cheaper. We can use index statistics to quickly check whether "ref" scan will be cheaper than full table scan. - RETURN - 0 - 1 + 0 - ok + 1 - notify caller to call Item_subselect::reset(), + in most cases reset() sets the result to NULL */ int subselect_indexsubquery_engine::exec() @@ -2368,10 +2335,10 @@ int subselect_indexsubquery_engine::exec() int error; bool null_finding= 0; TABLE *table= tab->table; + Item_in_subselect *in_subs= (Item_in_subselect *) item; - ((Item_in_subselect *) item)->value= 0; + in_subs->value= 0; empty_result_set= TRUE; - null_keypart= 0; table->status= 0; if (check_null) @@ -2381,22 +2348,24 @@ int subselect_indexsubquery_engine::exec() ((Item_in_subselect *) item)->was_null= 0; } - /* Copy the ref key and check for nulls... */ - if (copy_ref_key()) - DBUG_RETURN(1); - - if (table->status) + if (in_subs->left_expr_has_null()) { - /* - We know that there will be no rows even if we scan. - Can be set in copy_ref_key. + /* + The case when all values in left_expr are NULL is handled by + Item_in_optimizer::val_int(). */ - ((Item_in_subselect *) item)->value= 0; - DBUG_RETURN(0); + if (in_subs->is_top_level_item()) + DBUG_RETURN(1); /* notify caller to call reset() and set NULL value. */ + else + DBUG_RETURN(scan_table()); } - if (null_keypart) - DBUG_RETURN(scan_table()); + if (copy_ref_key()) + { + /* We know that there will be no rows even if we scan. */ + in_subs->value= 0; + DBUG_RETURN(0); + } if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 1); diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 7eb45e74f40..b6b9750b910 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -334,6 +334,7 @@ public: bool test_limit(st_select_lex_unit *unit); virtual void print(String *str, enum_query_type query_type); bool fix_fields(THD *thd, Item **ref); + inline bool left_expr_has_null(); friend class Item_ref_null_helper; friend class Item_is_not_null_test; @@ -513,7 +514,6 @@ protected: expression is NULL. */ bool empty_result_set; - bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */ public: // constructor can assign THD because it will be called after JOIN::prepare