From 1d4acfb668a5de5c638a794b081fb858700f2970 Mon Sep 17 00:00:00 2001 From: "sergefp@mysql.com" <> Date: Tue, 25 Apr 2006 23:33:31 +0400 Subject: [PATCH] BUG#15872: Don't run the range analyzer on "t1.keypart NOT IN (const1, ..., )", as that consumes too much memory. Instead, either create the equvalent SEL_TREE manually, or create only two ranges that strictly include the area to scan (Note: just to re-iterate: increasing NOT_IN_IGNORE_THRESHOLD will make optimization run slower for big IN-lists, but the server will not run out of memory. O(N^2) memory use has been eliminated) --- mysql-test/r/func_in.result | 103 +++++++++++++++++++++++++++++++++++- mysql-test/t/func_in.test | 94 +++++++++++++++++++++++++++++++- sql/item.cc | 10 ++++ sql/item.h | 1 + sql/item_cmpfunc.h | 77 +++++++++++++++++++++++++-- sql/opt_range.cc | 93 ++++++++++++++++++++++++++++---- 6 files changed, 364 insertions(+), 14 deletions(-) diff --git a/mysql-test/r/func_in.result b/mysql-test/r/func_in.result index 60022ae0d8f..e3257ce5fd0 100644 --- a/mysql-test/r/func_in.result +++ b/mysql-test/r/func_in.result @@ -1,4 +1,4 @@ -drop table if exists t1; +drop table if exists t1, t2; select 1 in (1,2,3); 1 in (1,2,3) 1 @@ -225,3 +225,104 @@ a 46 DROP VIEW v1; DROP TABLE t1; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2 (a int, filler char(200), key(a)); +insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C; +insert into t2 select C.a*2+1, 'yes' from t1 C; +explain +select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 5 NULL 12 Using where +select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18); +a filler +1 yes +3 yes +5 yes +7 yes +9 yes +11 yes +13 yes +15 yes +17 yes +19 yes +explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 5 NULL 912 Using where +explain select * from t2 force index(a) where a <> 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 5 NULL 912 Using where +drop table t2; +create table t2 (a datetime, filler char(200), key(a)); +insert into t2 select '2006-04-25 10:00:00' + interval C.a minute, +'no' from t1 A, t1 B, t1 C where C.a % 2 = 0; +insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute, +'yes' from t1 C; +explain +select * from t2 where a NOT IN ( +'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00', +'2006-04-25 10:06:00', '2006-04-25 10:08:00'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 9 NULL 18 Using where +select * from t2 where a NOT IN ( +'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00', +'2006-04-25 10:06:00', '2006-04-25 10:08:00'); +a filler +2006-04-25 10:01:00 yes +2006-04-25 10:03:00 yes +2006-04-25 10:05:00 yes +2006-04-25 10:07:00 yes +2006-04-25 10:09:00 yes +2006-04-25 10:11:00 yes +2006-04-25 10:13:00 yes +2006-04-25 10:15:00 yes +2006-04-25 10:17:00 yes +2006-04-25 10:19:00 yes +drop table t2; +create table t2 (a varchar(10), filler char(200), key(a)); +insert into t2 select 'foo', 'no' from t1 A, t1 B; +insert into t2 select 'barbar', 'no' from t1 A, t1 B; +insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B; +insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'), +('barbas','1'), ('bazbazbay', '1'),('zz','1'); +explain select * from t2 where a not in('foo','barbar', 'bazbazbaz'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 13 NULL 7 Using where +drop table t2; +create table t2 (a decimal(10,5), filler char(200), key(a)); +insert into t2 select 345.67890, 'no' from t1 A, t1 B; +insert into t2 select 43245.34, 'no' from t1 A, t1 B; +insert into t2 select 64224.56344, 'no' from t1 A, t1 B; +insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'), +(55555,'1'), (77777, '1'); +explain +select * from t2 where a not in (345.67890, 43245.34, 64224.56344); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 7 NULL 7 Using where +select * from t2 where a not in (345.67890, 43245.34, 64224.56344); +a filler +0.00000 1 +22334.12300 1 +33333.00000 1 +55555.00000 1 +77777.00000 1 +drop table t2; +create table t2 (a int, key(a), b int); +insert into t2 values (1,1),(2,2); +set @cnt= 1; +set @str="update t2 set b=1 where a not in ("; +select count(*) from ( +select @str:=concat(@str, @cnt:=@cnt+1, ",") +from t1 A, t1 B, t1 C, t1 D) Z; +count(*) +10000 +set @str:=concat(@str, "10000)"); +select substr(@str, 1, 50); +substr(@str, 1, 50) +update t2 set b=1 where a not in (2,3,4,5,6,7,8,9, +prepare s from @str; +execute s; +deallocate prepare s; +set @str=NULL; +drop table t2; +drop table t1; diff --git a/mysql-test/t/func_in.test b/mysql-test/t/func_in.test index 0472968f918..351d1fc2c92 100644 --- a/mysql-test/t/func_in.test +++ b/mysql-test/t/func_in.test @@ -1,6 +1,6 @@ # Initialise --disable_warnings -drop table if exists t1; +drop table if exists t1, t2; --enable_warnings # # test of IN (NULL) @@ -128,3 +128,95 @@ SELECT * FROM v1; DROP VIEW v1; DROP TABLE t1; + +# BUG#15872: Excessive memory consumption of range analysis of NOT IN +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2 (a int, filler char(200), key(a)); + +insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C; +insert into t2 select C.a*2+1, 'yes' from t1 C; + +explain +select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18); +select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18); + +explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2); +explain select * from t2 force index(a) where a <> 2; + +drop table t2; + +# +# Repeat the test for DATETIME +# +create table t2 (a datetime, filler char(200), key(a)); + +insert into t2 select '2006-04-25 10:00:00' + interval C.a minute, + 'no' from t1 A, t1 B, t1 C where C.a % 2 = 0; + +insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute, + 'yes' from t1 C; + +explain +select * from t2 where a NOT IN ( + '2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00', + '2006-04-25 10:06:00', '2006-04-25 10:08:00'); +select * from t2 where a NOT IN ( + '2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00', + '2006-04-25 10:06:00', '2006-04-25 10:08:00'); +drop table t2; + +# +# Repeat the test for CHAR(N) +# +create table t2 (a varchar(10), filler char(200), key(a)); + +insert into t2 select 'foo', 'no' from t1 A, t1 B; +insert into t2 select 'barbar', 'no' from t1 A, t1 B; +insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B; + +insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'), + ('barbas','1'), ('bazbazbay', '1'),('zz','1'); + +explain select * from t2 where a not in('foo','barbar', 'bazbazbaz'); + +drop table t2; + +# +# Repeat for DECIMAL +# +create table t2 (a decimal(10,5), filler char(200), key(a)); + +insert into t2 select 345.67890, 'no' from t1 A, t1 B; +insert into t2 select 43245.34, 'no' from t1 A, t1 B; +insert into t2 select 64224.56344, 'no' from t1 A, t1 B; + +insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'), + (55555,'1'), (77777, '1'); + +explain +select * from t2 where a not in (345.67890, 43245.34, 64224.56344); +select * from t2 where a not in (345.67890, 43245.34, 64224.56344); + +drop table t2; + +# Try a very big IN-list +create table t2 (a int, key(a), b int); +insert into t2 values (1,1),(2,2); + +set @cnt= 1; +set @str="update t2 set b=1 where a not in ("; +select count(*) from ( + select @str:=concat(@str, @cnt:=@cnt+1, ",") + from t1 A, t1 B, t1 C, t1 D) Z; + +set @str:=concat(@str, "10000)"); +select substr(@str, 1, 50); +prepare s from @str; +execute s; +deallocate prepare s; +set @str=NULL; + +drop table t2; +drop table t1; + diff --git a/sql/item.cc b/sql/item.cc index 8cf6fde1dbd..9410fa65408 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1991,6 +1991,16 @@ bool Item_decimal::eq(const Item *item, bool binary_cmp) const } +void Item_decimal::set_decimal_value(my_decimal *value_par) +{ + my_decimal2decimal(value_par, &decimal_value); + decimals= (uint8) decimal_value.frac; + unsigned_flag= !decimal_value.sign(); + max_length= my_decimal_precision_to_length(decimal_value.intg + decimals, + decimals, unsigned_flag); +} + + String *Item_float::val_str(String *str) { // following assert is redundant, because fixed=1 assigned in constructor diff --git a/sql/item.h b/sql/item.h index d33e0ae34be..8b09bc14221 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1442,6 +1442,7 @@ public: } uint decimal_precision() const { return decimal_value.precision(); } bool eq(const Item *, bool binary_cmp) const; + void set_decimal_value(my_decimal *value_par); }; diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 67f0a5f5e2e..1837603d78f 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -623,15 +623,17 @@ public: /* Functions to handle the optimized IN */ + +/* A vector of values of some type */ + class in_vector :public Sql_alloc { - protected: +public: char *base; uint size; qsort2_cmp compare; CHARSET_INFO *collation; uint count; -public: uint used_count; in_vector() {} in_vector(uint elements,uint element_length,qsort2_cmp cmp_func, @@ -647,6 +649,32 @@ public: qsort2(base,used_count,size,compare,collation); } int find(Item *item); + + /* + Create an instance of Item_{type} (e.g. Item_decimal) constant object + which type allows it to hold an element of this vector without any + conversions. + The purpose of this function is to be able to get elements of this + vector in form of Item_xxx constants without creating Item_xxx object + for every array element you get (i.e. this implements "FlyWeight" pattern) + */ + virtual Item* create_item() { return NULL; } + + /* + Store the value at position #pos into provided item object + SYNOPSIS + value_to_item() + pos Index of value to store + item Constant item to store value into. The item must be of the same + type that create_item() returns. + */ + virtual void value_to_item(uint pos, Item *item) { } + + /* Compare values number pos1 and pos2 for equality */ + bool compare_elems(uint pos1, uint pos2) + { + return test(compare(collation, base + pos1*size, base + pos2*size)); + } }; class in_string :public in_vector @@ -658,6 +686,16 @@ public: ~in_string(); void set(uint pos,Item *item); byte *get_value(Item *item); + Item* create_item() + { + return new Item_string(collation); + } + void value_to_item(uint pos, Item *item) + { + String *str=((String*) base)+pos; + Item_string *to= (Item_string*)item; + to->str_value= *str; + } }; class in_longlong :public in_vector @@ -667,6 +705,19 @@ public: in_longlong(uint elements); void set(uint pos,Item *item); byte *get_value(Item *item); + + Item* create_item() + { + /* + We're created a signed INT, this may not be correct in + general case (see BUG#19342). + */ + return new Item_int(0); + } + void value_to_item(uint pos, Item *item) + { + ((Item_int*)item)->value= ((longlong*)base)[pos]; + } }; class in_double :public in_vector @@ -676,6 +727,15 @@ public: in_double(uint elements); void set(uint pos,Item *item); byte *get_value(Item *item); + Item *create_item() + { + return new Item_float(0.0); + } + void value_to_item(uint pos, Item *item) + { + ((Item_float*)item)->value= ((double*) base)[pos]; + } + }; class in_decimal :public in_vector @@ -685,6 +745,16 @@ public: in_decimal(uint elements); void set(uint pos, Item *item); byte *get_value(Item *item); + Item *create_item() + { + return new Item_decimal(0, FALSE); + } + void value_to_item(uint pos, Item *item) + { + my_decimal *dec= ((my_decimal *)base) + pos; + Item_decimal *item_dec= (Item_decimal*)item; + item_dec->set_decimal_value(dec); + } }; @@ -864,12 +934,13 @@ public: class Item_func_in :public Item_func_opt_neg { +public: Item_result cmp_type; in_vector *array; cmp_item *in_item; bool have_null; DTCollation cmp_collation; - public: + Item_func_in(List &list) :Item_func_opt_neg(list), array(0), in_item(0), have_null(0) { diff --git a/sql/opt_range.cc b/sql/opt_range.cc index ae02bb8934c..2610e4fc1c8 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3501,17 +3501,92 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, if (inv) { - tree= get_ne_mm_tree(param, cond_func, field, - func->arguments()[1], func->arguments()[1], - cmp_type); - if (tree) + /* + We get here for conditions like "t.keypart NOT IN (....)". + + If the IN-list contains only constants (and func->array is an ordered + array of them), we construct the appropriate SEL_ARG tree manually, + because constructing it using the range analyzer (as + AND_i( t.keypart != c_i)) will cause lots of memory to be consumed + (see BUG#15872). + */ + if (func->array && func->cmp_type != ROW_RESULT) { - Item **arg, **end; - for (arg= func->arguments()+2, end= arg+func->argument_count()-2; - arg < end ; arg++) + /* + Create one Item_type constant object. We'll need it as + get_mm_parts only accepts constant values wrapped in Item_Type + objects. + We create the Item on param->mem_root which points to + per-statement mem_root (while thd->mem_root is currently pointing + to mem_root local to range optimizer). + */ + MEM_ROOT *tmp_root= param->mem_root; + param->thd->mem_root= param->old_root; + Item *value_item= func->array->create_item(); + param->thd->mem_root= tmp_root; + + if (!value_item) + break; + + /* Get a SEL_TREE for "-inf < X < c_0" interval */ + func->array->value_to_item(0, value_item); + tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, + value_item, cmp_type); + if (!tree) + break; +#define NOT_IN_IGNORE_THRESHOLD 1000 + SEL_TREE *tree2; + if (func->array->count < NOT_IN_IGNORE_THRESHOLD) { - tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field, - *arg, *arg, cmp_type)); + for (uint i=1; i < func->array->count; i++) + { + if (func->array->compare_elems(i, i-1)) + { + /* Get a SEL_TREE for "-inf < X < c_i" interval */ + func->array->value_to_item(i, value_item); + tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, + value_item, cmp_type); + + /* Change all intervals to be "c_{i-1} < X < c_i" */ + for (uint idx= 0; idx < param->keys; idx++) + { + SEL_ARG *new_interval; + if ((new_interval= tree2->keys[idx])) + { + SEL_ARG *last_val= tree->keys[idx]->last(); + new_interval->min_value= last_val->max_value; + new_interval->min_flag= NEAR_MIN; + } + } + tree= tree_or(param, tree, tree2); + } + } + } + else + func->array->value_to_item(func->array->count - 1, value_item); + + /* + Get the SEL_TREE for the last "c_last < X < +inf" interval + (value_item cotains c_last already) + */ + tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC, + value_item, cmp_type); + tree= tree_or(param, tree, tree2); + } + else + { + tree= get_ne_mm_tree(param, cond_func, field, + func->arguments()[1], func->arguments()[1], + cmp_type); + if (tree) + { + Item **arg, **end; + for (arg= func->arguments()+2, end= arg+func->argument_count()-2; + arg < end ; arg++) + { + tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field, + *arg, *arg, cmp_type)); + } } } }