Bug #30355: Incorrect ordering of UDF results

There's currently no way of knowing the determinicity of an UDF.
And the optimizer and the sequence() UDFs were making wrong
assumptions about what the is_const member means.
Plus there was no implementation of update_system_tables()
causing the optimizer to overwrite the information returned by
the <udf>_init function.

Fixed by equating the assumptions about the semantics of 
is_const and providing a implementation of update_used_tables().
Added a TODO item for the UDF API change needed to make a better 
implementation.


include/mysql_com.h:
  Bug #30355: comment added
mysql-test/r/udf.result:
  Bug #30355: test case
mysql-test/t/udf.test:
  Bug #30355: test case
sql/item_func.cc:
  Bug #30355: keep const_item_cache and used_tables_cache in sync
sql/item_func.h:
  Bug #30355: 
   - a better implementation of update_used_tables()
   - keep const_item_cache and used_tables_cache in sync
sql/udf_example.c:
  Bug #30355: Wrong value for const_item fixed.
This commit is contained in:
unknown 2007-11-27 17:16:52 +02:00
parent 3d550b757d
commit 63b6516953
6 changed files with 112 additions and 6 deletions

View file

@ -393,6 +393,10 @@ typedef struct st_udf_init
char *ptr; /* free pointer for function data */
my_bool const_item; /* 0 if result is independent of arguments */
} UDF_INIT;
/*
TODO: add a notion for determinism of the UDF.
See Item_udf_func::update_used_tables ()
*/
/* Constants when using compression */
#define NET_HEADER_SIZE 4 /* standard header size */

View file

@ -327,4 +327,31 @@ DROP FUNCTION check_const_len;
DROP PROCEDURE check_const_len_sp;
DROP TRIGGER check_const_len_trigger;
DROP TABLE const_len_bug;
CREATE FUNCTION sequence RETURNS INTEGER SONAME "UDF_EXAMPLE_LIB";
CREATE TABLE t1 (a INT);
CREATE TABLE t2 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (4),(3),(2),(1);
INSERT INTO t2 SELECT * FROM t1;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq ASC;
seq a
1 4
2 3
3 2
4 1
SELECT sequence() AS seq, a FROM t1 ORDER BY seq DESC;
seq a
4 1
3 2
2 3
1 4
SELECT * FROM t1 WHERE a = sequence();
a
SELECT * FROM t2 WHERE a = sequence();
a
1
2
3
4
DROP FUNCTION sequence;
DROP TABLE t1,t2;
End of 5.0 tests.

View file

@ -362,4 +362,25 @@ DROP PROCEDURE check_const_len_sp;
DROP TRIGGER check_const_len_trigger;
DROP TABLE const_len_bug;
#
# Bug #30355: Incorrect ordering of UDF results
#
--replace_result $UDF_EXAMPLE_LIB UDF_EXAMPLE_LIB
eval CREATE FUNCTION sequence RETURNS INTEGER SONAME "$UDF_EXAMPLE_LIB";
CREATE TABLE t1 (a INT);
CREATE TABLE t2 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (4),(3),(2),(1);
INSERT INTO t2 SELECT * FROM t1;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq ASC;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq DESC;
SELECT * FROM t1 WHERE a = sequence();
SELECT * FROM t2 WHERE a = sequence();
DROP FUNCTION sequence;
DROP TABLE t1,t2;
--echo End of 5.0 tests.

View file

@ -2968,6 +2968,12 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
func->max_length=min(initid.max_length,MAX_BLOB_WIDTH);
func->maybe_null=initid.maybe_null;
const_item_cache=initid.const_item;
/*
Keep used_tables_cache in sync with const_item_cache.
See the comment in Item_udf_func::update_used tables.
*/
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
func->decimals=min(initid.decimals,NOT_FIXED_DEC);
}
initialized=1;

View file

@ -1016,6 +1016,56 @@ public:
fixed= 1;
return res;
}
void update_used_tables()
{
/*
TODO: Make a member in UDF_INIT and return if a UDF is deterministic or
not.
Currently UDF_INIT has a member (const_item) that is an in/out
parameter to the init() call.
The code in udf_handler::fix_fields also duplicates the arguments
handling code in Item_func::fix_fields().
The lack of information if a UDF is deterministic makes writing
a correct update_used_tables() for UDFs impossible.
One solution to this would be :
- Add a is_deterministic member of UDF_INIT
- (optionally) deprecate the const_item member of UDF_INIT
- Take away the duplicate code from udf_handler::fix_fields() and
make Item_udf_func call Item_func::fix_fields() to process its
arguments as for any other function.
- Store the deterministic flag returned by <udf>_init into the
udf_handler.
- Don't implement Item_udf_func::fix_fields, implement
Item_udf_func::fix_length_and_dec() instead (similar to non-UDF
functions).
- Override Item_func::update_used_tables to call
Item_func::update_used_tables() and add a RAND_TABLE_BIT to the
result of Item_func::update_used_tables() if the UDF is
non-deterministic.
- (optionally) rename RAND_TABLE_BIT to NONDETERMINISTIC_BIT to
better describe its usage.
The above would require a change of the UDF API.
Until that change is done here's how the current code works:
We call Item_func::update_used_tables() only when we know that
the function depends on real non-const tables and is deterministic.
This can be done only because we know that the optimizer will
call update_used_tables() only when there's possibly a new const
table. So update_used_tables() can only make a Item_func more
constant than it is currently.
That's why we don't need to do anything if a function is guaranteed
to return non-constant (it's non-deterministic) or is already a
const.
*/
if ((used_tables_cache & ~PSEUDO_TABLE_BITS) &&
!(used_tables_cache & RAND_TABLE_BIT))
{
Item_func::update_used_tables();
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
}
}
void cleanup();
Item_result result_type () const { return udf.result_type(); }
table_map not_null_tables() const { return 0; }

View file

@ -648,13 +648,11 @@ my_bool sequence_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
return 1;
}
bzero(initid->ptr,sizeof(longlong));
/*
Fool MySQL to think that this function is a constant
This will ensure that MySQL only evalutes the function
when the rows are sent to the client and not before any ORDER BY
clauses
/*
sequence() is a non-deterministic function : it has different value
even if called with the same arguments.
*/
initid->const_item=1;
initid->const_item=0;
return 0;
}