mariadb/sql/sp_head.h

1339 lines
33 KiB
C
Raw Normal View History

/* -*- C++ -*- */
/* Copyright (C) 2002 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#ifndef _SP_HEAD_H_
#define _SP_HEAD_H_
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
#include <stddef.h>
/**
@defgroup Stored_Routines Stored Routines
@ingroup Runtime_Environment
@{
*/
// Values for the type enum. This reflects the order of the enum declaration
// in the CREATE TABLE command.
#define TYPE_ENUM_FUNCTION 1
#define TYPE_ENUM_PROCEDURE 2
#define TYPE_ENUM_TRIGGER 3
Item_result
sp_map_result_type(enum enum_field_types type);
Item::Type
sp_map_item_type(enum enum_field_types type);
uint
sp_get_flags_for_command(LEX *lex);
struct sp_label;
class sp_instr;
class sp_instr_opt_meta;
class sp_instr_jump_if_not;
struct sp_cond_type;
struct sp_variable;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
/*************************************************************************/
/**
Stored_program_creation_ctx -- base class for creation context of stored
programs (stored routines, triggers, events).
*/
class Stored_program_creation_ctx :public Default_object_creation_ctx
{
public:
CHARSET_INFO *get_db_cl()
{
return m_db_cl;
}
public:
virtual Stored_program_creation_ctx *clone(MEM_ROOT *mem_root) = 0;
protected:
Stored_program_creation_ctx(THD *thd)
: Default_object_creation_ctx(thd),
m_db_cl(thd->variables.collation_database)
{ }
Stored_program_creation_ctx(CHARSET_INFO *client_cs,
CHARSET_INFO *connection_cl,
CHARSET_INFO *db_cl)
: Default_object_creation_ctx(client_cs, connection_cl),
m_db_cl(db_cl)
{ }
protected:
virtual void change_env(THD *thd) const
{
thd->variables.collation_database= m_db_cl;
Default_object_creation_ctx::change_env(thd);
}
protected:
/**
db_cl stores the value of the database collation. Both character set
and collation attributes are used.
Database collation is included into the context because it defines the
default collation for stored-program variables.
*/
CHARSET_INFO *m_db_cl;
};
/*************************************************************************/
class sp_name : public Sql_alloc
{
public:
LEX_STRING m_db;
LEX_STRING m_name;
LEX_STRING m_qname;
2007-10-16 22:11:50 +02:00
/**
Key representing routine in the set of stored routines used by statement.
Consists of 1-byte routine type and m_qname (which usually refences to
same buffer). Note that one must complete initialization of the key by
calling set_routine_type().
*/
LEX_STRING m_sroutines_key;
bool m_explicit_name; /**< Prepend the db name? */
sp_name(LEX_STRING db, LEX_STRING name, bool use_explicit_name)
: m_db(db), m_name(name), m_explicit_name(use_explicit_name)
{
m_qname.str= m_sroutines_key.str= 0;
m_qname.length= m_sroutines_key.length= 0;
}
2007-10-16 22:11:50 +02:00
/**
Creates temporary sp_name object from key, used mainly
for SP-cache lookups.
*/
sp_name(THD *thd, char *key, uint key_len);
// Init. the qualified name from the db and name.
void init_qname(THD *thd); // thd for memroot allocation
void set_routine_type(char type)
{
m_sroutines_key.str[0]= type;
}
~sp_name()
{}
};
bool
check_routine_name(LEX_STRING *ident);
class sp_head :private Query_arena
{
2007-10-16 22:11:50 +02:00
sp_head(const sp_head &); /**< Prevent use of these */
void operator=(sp_head &);
MEM_ROOT main_mem_root;
public:
2007-10-16 22:11:50 +02:00
/** Possible values of m_flags */
enum {
HAS_RETURN= 1, // For FUNCTIONs only: is set if has RETURN
MULTI_RESULTS= 8, // Is set if a procedure with SELECT(s)
CONTAINS_DYNAMIC_SQL= 16, // Is set if a procedure with PREPARE/EXECUTE
IS_INVOKED= 32, // Is set if this sp_head is being used
HAS_SET_AUTOCOMMIT_STMT= 64,// Is set if a procedure with 'set autocommit'
/* Is set if a procedure with COMMIT (implicit or explicit) | ROLLBACK */
HAS_COMMIT_OR_ROLLBACK= 128,
LOG_SLOW_STATEMENTS= 256, // Used by events
* Mixed replication mode * : 1) Fix for BUG#19630 "stored function inserting into two auto_increment breaks statement-based binlog": a stored function inserting into two such tables may fail to replicate (inserting wrong data in the slave's copy of the second table) if the slave's second table had an internal auto_increment counter different from master's. Because the auto_increment value autogenerated by master for the 2nd table does not go into binlog, only the first does, so the slave lacks information. To fix this, if running in mixed binlogging mode, if the stored function or trigger plans to update two different tables both having auto_increment columns, we switch to row-based for the whole function. We don't have a simple solution for statement-based binlogging mode, there the bug remains and will be documented as a known problem. Re-enabling rpl_switch_stm_row_mixed. 2) Fix for BUG#20630 "Mixed binlogging mode does not work with stored functions, triggers, views", which was a documented limitation (in mixed mode, we didn't detect that a stored function's execution needed row-based binlogging (due to some UUID() call for example); same for triggers, same for views (a view created from a SELECT UUID(), and doing INSERT INTO sometable SELECT theview; would not replicate row-based). This is implemented by, after parsing a routine's body, remembering in sp_head that this routine needs row-based binlogging. Then when this routine is used, the caller is marked to require row-based binlogging too. Same for views: when we parse a view and detect that its SELECT needs row-based binary logging, we mark the calling LEX as such. 3) Fix for BUG#20499 "mixed mode with temporary table breaks binlog": a temporary table containing e.g. UUID has its changes not binlogged, so any query updating a permanent table with data from the temporary table will run wrongly on slave. Solution: in mixed mode we don't switch back from row-based to statement-based when there exists temporary tables. 4) Attempt to test mysqlbinlog on a binlog generated by mysqlbinlog; impossible due to BUG#11312 and BUG#20329, but test is in place for when they are fixed.
2006-07-09 17:00:47 +02:00
LOG_GENERAL_LOG= 512, // Used by events
2006-08-30 21:09:47 +02:00
BINLOG_ROW_BASED_IF_MIXED= 1024,
HAS_SQLCOM_RESET= 2048,
HAS_SQLCOM_FLUSH= 4096
};
2007-10-16 22:11:50 +02:00
/** TYPE_ENUM_FUNCTION, TYPE_ENUM_PROCEDURE or TYPE_ENUM_TRIGGER */
int m_type;
uint m_flags; // Boolean attributes of a stored routine
2007-10-16 22:11:50 +02:00
Create_field m_return_field_def; /**< This is used for FUNCTIONs only. */
2007-10-16 22:11:50 +02:00
const char *m_tmp_query; ///< Temporary pointer to sub query string
st_sp_chistics *m_chistics;
2007-10-16 22:11:50 +02:00
ulong m_sql_mode; ///< For SHOW CREATE and execution
LEX_STRING m_qname; ///< db.name
bool m_explicit_name; ///< Prepend the db name? */
/**
Key representing routine in the set of stored routines used by statement.
2007-08-15 15:43:08 +02:00
[routine_type]db.name
@sa sp_name::m_sroutines_key
*/
LEX_STRING m_sroutines_key;
LEX_STRING m_db;
LEX_STRING m_name;
LEX_STRING m_params;
LEX_STRING m_body;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
LEX_STRING m_body_utf8;
LEX_STRING m_defstr;
LEX_STRING m_definer_user;
LEX_STRING m_definer_host;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
private:
Stored_program_creation_ctx *m_creation_ctx;
public:
inline Stored_program_creation_ctx *get_creation_ctx()
{
return m_creation_ctx;
}
inline void set_creation_ctx(Stored_program_creation_ctx *creation_ctx)
{
m_creation_ctx= creation_ctx->clone(mem_root);
}
longlong m_created;
longlong m_modified;
2007-10-16 22:11:50 +02:00
/** Recursion level of the current SP instance. The levels are numbered from 0 */
ulong m_recursion_level;
2007-10-16 22:11:50 +02:00
/**
A list of diferent recursion level instances for the same procedure.
For every recursion level we have a sp_head instance. This instances
connected in the list. The list ordered by increasing recursion level
(m_recursion_level).
*/
sp_head *m_next_cached_sp;
2007-10-16 22:11:50 +02:00
/**
Pointer to the first element of the above list
*/
sp_head *m_first_instance;
2007-10-16 22:11:50 +02:00
/**
Pointer to the first free (non-INVOKED) routine in the list of
cached instances for this SP. This pointer is set only for the first
SP in the list of instences (see above m_first_cached_sp pointer).
The pointer equal to 0 if we have no free instances.
For non-first instance value of this pointer meanless (point to itself);
*/
sp_head *m_first_free_instance;
2007-10-16 22:11:50 +02:00
/**
Pointer to the last element in the list of instances of the SP.
For non-first instance value of this pointer meanless (point to itself);
*/
sp_head *m_last_cached_sp;
2007-10-16 22:11:50 +02:00
/**
Set containing names of stored routines used by this routine.
Note that unlike elements of similar set for statement elements of this
set are not linked in one list. Because of this we are able save memory
by using for this set same objects that are used in 'sroutines' sets
for statements of which this stored routine consists.
*/
HASH m_sroutines;
// Pointers set during parsing
const char *m_param_begin;
const char *m_param_end;
private:
const char *m_body_begin;
public:
/*
Security context for stored routine which should be run under
definer privileges.
*/
Security_context m_security_ctx;
static void *
operator new(size_t size) throw ();
static void
operator delete(void *ptr, size_t size) throw ();
sp_head();
2007-10-16 22:11:50 +02:00
/// Initialize after we have reset mem_root
void
init(LEX *lex);
2007-10-16 22:11:50 +02:00
/** Copy sp name from parser. */
void
init_sp_name(THD *thd, sp_name *spname);
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
/** Set the body-definition start position. */
void
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
set_body_start(THD *thd, const char *begin_ptr);
/** Set the statement-definition (body-definition) end position. */
void
set_stmt_end(THD *thd);
int
create(THD *thd);
virtual ~sp_head();
2007-10-16 22:11:50 +02:00
/// Free memory
void
destroy();
bool
execute_trigger(THD *thd,
const LEX_STRING *db_name,
const LEX_STRING *table_name,
GRANT_INFO *grant_info);
bool
execute_function(THD *thd, Item **args, uint argcount, Field *return_fld);
bool
execute_procedure(THD *thd, List<Item> *args);
bool
show_create_routine(THD *thd, int type);
void
add_instr(sp_instr *instr);
inline uint
instructions()
{
return m_instr.elements;
}
inline sp_instr *
last_instruction()
{
sp_instr *i;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
get_dynamic(&m_instr, (uchar*)&i, m_instr.elements-1);
return i;
}
2007-10-16 22:11:50 +02:00
/*
Resets lex in 'thd' and keeps a copy of the old one.
@todo Conflicting comment in sp_head.cc
*/
bool
reset_lex(THD *thd);
2007-10-16 22:11:50 +02:00
/**
Restores lex in 'thd' from our copy, but keeps some status from the
one in 'thd', like ptr, tables, fields, etc.
@todo Conflicting comment in sp_head.cc
*/
void
restore_lex(THD *thd);
2007-10-16 22:11:50 +02:00
/// Put the instruction on the backpatch list, associated with the label.
void
push_backpatch(sp_instr *, struct sp_label *);
2007-10-16 22:11:50 +02:00
/// Update all instruction with this label in the backpatch list to
/// the current position.
void
backpatch(struct sp_label *);
2007-10-16 22:11:50 +02:00
/// Start a new cont. backpatch level. If 'i' is NULL, the level is just incr.
void
new_cont_backpatch(sp_instr_opt_meta *i);
2007-10-16 22:11:50 +02:00
/// Add an instruction to the current level
void
add_cont_backpatch(sp_instr_opt_meta *i);
2007-10-16 22:11:50 +02:00
/// Backpatch (and pop) the current level to the current position.
void
do_cont_backpatch();
char *name(uint *lenp = 0) const
{
if (lenp)
*lenp= m_name.length;
return m_name.str;
}
char *create_string(THD *thd, ulong *lenp);
Field *create_result_field(uint field_max_length, const char *field_name,
TABLE *table);
bool fill_field_definition(THD *thd, LEX *lex,
enum enum_field_types field_type,
Create_field *field_def);
void set_info(longlong created, longlong modified,
st_sp_chistics *chistics, ulong sql_mode);
void set_definer(const char *definer, uint definerlen);
void set_definer(const LEX_STRING *user_name, const LEX_STRING *host_name);
void reset_thd_mem_root(THD *thd);
void restore_thd_mem_root(THD *thd);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
/**
Optimize the code.
*/
void optimize();
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
/**
Helper used during flow analysis during code optimization.
See the implementation of <code>opt_mark()</code>.
@param ip the instruction to add to the leads list
@param leads the list of remaining paths to explore in the graph that
represents the code, during flow analysis.
*/
void add_mark_lead(uint ip, List<sp_instr> *leads);
void recursion_level_error(THD *thd);
inline sp_instr *
get_instr(uint i)
{
sp_instr *ip;
if (i < m_instr.elements)
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
get_dynamic(&m_instr, (uchar*)&ip, i);
else
ip= NULL;
return ip;
}
2003-05-06 18:09:20 +02:00
/* Add tables used by routine to the table list. */
bool add_used_tables_to_table_list(THD *thd,
TABLE_LIST ***query_tables_last_ptr,
TABLE_LIST *belong_to_view);
2007-10-16 22:11:50 +02:00
/**
Check if this stored routine contains statements disallowed
in a stored function or trigger, and set an appropriate error message
if this is the case.
*/
bool is_not_allowed_in_function(const char *where)
{
if (m_flags & CONTAINS_DYNAMIC_SQL)
my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "Dynamic SQL");
else if (m_flags & MULTI_RESULTS)
my_error(ER_SP_NO_RETSET, MYF(0), where);
else if (m_flags & HAS_SET_AUTOCOMMIT_STMT)
my_error(ER_SP_CANT_SET_AUTOCOMMIT, MYF(0));
else if (m_flags & HAS_COMMIT_OR_ROLLBACK)
my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
else if (m_flags & HAS_SQLCOM_RESET)
my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "RESET");
else if (m_flags & HAS_SQLCOM_FLUSH)
my_error(ER_STMT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0), "FLUSH");
return test(m_flags &
(CONTAINS_DYNAMIC_SQL|MULTI_RESULTS|HAS_SET_AUTOCOMMIT_STMT|
HAS_COMMIT_OR_ROLLBACK|HAS_SQLCOM_RESET|HAS_SQLCOM_FLUSH));
}
#ifndef DBUG_OFF
int show_routine_code(THD *thd);
#endif
* Mixed replication mode * : 1) Fix for BUG#19630 "stored function inserting into two auto_increment breaks statement-based binlog": a stored function inserting into two such tables may fail to replicate (inserting wrong data in the slave's copy of the second table) if the slave's second table had an internal auto_increment counter different from master's. Because the auto_increment value autogenerated by master for the 2nd table does not go into binlog, only the first does, so the slave lacks information. To fix this, if running in mixed binlogging mode, if the stored function or trigger plans to update two different tables both having auto_increment columns, we switch to row-based for the whole function. We don't have a simple solution for statement-based binlogging mode, there the bug remains and will be documented as a known problem. Re-enabling rpl_switch_stm_row_mixed. 2) Fix for BUG#20630 "Mixed binlogging mode does not work with stored functions, triggers, views", which was a documented limitation (in mixed mode, we didn't detect that a stored function's execution needed row-based binlogging (due to some UUID() call for example); same for triggers, same for views (a view created from a SELECT UUID(), and doing INSERT INTO sometable SELECT theview; would not replicate row-based). This is implemented by, after parsing a routine's body, remembering in sp_head that this routine needs row-based binlogging. Then when this routine is used, the caller is marked to require row-based binlogging too. Same for views: when we parse a view and detect that its SELECT needs row-based binary logging, we mark the calling LEX as such. 3) Fix for BUG#20499 "mixed mode with temporary table breaks binlog": a temporary table containing e.g. UUID has its changes not binlogged, so any query updating a permanent table with data from the temporary table will run wrongly on slave. Solution: in mixed mode we don't switch back from row-based to statement-based when there exists temporary tables. 4) Attempt to test mysqlbinlog on a binlog generated by mysqlbinlog; impossible due to BUG#11312 and BUG#20329, but test is in place for when they are fixed.
2006-07-09 17:00:47 +02:00
/*
This method is intended for attributes of a routine which need
to propagate upwards to the LEX of the caller (when a property of a
sp_head needs to "taint" the caller).
*/
void propagate_attributes(LEX *lex)
{
/*
If this routine needs row-based binary logging, the entire top statement
too (we cannot switch from statement-based to row-based only for this
routine, as in statement-based the top-statement may be binlogged and
the substatements not).
*/
if (m_flags & BINLOG_ROW_BASED_IF_MIXED)
lex->set_stmt_unsafe();
* Mixed replication mode * : 1) Fix for BUG#19630 "stored function inserting into two auto_increment breaks statement-based binlog": a stored function inserting into two such tables may fail to replicate (inserting wrong data in the slave's copy of the second table) if the slave's second table had an internal auto_increment counter different from master's. Because the auto_increment value autogenerated by master for the 2nd table does not go into binlog, only the first does, so the slave lacks information. To fix this, if running in mixed binlogging mode, if the stored function or trigger plans to update two different tables both having auto_increment columns, we switch to row-based for the whole function. We don't have a simple solution for statement-based binlogging mode, there the bug remains and will be documented as a known problem. Re-enabling rpl_switch_stm_row_mixed. 2) Fix for BUG#20630 "Mixed binlogging mode does not work with stored functions, triggers, views", which was a documented limitation (in mixed mode, we didn't detect that a stored function's execution needed row-based binlogging (due to some UUID() call for example); same for triggers, same for views (a view created from a SELECT UUID(), and doing INSERT INTO sometable SELECT theview; would not replicate row-based). This is implemented by, after parsing a routine's body, remembering in sp_head that this routine needs row-based binlogging. Then when this routine is used, the caller is marked to require row-based binlogging too. Same for views: when we parse a view and detect that its SELECT needs row-based binary logging, we mark the calling LEX as such. 3) Fix for BUG#20499 "mixed mode with temporary table breaks binlog": a temporary table containing e.g. UUID has its changes not binlogged, so any query updating a permanent table with data from the temporary table will run wrongly on slave. Solution: in mixed mode we don't switch back from row-based to statement-based when there exists temporary tables. 4) Attempt to test mysqlbinlog on a binlog generated by mysqlbinlog; impossible due to BUG#11312 and BUG#20329, but test is in place for when they are fixed.
2006-07-09 17:00:47 +02:00
}
private:
2007-10-16 22:11:50 +02:00
MEM_ROOT *m_thd_root; ///< Temp. store for thd's mem_root
THD *m_thd; ///< Set if we have reset mem_root
2007-10-16 22:11:50 +02:00
sp_pcontext *m_pcont; ///< Parse context
List<LEX> m_lex; ///< Temp. store for the other lex
DYNAMIC_ARRAY m_instr; ///< The "instructions"
typedef struct
{
struct sp_label *lab;
sp_instr *instr;
} bp_t;
2007-10-16 22:11:50 +02:00
List<bp_t> m_backpatch; ///< Instructions needing backpatching
/**
We need a special list for backpatching of instructions with a continue
destination (in the case of a continue handler catching an error in
the test), since it would otherwise interfere with the normal backpatch
mechanism - e.g. jump_if_not instructions have two different destinations
which are to be patched differently.
Since these occur in a more restricted way (always the same "level" in
the code), we don't need the label.
2007-10-16 22:11:50 +02:00
*/
List<sp_instr_opt_meta> m_cont_backpatch;
uint m_cont_level; // The current cont. backpatch level
2007-10-16 22:11:50 +02:00
/**
Multi-set representing optimized list of tables to be locked by this
routine. Does not include tables which are used by invoked routines.
2007-10-16 22:11:50 +02:00
@note
For prelocking-free SPs this multiset is constructed too.
We do so because the same instance of sp_head may be called both
in prelocked mode and in non-prelocked mode.
*/
HASH m_sptabs;
bool
execute(THD *thd);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
/**
Perform a forward flow analysis in the generated code.
Mark reachable instructions, for the optimizer.
*/
void opt_mark();
2007-10-16 22:11:50 +02:00
/**
Merge the list of tables used by query into the multi-set of tables used
by routine.
*/
bool merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check);
}; // class sp_head : public Sql_alloc
//
// "Instructions"...
//
class sp_instr :public Query_arena, public Sql_alloc
{
2007-10-16 22:11:50 +02:00
sp_instr(const sp_instr &); /**< Prevent use of these */
void operator=(sp_instr &);
public:
uint marked;
2007-10-16 22:11:50 +02:00
uint m_ip; ///< My index
sp_pcontext *m_ctx; ///< My parse context
2007-10-16 22:11:50 +02:00
/// Should give each a name or type code for debugging purposes?
sp_instr(uint ip, sp_pcontext *ctx)
:Query_arena(0, INITIALIZED_FOR_SP), marked(0), m_ip(ip), m_ctx(ctx)
{}
virtual ~sp_instr()
{ free_items(); }
2007-10-16 22:11:50 +02:00
/**
Execute this instruction
2007-10-16 22:11:50 +02:00
@param thd Thread handle
@param[out] nextp index of the next instruction to execute. (For most
instructions this will be the instruction following this
one). Note that this parameter is undefined in case of
errors, use get_cont_dest() to find the continuation
instruction for CONTINUE error handlers.
@retval 0 on success,
@retval other if some error occured
*/
virtual int execute(THD *thd, uint *nextp) = 0;
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-06 03:42:07 +01:00
/**
Execute <code>open_and_lock_tables()</code> for this statement.
Open and lock the tables used by this statement, as a pre-requisite
to execute the core logic of this instruction with
<code>exec_core()</code>.
@param thd the current thread
@param tables the list of tables to open and lock
@return zero on success, non zero on failure.
*/
int exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables);
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-06 03:42:07 +01:00
/**
Get the continuation destination of this instruction.
@return the continuation destination
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-06 03:42:07 +01:00
*/
virtual uint get_cont_dest();
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-06 03:42:07 +01:00
/*
Execute core function of instruction after all preparations (e.g.
setting of proper LEX, saving part of the thread context have been
done).
Should be implemented for instructions using expressions or whole
statements (thus having to have own LEX). Used in concert with
sp_lex_keeper class and its descendants (there are none currently).
*/
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str) = 0;
virtual void backpatch(uint dest, sp_pcontext *dst_ctx)
{}
2007-10-16 22:11:50 +02:00
/**
Mark this instruction as reachable during optimization and return the
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
index to the next instruction. Jump instruction will add their
destination to the leads list.
*/
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
{
marked= 1;
return m_ip+1;
}
2007-10-16 22:11:50 +02:00
/**
Short-cut jumps to jumps during optimization. This is used by the
jump instructions' opt_mark() methods. 'start' is the starting point,
used to prevent the mark sweep from looping for ever. Return the
end destination.
*/
virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
{
return m_ip;
}
2007-10-16 22:11:50 +02:00
/**
Inform the instruction that it has been moved during optimization.
Most instructions will simply update its index, but jump instructions
must also take care of their destination pointers. Forward jumps get
pushed to the backpatch list 'ibp'.
*/
virtual void opt_move(uint dst, List<sp_instr> *ibp)
{
m_ip= dst;
}
}; // class sp_instr : public Sql_alloc
2007-10-16 22:11:50 +02:00
/**
Auxilary class to which instructions delegate responsibility
for handling LEX and preparations before executing statement
or calculating complex expression.
Exist mainly to avoid having double hierarchy between instruction
classes.
2007-10-16 22:11:50 +02:00
@todo
Add ability to not store LEX and do any preparations if
expression used is simple.
*/
class sp_lex_keeper
{
2007-10-16 22:11:50 +02:00
/** Prevent use of these */
sp_lex_keeper(const sp_lex_keeper &);
void operator=(sp_lex_keeper &);
public:
sp_lex_keeper(LEX *lex, bool lex_resp)
: m_lex(lex), m_lex_resp(lex_resp),
lex_query_tables_own_last(NULL)
{
lex->sp_lex_in_use= TRUE;
}
virtual ~sp_lex_keeper()
{
if (m_lex_resp)
{
lex_end(m_lex);
delete m_lex;
}
}
2007-10-16 22:11:50 +02:00
/**
Prepare execution of instruction using LEX, if requested check whenever
we have read access to tables used and open/lock them, call instruction's
exec_core() method, perform cleanup afterwards.
2007-10-16 22:11:50 +02:00
@todo Conflicting comment in sp_head.cc
*/
int reset_lex_and_exec_core(THD *thd, uint *nextp, bool open_tables,
sp_instr* instr);
inline uint sql_command() const
{
return (uint)m_lex->sql_command;
}
void disable_query_cache()
{
m_lex->safe_to_cache_query= 0;
}
private:
LEX *m_lex;
2007-10-16 22:11:50 +02:00
/**
Indicates whenever this sp_lex_keeper instance responsible
for LEX deletion.
*/
bool m_lex_resp;
/*
Support for being able to execute this statement in two modes:
a) inside prelocked mode set by the calling procedure or its ancestor.
b) outside of prelocked mode, when this statement enters/leaves
prelocked mode itself.
*/
2007-10-16 22:11:50 +02:00
/**
List of additional tables this statement needs to lock when it
enters/leaves prelocked mode on its own.
*/
TABLE_LIST *prelocking_tables;
2007-10-16 22:11:50 +02:00
/**
The value m_lex->query_tables_own_last should be set to this when the
statement enters/leaves prelocked mode on its own.
*/
TABLE_LIST **lex_query_tables_own_last;
};
2007-10-16 22:11:50 +02:00
/**
Call out to some prepared SQL statement.
*/
class sp_instr_stmt : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_stmt(const sp_instr_stmt &); /**< Prevent use of these */
void operator=(sp_instr_stmt &);
public:
2007-10-16 22:11:50 +02:00
LEX_STRING m_query; ///< For thd->query
sp_instr_stmt(uint ip, sp_pcontext *ctx, LEX *lex)
: sp_instr(ip, ctx), m_lex_keeper(lex, TRUE)
{
m_query.str= 0;
m_query.length= 0;
}
virtual ~sp_instr_stmt()
{};
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
private:
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_stmt : public sp_instr
class sp_instr_set : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_set(const sp_instr_set &); /**< Prevent use of these */
void operator=(sp_instr_set &);
public:
sp_instr_set(uint ip, sp_pcontext *ctx,
uint offset, Item *val, enum enum_field_types type_arg,
LEX *lex, bool lex_resp)
: sp_instr(ip, ctx), m_offset(offset), m_value(val), m_type(type_arg),
m_lex_keeper(lex, lex_resp)
{}
virtual ~sp_instr_set()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
private:
2007-10-16 22:11:50 +02:00
uint m_offset; ///< Frame offset
Item *m_value;
2007-10-16 22:11:50 +02:00
enum enum_field_types m_type; ///< The declared type
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_set : public sp_instr
2007-10-16 22:11:50 +02:00
/**
Set NEW/OLD row field value instruction. Used in triggers.
*/
class sp_instr_set_trigger_field : public sp_instr
{
sp_instr_set_trigger_field(const sp_instr_set_trigger_field &);
void operator=(sp_instr_set_trigger_field &);
public:
sp_instr_set_trigger_field(uint ip, sp_pcontext *ctx,
Item_trigger_field *trg_fld,
Item *val, LEX *lex)
: sp_instr(ip, ctx),
trigger_field(trg_fld),
value(val), m_lex_keeper(lex, TRUE)
{}
virtual ~sp_instr_set_trigger_field()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
private:
Item_trigger_field *trigger_field;
Item *value;
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_trigger_field : public sp_instr
2007-10-16 22:11:50 +02:00
/**
An abstract class for all instructions with destinations that
needs to be updated by the optimizer.
2007-10-16 22:11:50 +02:00
Even if not all subclasses will use both the normal destination and
the continuation destination, we put them both here for simplicity.
2007-10-16 22:11:50 +02:00
*/
class sp_instr_opt_meta : public sp_instr
{
public:
2007-10-16 22:11:50 +02:00
uint m_dest; ///< Where we will go
uint m_cont_dest; ///< Where continue handlers will go
sp_instr_opt_meta(uint ip, sp_pcontext *ctx)
: sp_instr(ip, ctx),
m_dest(0), m_cont_dest(0), m_optdest(0), m_cont_optdest(0)
{}
sp_instr_opt_meta(uint ip, sp_pcontext *ctx, uint dest)
: sp_instr(ip, ctx),
m_dest(dest), m_cont_dest(0), m_optdest(0), m_cont_optdest(0)
{}
virtual ~sp_instr_opt_meta()
{}
virtual void set_destination(uint old_dest, uint new_dest)
= 0;
virtual uint get_cont_dest();
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-06 03:42:07 +01:00
protected:
2007-10-16 22:11:50 +02:00
sp_instr *m_optdest; ///< Used during optimization
sp_instr *m_cont_optdest; ///< Used during optimization
}; // class sp_instr_opt_meta : public sp_instr
class sp_instr_jump : public sp_instr_opt_meta
{
2007-10-16 22:11:50 +02:00
sp_instr_jump(const sp_instr_jump &); /**< Prevent use of these */
void operator=(sp_instr_jump &);
public:
sp_instr_jump(uint ip, sp_pcontext *ctx)
: sp_instr_opt_meta(ip, ctx)
{}
sp_instr_jump(uint ip, sp_pcontext *ctx, uint dest)
: sp_instr_opt_meta(ip, ctx, dest)
{}
virtual ~sp_instr_jump()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start);
virtual void opt_move(uint dst, List<sp_instr> *ibp);
virtual void backpatch(uint dest, sp_pcontext *dst_ctx)
{
/* Calling backpatch twice is a logic flaw in jump resolution. */
DBUG_ASSERT(m_dest == 0);
m_dest= dest;
}
2007-10-16 22:11:50 +02:00
/**
Update the destination; used by the optimizer.
*/
virtual void set_destination(uint old_dest, uint new_dest)
{
if (m_dest == old_dest)
m_dest= new_dest;
}
}; // class sp_instr_jump : public sp_instr_opt_meta
class sp_instr_jump_if_not : public sp_instr_jump
{
2007-10-16 22:11:50 +02:00
sp_instr_jump_if_not(const sp_instr_jump_if_not &); /**< Prevent use of these */
void operator=(sp_instr_jump_if_not &);
public:
sp_instr_jump_if_not(uint ip, sp_pcontext *ctx, Item *i, LEX *lex)
: sp_instr_jump(ip, ctx), m_expr(i),
m_lex_keeper(lex, TRUE)
{}
sp_instr_jump_if_not(uint ip, sp_pcontext *ctx, Item *i, uint dest, LEX *lex)
: sp_instr_jump(ip, ctx, dest), m_expr(i),
m_lex_keeper(lex, TRUE)
{}
virtual ~sp_instr_jump_if_not()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
2007-10-16 22:11:50 +02:00
/** Override sp_instr_jump's shortcut; we stop here */
virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
{
return m_ip;
}
virtual void opt_move(uint dst, List<sp_instr> *ibp);
virtual void set_destination(uint old_dest, uint new_dest)
{
sp_instr_jump::set_destination(old_dest, new_dest);
if (m_cont_dest == old_dest)
m_cont_dest= new_dest;
}
private:
2007-10-16 22:11:50 +02:00
Item *m_expr; ///< The condition
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_jump_if_not : public sp_instr_jump
class sp_instr_freturn : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_freturn(const sp_instr_freturn &); /**< Prevent use of these */
void operator=(sp_instr_freturn &);
public:
sp_instr_freturn(uint ip, sp_pcontext *ctx,
Item *val, enum enum_field_types type_arg, LEX *lex)
: sp_instr(ip, ctx), m_value(val), m_type(type_arg),
m_lex_keeper(lex, TRUE)
{}
virtual ~sp_instr_freturn()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
{
marked= 1;
return UINT_MAX;
}
protected:
Item *m_value;
enum enum_field_types m_type;
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_freturn : public sp_instr
class sp_instr_hpush_jump : public sp_instr_jump
{
2007-10-16 22:11:50 +02:00
sp_instr_hpush_jump(const sp_instr_hpush_jump &); /**< Prevent use of these */
void operator=(sp_instr_hpush_jump &);
public:
sp_instr_hpush_jump(uint ip, sp_pcontext *ctx, int htype, uint fp)
: sp_instr_jump(ip, ctx), m_type(htype), m_frame(fp)
{
m_cond.empty();
}
virtual ~sp_instr_hpush_jump()
{
m_cond.empty();
}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
2007-10-16 22:11:50 +02:00
/** Override sp_instr_jump's shortcut; we stop here. */
virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
{
return m_ip;
}
inline void add_condition(struct sp_cond_type *cond)
{
m_cond.push_front(cond);
}
private:
2007-10-16 22:11:50 +02:00
int m_type; ///< Handler type
uint m_frame;
List<struct sp_cond_type> m_cond;
}; // class sp_instr_hpush_jump : public sp_instr_jump
class sp_instr_hpop : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_hpop(const sp_instr_hpop &); /**< Prevent use of these */
void operator=(sp_instr_hpop &);
public:
sp_instr_hpop(uint ip, sp_pcontext *ctx, uint count)
: sp_instr(ip, ctx), m_count(count)
{}
virtual ~sp_instr_hpop()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
private:
uint m_count;
}; // class sp_instr_hpop : public sp_instr
class sp_instr_hreturn : public sp_instr_jump
{
2007-10-16 22:11:50 +02:00
sp_instr_hreturn(const sp_instr_hreturn &); /**< Prevent use of these */
void operator=(sp_instr_hreturn &);
public:
sp_instr_hreturn(uint ip, sp_pcontext *ctx, uint fp)
: sp_instr_jump(ip, ctx), m_frame(fp)
{}
virtual ~sp_instr_hreturn()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
/* This instruction will not be short cut optimized. */
virtual uint opt_shortcut_jump(sp_head *sp, sp_instr *start)
{
return m_ip;
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
private:
uint m_frame;
}; // class sp_instr_hreturn : public sp_instr_jump
2007-10-16 22:11:50 +02:00
/** This is DECLARE CURSOR */
class sp_instr_cpush : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_cpush(const sp_instr_cpush &); /**< Prevent use of these */
void operator=(sp_instr_cpush &);
public:
sp_instr_cpush(uint ip, sp_pcontext *ctx, LEX *lex, uint offset)
: sp_instr(ip, ctx), m_lex_keeper(lex, TRUE), m_cursor(offset)
{}
virtual ~sp_instr_cpush()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
2007-10-16 22:11:50 +02:00
/**
This call is used to cleanup the instruction when a sensitive
cursor is closed. For now stored procedures always use materialized
cursors and the call is not used.
*/
virtual void cleanup_stmt() { /* no op */ }
private:
sp_lex_keeper m_lex_keeper;
2007-10-16 22:11:50 +02:00
uint m_cursor; /**< Frame offset (for debugging) */
}; // class sp_instr_cpush : public sp_instr
class sp_instr_cpop : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_cpop(const sp_instr_cpop &); /**< Prevent use of these */
void operator=(sp_instr_cpop &);
public:
sp_instr_cpop(uint ip, sp_pcontext *ctx, uint count)
: sp_instr(ip, ctx), m_count(count)
{}
virtual ~sp_instr_cpop()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
private:
uint m_count;
}; // class sp_instr_cpop : public sp_instr
class sp_instr_copen : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_copen(const sp_instr_copen &); /**< Prevent use of these */
void operator=(sp_instr_copen &);
public:
sp_instr_copen(uint ip, sp_pcontext *ctx, uint c)
: sp_instr(ip, ctx), m_cursor(c)
{}
virtual ~sp_instr_copen()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
private:
2007-10-16 22:11:50 +02:00
uint m_cursor; ///< Stack index
}; // class sp_instr_copen : public sp_instr_stmt
class sp_instr_cclose : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_cclose(const sp_instr_cclose &); /**< Prevent use of these */
void operator=(sp_instr_cclose &);
public:
sp_instr_cclose(uint ip, sp_pcontext *ctx, uint c)
: sp_instr(ip, ctx), m_cursor(c)
{}
virtual ~sp_instr_cclose()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
private:
uint m_cursor;
}; // class sp_instr_cclose : public sp_instr
class sp_instr_cfetch : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_cfetch(const sp_instr_cfetch &); /**< Prevent use of these */
void operator=(sp_instr_cfetch &);
public:
sp_instr_cfetch(uint ip, sp_pcontext *ctx, uint c)
: sp_instr(ip, ctx), m_cursor(c)
{
m_varlist.empty();
}
virtual ~sp_instr_cfetch()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
void add_to_varlist(struct sp_variable *var)
{
m_varlist.push_back(var);
}
private:
uint m_cursor;
List<struct sp_variable> m_varlist;
}; // class sp_instr_cfetch : public sp_instr
class sp_instr_error : public sp_instr
{
2007-10-16 22:11:50 +02:00
sp_instr_error(const sp_instr_error &); /**< Prevent use of these */
void operator=(sp_instr_error &);
public:
sp_instr_error(uint ip, sp_pcontext *ctx, int errcode)
: sp_instr(ip, ctx), m_errcode(errcode)
{}
virtual ~sp_instr_error()
{}
virtual int execute(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads)
{
marked= 1;
return UINT_MAX;
}
private:
int m_errcode;
}; // class sp_instr_error : public sp_instr
class sp_instr_set_case_expr : public sp_instr_opt_meta
{
public:
sp_instr_set_case_expr(uint ip, sp_pcontext *ctx, uint case_expr_id,
Item *case_expr, LEX *lex)
: sp_instr_opt_meta(ip, ctx),
m_case_expr_id(case_expr_id), m_case_expr(case_expr),
m_lex_keeper(lex, TRUE)
{}
virtual ~sp_instr_set_case_expr()
{}
virtual int execute(THD *thd, uint *nextp);
virtual int exec_core(THD *thd, uint *nextp);
virtual void print(String *str);
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 20:14:29 +01:00
virtual uint opt_mark(sp_head *sp, List<sp_instr> *leads);
virtual void opt_move(uint dst, List<sp_instr> *ibp);
virtual void set_destination(uint old_dest, uint new_dest)
{
if (m_cont_dest == old_dest)
m_cont_dest= new_dest;
}
private:
uint m_case_expr_id;
Item *m_case_expr;
sp_lex_keeper m_lex_keeper;
}; // class sp_instr_set_case_expr : public sp_instr_opt_meta
#ifndef NO_EMBEDDED_ACCESS_CHECKS
bool
sp_change_security_context(THD *thd, sp_head *sp,
Security_context **backup);
void
sp_restore_security_context(THD *thd, Security_context *backup);
bool
set_routine_security_ctx(THD *thd, sp_head *sp, bool is_proc,
Security_context **save_ctx);
#endif /* NO_EMBEDDED_ACCESS_CHECKS */
TABLE_LIST *
sp_add_to_query_tables(THD *thd, LEX *lex,
const char *db, const char *name,
thr_lock_type locktype);
Item *
sp_prepare_func_item(THD* thd, Item **it_addr);
bool
sp_eval_expr(THD *thd, Field *result_field, Item **expr_item_ptr);
/**
@} (end of group Stored_Routines)
*/
#endif /* _SP_HEAD_H_ */