mariadb/sql/sp_head.cc

3906 lines
100 KiB
C++
Raw Normal View History

/* Copyright (C) 2002 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "mysql_priv.h"
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation
#endif
#include "sp_head.h"
#include "sp.h"
#include "sp_pcontext.h"
#include "sp_rcontext.h"
#include "sp_cache.h"
/*
Sufficient max length of printed destinations and frame offsets (all uints).
*/
#define SP_INSTR_UINT_MAXLEN 8
#define SP_STMT_PRINT_MAXLEN 40
#include <my_user.h>
extern "C" uchar *sp_table_key(const uchar *ptr, size_t *plen, my_bool first);
Item_result
sp_map_result_type(enum enum_field_types type)
{
switch (type) {
case MYSQL_TYPE_BIT:
case MYSQL_TYPE_TINY:
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_LONG:
case MYSQL_TYPE_LONGLONG:
case MYSQL_TYPE_INT24:
return INT_RESULT;
case MYSQL_TYPE_DECIMAL:
2005-02-09 02:50:45 +04:00
case MYSQL_TYPE_NEWDECIMAL:
return DECIMAL_RESULT;
case MYSQL_TYPE_FLOAT:
case MYSQL_TYPE_DOUBLE:
return REAL_RESULT;
default:
return STRING_RESULT;
}
}
Item::Type
sp_map_item_type(enum enum_field_types type)
{
switch (type) {
case MYSQL_TYPE_BIT:
case MYSQL_TYPE_TINY:
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_LONG:
case MYSQL_TYPE_LONGLONG:
case MYSQL_TYPE_INT24:
return Item::INT_ITEM;
case MYSQL_TYPE_DECIMAL:
case MYSQL_TYPE_NEWDECIMAL:
return Item::DECIMAL_ITEM;
case MYSQL_TYPE_FLOAT:
case MYSQL_TYPE_DOUBLE:
return Item::REAL_ITEM;
default:
return Item::STRING_ITEM;
}
}
/*
Return a string representation of the Item value.
NOTE: If the item has a string result type, the string is escaped
according to its character set.
SYNOPSIS
item a pointer to the Item
str string buffer for representation of the value
RETURN
NULL on error
a pointer to valid a valid string on success
*/
static String *
sp_get_item_value(THD *thd, Item *item, String *str)
{
switch (item->result_type()) {
case REAL_RESULT:
case INT_RESULT:
case DECIMAL_RESULT:
if (item->field_type() != MYSQL_TYPE_BIT)
return item->val_str(str);
else {/* Bit type is handled as binary string */}
case STRING_RESULT:
{
String *result= item->val_str(str);
if (!result)
return NULL;
{
char buf_holder[STRING_BUFFER_USUAL_SIZE];
String buf(buf_holder, sizeof(buf_holder), result->charset());
CHARSET_INFO *cs= thd->variables.character_set_client;
/* We must reset length of the buffer, because of String specificity. */
buf.length(0);
buf.append('_');
buf.append(result->charset()->csname);
if (cs->escape_with_backslash_is_dangerous)
buf.append(' ');
append_query_string(cs, result, &buf);
str->copy(buf);
return str;
}
}
case ROW_RESULT:
default:
return NULL;
}
}
/*
SYNOPSIS
sp_get_flags_for_command()
DESCRIPTION
Returns a combination of:
* sp_head::MULTI_RESULTS: added if the 'cmd' is a command that might
result in multiple result sets being sent back.
* sp_head::CONTAINS_DYNAMIC_SQL: added if 'cmd' is one of PREPARE,
EXECUTE, DEALLOCATE.
*/
uint
sp_get_flags_for_command(LEX *lex)
{
uint flags;
switch (lex->sql_command) {
case SQLCOM_SELECT:
if (lex->result)
{
flags= 0; /* This is a SELECT with INTO clause */
break;
}
/* fallthrough */
case SQLCOM_ANALYZE:
case SQLCOM_BACKUP_TABLE:
case SQLCOM_OPTIMIZE:
case SQLCOM_PRELOAD_KEYS:
case SQLCOM_ASSIGN_TO_KEYCACHE:
case SQLCOM_CHECKSUM:
case SQLCOM_CHECK:
case SQLCOM_HA_READ:
case SQLCOM_SHOW_AUTHORS:
case SQLCOM_SHOW_BINLOGS:
case SQLCOM_SHOW_BINLOG_EVENTS:
case SQLCOM_SHOW_CHARSETS:
case SQLCOM_SHOW_COLLATIONS:
case SQLCOM_SHOW_COLUMN_TYPES:
case SQLCOM_SHOW_CONTRIBUTORS:
case SQLCOM_SHOW_CREATE:
case SQLCOM_SHOW_CREATE_DB:
case SQLCOM_SHOW_CREATE_FUNC:
case SQLCOM_SHOW_CREATE_PROC:
case SQLCOM_SHOW_CREATE_EVENT:
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
case SQLCOM_SHOW_CREATE_TRIGGER:
case SQLCOM_SHOW_DATABASES:
case SQLCOM_SHOW_ERRORS:
case SQLCOM_SHOW_FIELDS:
case SQLCOM_SHOW_FUNC_CODE:
case SQLCOM_SHOW_GRANTS:
case SQLCOM_SHOW_ENGINE_STATUS:
case SQLCOM_SHOW_ENGINE_LOGS:
case SQLCOM_SHOW_ENGINE_MUTEX:
case SQLCOM_SHOW_EVENTS:
case SQLCOM_SHOW_KEYS:
case SQLCOM_SHOW_MASTER_STAT:
case SQLCOM_SHOW_NEW_MASTER:
case SQLCOM_SHOW_OPEN_TABLES:
case SQLCOM_SHOW_PRIVILEGES:
case SQLCOM_SHOW_PROCESSLIST:
case SQLCOM_SHOW_PROC_CODE:
case SQLCOM_SHOW_SLAVE_HOSTS:
case SQLCOM_SHOW_SLAVE_STAT:
case SQLCOM_SHOW_STATUS:
case SQLCOM_SHOW_STATUS_FUNC:
case SQLCOM_SHOW_STATUS_PROC:
case SQLCOM_SHOW_STORAGE_ENGINES:
case SQLCOM_SHOW_TABLES:
case SQLCOM_SHOW_VARIABLES:
case SQLCOM_SHOW_WARNS:
case SQLCOM_REPAIR:
case SQLCOM_RESTORE_TABLE:
flags= sp_head::MULTI_RESULTS;
break;
/*
EXECUTE statement may return a result set, but doesn't have to.
We can't, however, know it in advance, and therefore must add
this statement here. This is ok, as is equivalent to a result-set
statement within an IF condition.
*/
case SQLCOM_EXECUTE:
flags= sp_head::MULTI_RESULTS | sp_head::CONTAINS_DYNAMIC_SQL;
break;
case SQLCOM_PREPARE:
case SQLCOM_DEALLOCATE_PREPARE:
flags= sp_head::CONTAINS_DYNAMIC_SQL;
break;
case SQLCOM_CREATE_TABLE:
if (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)
flags= 0;
else
flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
break;
case SQLCOM_DROP_TABLE:
if (lex->drop_temporary)
flags= 0;
else
flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
break;
case SQLCOM_FLUSH:
flags= sp_head::HAS_SQLCOM_FLUSH;
break;
case SQLCOM_RESET:
flags= sp_head::HAS_SQLCOM_RESET;
break;
case SQLCOM_CREATE_INDEX:
case SQLCOM_CREATE_DB:
case SQLCOM_CREATE_VIEW:
case SQLCOM_CREATE_TRIGGER:
case SQLCOM_CREATE_USER:
case SQLCOM_ALTER_TABLE:
case SQLCOM_GRANT:
case SQLCOM_REVOKE:
case SQLCOM_BEGIN:
case SQLCOM_RENAME_TABLE:
case SQLCOM_RENAME_USER:
case SQLCOM_DROP_INDEX:
case SQLCOM_DROP_DB:
case SQLCOM_REVOKE_ALL:
case SQLCOM_DROP_USER:
case SQLCOM_DROP_VIEW:
case SQLCOM_DROP_TRIGGER:
case SQLCOM_TRUNCATE:
case SQLCOM_COMMIT:
case SQLCOM_ROLLBACK:
case SQLCOM_LOAD:
case SQLCOM_LOAD_MASTER_DATA:
case SQLCOM_LOCK_TABLES:
case SQLCOM_CREATE_PROCEDURE:
case SQLCOM_CREATE_SPFUNCTION:
case SQLCOM_ALTER_PROCEDURE:
case SQLCOM_ALTER_FUNCTION:
case SQLCOM_DROP_PROCEDURE:
case SQLCOM_DROP_FUNCTION:
case SQLCOM_CREATE_EVENT:
case SQLCOM_ALTER_EVENT:
case SQLCOM_DROP_EVENT:
case SQLCOM_INSTALL_PLUGIN:
case SQLCOM_UNINSTALL_PLUGIN:
flags= sp_head::HAS_COMMIT_OR_ROLLBACK;
break;
default:
flags= 0;
break;
}
return flags;
}
/*
Prepare an Item for evaluation (call of fix_fields).
SYNOPSIS
sp_prepare_func_item()
thd thread handler
it_addr pointer on item refernce
RETURN
NULL error
prepared item
*/
Item *
sp_prepare_func_item(THD* thd, Item **it_addr)
{
DBUG_ENTER("sp_prepare_func_item");
it_addr= (*it_addr)->this_item_addr(thd, it_addr);
if (!(*it_addr)->fixed &&
((*it_addr)->fix_fields(thd, it_addr) ||
(*it_addr)->check_cols(1)))
{
DBUG_PRINT("info", ("fix_fields() failed"));
DBUG_RETURN(NULL);
}
DBUG_RETURN(*it_addr);
}
/*
Evaluate an expression and store the result in the field.
SYNOPSIS
sp_eval_expr()
thd - current thread object
expr_item - the root item of the expression
result_field - the field to store the result
RETURN VALUES
FALSE on success
TRUE on error
*/
bool
sp_eval_expr(THD *thd, Field *result_field, Item **expr_item_ptr)
{
Item *expr_item;
DBUG_ENTER("sp_eval_expr");
2006-05-15 18:41:05 +02:00
if (!*expr_item_ptr)
DBUG_RETURN(TRUE);
if (!(expr_item= sp_prepare_func_item(thd, expr_item_ptr)))
DBUG_RETURN(TRUE);
bool err_status= FALSE;
/*
Set THD flags to emit warnings/errors in case of overflow/type errors
during saving the item into the field.
Save original values and restore them after save.
*/
enum_check_fields save_count_cuted_fields= thd->count_cuted_fields;
bool save_abort_on_warning= thd->abort_on_warning;
(pushing for Andrei) Bug #27417 thd->no_trans_update.stmt lost value inside of SF-exec-stack Once had been set the flag might later got reset inside of a stored routine execution stack. The reason was in that there was no check if a new statement started at time of resetting. The artifact affects most of binlogable DML queries. Notice, that multi-update is wrapped up within bug@27716 fix, multi-delete bug@29136. Fixed with saving parent's statement flag of whether the statement modified non-transactional table, and unioning (merging) the value with that was gained in mysql_execute_command. Resettling thd->no_trans_update members into thd->transaction.`member`; Asserting code; Effectively the following properties are held. 1. At the end of a substatement thd->transaction.stmt.modified_non_trans_table reflects the fact if such a table got modified by the substatement. That also respects THD::really_abort_on_warnin() requirements. 2. Eventually thd->transaction.stmt.modified_non_trans_table will be computed as the union of the values of all invoked sub-statements. That fixes this bug#27417; Computing of thd->transaction.all.modified_non_trans_table is refined to base to the stmt's value for all the case including insert .. select statement which before the patch had an extra issue bug@28960. Minor issues are covered with mysql_load, mysql_delete, and binloggin of insert in to temp_table select. The supplied test verifies limitely, mostly asserts. The ultimate testing is defered for bug@13270, bug@23333.
2007-07-30 18:27:36 +03:00
bool save_stmt_modified_non_trans_table= thd->transaction.stmt.modified_non_trans_table;
thd->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL;
thd->abort_on_warning=
thd->variables.sql_mode &
(MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES);
(pushing for Andrei) Bug #27417 thd->no_trans_update.stmt lost value inside of SF-exec-stack Once had been set the flag might later got reset inside of a stored routine execution stack. The reason was in that there was no check if a new statement started at time of resetting. The artifact affects most of binlogable DML queries. Notice, that multi-update is wrapped up within bug@27716 fix, multi-delete bug@29136. Fixed with saving parent's statement flag of whether the statement modified non-transactional table, and unioning (merging) the value with that was gained in mysql_execute_command. Resettling thd->no_trans_update members into thd->transaction.`member`; Asserting code; Effectively the following properties are held. 1. At the end of a substatement thd->transaction.stmt.modified_non_trans_table reflects the fact if such a table got modified by the substatement. That also respects THD::really_abort_on_warnin() requirements. 2. Eventually thd->transaction.stmt.modified_non_trans_table will be computed as the union of the values of all invoked sub-statements. That fixes this bug#27417; Computing of thd->transaction.all.modified_non_trans_table is refined to base to the stmt's value for all the case including insert .. select statement which before the patch had an extra issue bug@28960. Minor issues are covered with mysql_load, mysql_delete, and binloggin of insert in to temp_table select. The supplied test verifies limitely, mostly asserts. The ultimate testing is defered for bug@13270, bug@23333.
2007-07-30 18:27:36 +03:00
thd->transaction.stmt.modified_non_trans_table= FALSE;
/* Save the value in the field. Convert the value if needed. */
expr_item->save_in_field(result_field, 0);
thd->count_cuted_fields= save_count_cuted_fields;
thd->abort_on_warning= save_abort_on_warning;
(pushing for Andrei) Bug #27417 thd->no_trans_update.stmt lost value inside of SF-exec-stack Once had been set the flag might later got reset inside of a stored routine execution stack. The reason was in that there was no check if a new statement started at time of resetting. The artifact affects most of binlogable DML queries. Notice, that multi-update is wrapped up within bug@27716 fix, multi-delete bug@29136. Fixed with saving parent's statement flag of whether the statement modified non-transactional table, and unioning (merging) the value with that was gained in mysql_execute_command. Resettling thd->no_trans_update members into thd->transaction.`member`; Asserting code; Effectively the following properties are held. 1. At the end of a substatement thd->transaction.stmt.modified_non_trans_table reflects the fact if such a table got modified by the substatement. That also respects THD::really_abort_on_warnin() requirements. 2. Eventually thd->transaction.stmt.modified_non_trans_table will be computed as the union of the values of all invoked sub-statements. That fixes this bug#27417; Computing of thd->transaction.all.modified_non_trans_table is refined to base to the stmt's value for all the case including insert .. select statement which before the patch had an extra issue bug@28960. Minor issues are covered with mysql_load, mysql_delete, and binloggin of insert in to temp_table select. The supplied test verifies limitely, mostly asserts. The ultimate testing is defered for bug@13270, bug@23333.
2007-07-30 18:27:36 +03:00
thd->transaction.stmt.modified_non_trans_table= save_stmt_modified_non_trans_table;
if (thd->is_error())
{
/* Return error status if something went wrong. */
err_status= TRUE;
}
DBUG_RETURN(err_status);
}
/*
*
* sp_name
*
*/
sp_name::sp_name(THD *thd, char *key, uint key_len)
{
m_sroutines_key.str= key;
m_sroutines_key.length= key_len;
m_qname.str= ++key;
m_qname.length= key_len - 1;
if ((m_name.str= strchr(m_qname.str, '.')))
{
m_db.length= m_name.str - key;
m_db.str= strmake_root(thd->mem_root, key, m_db.length);
m_name.str++;
m_name.length= m_qname.length - m_db.length - 1;
}
else
{
m_name.str= m_qname.str;
m_name.length= m_qname.length;
m_db.str= 0;
m_db.length= 0;
}
m_explicit_name= false;
}
void
sp_name::init_qname(THD *thd)
{
const uint dot= !!m_db.length;
/* m_sroutines format: m_type + [database + dot] + name + nul */
m_sroutines_key.length= 1 + m_db.length + dot + m_name.length;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
if (!(m_sroutines_key.str= (char*) thd->alloc(m_sroutines_key.length + 1)))
return;
m_qname.length= m_sroutines_key.length - 1;
m_qname.str= m_sroutines_key.str + 1;
sprintf(m_qname.str, "%.*s%.*s%.*s",
(int) m_db.length, (m_db.length ? m_db.str : ""),
dot, ".",
(int) m_name.length, m_name.str);
}
/*
2006-01-19 16:13:04 +01:00
Check that the name 'ident' is ok. It's assumed to be an 'ident'
from the parser, so we only have to check length and trailing spaces.
The former is a standard requirement (and 'show status' assumes a
non-empty name), the latter is a mysql:ism as trailing spaces are
removed by get_field().
RETURN
TRUE - bad name
FALSE - name is ok
*/
bool
check_routine_name(LEX_STRING *ident)
{
if (!ident || !ident->str || !ident->str[0] ||
ident->str[ident->length-1] == ' ')
{
my_error(ER_SP_WRONG_NAME, MYF(0), ident->str);
return TRUE;
}
if (check_string_char_length(ident, "", NAME_CHAR_LEN,
system_charset_info, 1))
{
my_error(ER_TOO_LONG_IDENT, MYF(0), ident->str);
return TRUE;
}
return FALSE;
}
/*
*
* sp_head
*
*/
void *
sp_head::operator new(size_t size) throw()
{
DBUG_ENTER("sp_head::operator new");
MEM_ROOT own_root;
sp_head *sp;
init_sql_alloc(&own_root, MEM_ROOT_BLOCK_SIZE, MEM_ROOT_PREALLOC);
sp= (sp_head *) alloc_root(&own_root, size);
if (sp == NULL)
return NULL;
sp->main_mem_root= own_root;
DBUG_PRINT("info", ("mem_root 0x%lx", (ulong) &sp->mem_root));
DBUG_RETURN(sp);
}
void
2007-12-14 21:38:58 +01:00
sp_head::operator delete(void *ptr, size_t size) throw()
{
DBUG_ENTER("sp_head::operator delete");
MEM_ROOT own_root;
if (ptr == NULL)
DBUG_VOID_RETURN;
sp_head *sp= (sp_head *) ptr;
/* Make a copy of main_mem_root as free_root will free the sp */
own_root= sp->main_mem_root;
DBUG_PRINT("info", ("mem_root 0x%lx moved to 0x%lx",
(ulong) &sp->mem_root, (ulong) &own_root));
free_root(&own_root, MYF(0));
DBUG_VOID_RETURN;
}
sp_head::sp_head()
:Query_arena(&main_mem_root, INITIALIZED_FOR_SP),
m_flags(0), m_recursion_level(0), m_next_cached_sp(0),
m_cont_level(0)
{
const LEX_STRING str_reset= { NULL, 0 };
m_first_instance= this;
m_first_free_instance= this;
m_last_cached_sp= this;
m_return_field_def.charset = NULL;
/*
FIXME: the only use case when name is NULL is events, and it should
be rewritten soon. Remove the else part and replace 'if' with
an assert when this is done.
*/
m_db= m_name= m_qname= str_reset;
DBUG_ENTER("sp_head::sp_head");
m_backpatch.empty();
m_cont_backpatch.empty();
m_lex.empty();
hash_init(&m_sptabs, system_charset_info, 0, 0, 0, sp_table_key, 0, 0);
hash_init(&m_sroutines, system_charset_info, 0, 0, 0, sp_sroutine_key, 0, 0);
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
m_body_utf8.str= NULL;
m_body_utf8.length= 0;
DBUG_VOID_RETURN;
}
void
sp_head::init(LEX *lex)
{
DBUG_ENTER("sp_head::init");
Bug#26503 (Illegal SQL exception handler code causes the server to crash) Before this fix, the parser would accept illegal code in SQL exceptions handlers, that later causes the runtime to crash when executing the code, due to memory violations in the exception handler stack. The root cause of the problem is instructions within an exception handler that jumps to code located outside of the handler. This is illegal according to the SQL 2003 standard, since labels located outside the handler are not supposed to be visible (they are "out of scope"), so any instruction that jumps to these labels, like ITERATE or LEAVE, should not parse. The section of the standard that is relevant for this is : SQL:2003 SQL/PSM (ISO/IEC 9075-4:2003) section 13.1 <compound statement>, syntax rule 4 <quote> The scope of the <beginning label> is CS excluding every <SQL schema statement> contained in CS and excluding every <local handler declaration list> contained in CS. <beginning label> shall not be equivalent to any other <beginning label>s within that scope. </quote> With this fix, the C++ class sp_pcontext, which represent the "parsing context" tree (a.k.a symbol table) of a stored procedure, has been changed as follows: - constructors have been cleaned up, so that only building a root node for the tree is public; building nodes inside a tree is not public. - a new member, m_label_scope, indicates if a given syntactic context belongs to a DECLARE HANDLER block, - label resolution, in the method find_label(), has been changed to implement the restriction of scope regarding labels used in a compound statement. The actions in the parser, when parsing the body of a SQL exception handler, have been changed as follows: - the implementation of an exception handler (DECLARE HANDLER) now creates explicitly a new sp_pcontext, to isolate the code inside the handler from the containing compound statement context. - registering exception handlers as a result occurs in the parent context, see the rule sp_hcond_element - the code in sp_hcond_list has been cleaned up, to avoid code duplication In addition, the flags IN_SIMPLE_CASE and IN_HANDLER, declared in sp_head.h have been removed, since they are unused and broken by design (as seen with Bug 19194 (Right recursion in parser for CASE causes excessive stack usage, limitation), representing a stack in a single flag is not possible. Tests in sp-error have been added to show that illegal constructs are now rejected. Tests in sp have been added for code coverage, to show that ITERATE or LEAVE statements are legal when jumping to a label in scope, inside the body of an exception handler.
2007-03-14 12:02:32 -06:00
lex->spcont= m_pcont= new sp_pcontext();
if (!lex->spcont)
DBUG_VOID_RETURN;
/*
Altough trg_table_fields list is used only in triggers we init for all
types of stored procedures to simplify reset_lex()/restore_lex() code.
*/
lex->trg_table_fields.empty();
my_init_dynamic_array(&m_instr, sizeof(sp_instr *), 16, 8);
m_param_begin= NULL;
m_param_end= NULL;
m_body_begin= NULL ;
m_qname.str= NULL;
m_qname.length= 0;
m_db.str= NULL;
m_db.length= 0;
m_name.str= NULL;
m_name.length= 0;
m_params.str= NULL;
m_params.length= 0;
m_body.str= NULL;
m_body.length= 0;
m_defstr.str= NULL;
m_defstr.length= 0;
m_sroutines_key.str= NULL;
m_sroutines_key.length= 0;
m_return_field_def.charset= NULL;
DBUG_VOID_RETURN;
}
void
sp_head::init_sp_name(THD *thd, sp_name *spname)
{
DBUG_ENTER("sp_head::init_sp_name");
/* Must be initialized in the parser. */
DBUG_ASSERT(spname && spname->m_db.str && spname->m_db.length);
/* We have to copy strings to get them into the right memroot. */
m_db.length= spname->m_db.length;
m_db.str= strmake_root(thd->mem_root, spname->m_db.str, spname->m_db.length);
m_name.length= spname->m_name.length;
m_name.str= strmake_root(thd->mem_root, spname->m_name.str,
spname->m_name.length);
if (spname->m_qname.length == 0)
spname->init_qname(thd);
m_sroutines_key.length= spname->m_sroutines_key.length;
m_sroutines_key.str= (char*) memdup_root(thd->mem_root,
spname->m_sroutines_key.str,
spname->m_sroutines_key.length + 1);
m_sroutines_key.str[0]= static_cast<char>(m_type);
m_qname.length= m_sroutines_key.length - 1;
m_qname.str= m_sroutines_key.str + 1;
DBUG_VOID_RETURN;
}
void
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
sp_head::set_body_start(THD *thd, const char *begin_ptr)
{
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
m_body_begin= begin_ptr;
thd->m_lip->body_utf8_start(thd, begin_ptr);
}
void
sp_head::set_stmt_end(THD *thd)
{
Lex_input_stream *lip= thd->m_lip; /* shortcut */
const char *end_ptr= lip->get_cpp_ptr(); /* shortcut */
/* Make the string of parameters. */
if (m_param_begin && m_param_end)
{
m_params.length= m_param_end - m_param_begin;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
m_params.str= thd->strmake(m_param_begin, m_params.length);
}
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
/* Remember end pointer for further dumping of whole statement. */
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
thd->lex->stmt_definition_end= end_ptr;
/* Make the string of body (in the original character set). */
m_body.length= end_ptr - m_body_begin;
m_body.str= thd->strmake(m_body_begin, m_body.length);
Bug#25411 (trigger code truncated), PART II Bug 28127 (Some valid identifiers names are not parsed correctly) Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) This patch is the second part of a major cleanup, required to fix Bug 25411 (trigger code truncated). The root cause of the issue stems from the function skip_rear_comments, which was a work around to remove "extra" "*/" characters from the query text, when parsing a query and reusing the text fragments to represent a view, trigger, function or stored procedure. The reason for this work around is that "special comments", like /*!50002 XXX */, were not parsed properly, so that a query like: AAA /*!50002 BBB */ CCC would be seen by the parser as "AAA BBB */ CCC" when the current version is greater or equal to 5.0.2 The root cause of this stems from how special comments are parsed. Special comments are really out-of-bound text that appear inside a query, that affects how the parser behave. In nature, /*!50002 XXX */ in MySQL is similar to the C concept of preprocessing : #if VERSION >= 50002 XXX #endif Depending on the current VERSION of the server, either the special comment should be expanded or it should be ignored, but in all cases the "text" of the query should be re-written to strip the "/*!50002" and "*/" markers, which does not belong to the SQL language itself. Prior to this fix, these markers would leak into : - the storage format for VIEW, - the storage format for FUNCTION, - the storage format for FUNCTION parameters, in mysql.proc (param_list), - the storage format for PROCEDURE, - the storage format for PROCEDURE parameters, in mysql.proc (param_list), - the storage format for TRIGGER, - the binary log used for replication. In all cases, not only this cause format corruption, but also provide a vector for dormant security issues, by allowing to tunnel code that will be activated after an upgrade. The proper solution is to deal with special comments strictly during parsing, when accepting a query from the outside world. Once a query is parsed and an object is created with a persistant representation, this object should not arbitrarily mutate after an upgrade. In short, special comments are a useful but limited feature for MYSQLdump, when used at an *interface* level to facilitate import/export, but bloating the server *internal* storage format is *not* the proper way to deal with configuration management of the user logic. With this fix: - the Lex_input_stream class now acts as a comment pre-processor, and either expands or ignore special comments on the fly. - MYSQLlex and sql_yacc.yy have been cleaned up to strictly use the public interface of Lex_input_stream. In particular, how the input stream accepts or rejects a character is private to Lex_input_stream, and the internal buffer pointers of that class are strictly private, and should not be tempered with during parsing. This caused many changes mostly in sql_lex.cc. During the code cleanup in case MY_LEX_NUMBER_IDENT, Bug 28127 (Some valid identifiers names are not parsed correctly) was found and fixed. By parsing special comments properly, and removing the function 'skip_rear_comments' [sic], Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) has been fixed as well.
2007-06-12 15:23:58 -06:00
trim_whitespace(thd->charset(), & m_body);
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
/* Make the string of UTF-body. */
Bug#25411 (trigger code truncated), PART II Bug 28127 (Some valid identifiers names are not parsed correctly) Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) This patch is the second part of a major cleanup, required to fix Bug 25411 (trigger code truncated). The root cause of the issue stems from the function skip_rear_comments, which was a work around to remove "extra" "*/" characters from the query text, when parsing a query and reusing the text fragments to represent a view, trigger, function or stored procedure. The reason for this work around is that "special comments", like /*!50002 XXX */, were not parsed properly, so that a query like: AAA /*!50002 BBB */ CCC would be seen by the parser as "AAA BBB */ CCC" when the current version is greater or equal to 5.0.2 The root cause of this stems from how special comments are parsed. Special comments are really out-of-bound text that appear inside a query, that affects how the parser behave. In nature, /*!50002 XXX */ in MySQL is similar to the C concept of preprocessing : #if VERSION >= 50002 XXX #endif Depending on the current VERSION of the server, either the special comment should be expanded or it should be ignored, but in all cases the "text" of the query should be re-written to strip the "/*!50002" and "*/" markers, which does not belong to the SQL language itself. Prior to this fix, these markers would leak into : - the storage format for VIEW, - the storage format for FUNCTION, - the storage format for FUNCTION parameters, in mysql.proc (param_list), - the storage format for PROCEDURE, - the storage format for PROCEDURE parameters, in mysql.proc (param_list), - the storage format for TRIGGER, - the binary log used for replication. In all cases, not only this cause format corruption, but also provide a vector for dormant security issues, by allowing to tunnel code that will be activated after an upgrade. The proper solution is to deal with special comments strictly during parsing, when accepting a query from the outside world. Once a query is parsed and an object is created with a persistant representation, this object should not arbitrarily mutate after an upgrade. In short, special comments are a useful but limited feature for MYSQLdump, when used at an *interface* level to facilitate import/export, but bloating the server *internal* storage format is *not* the proper way to deal with configuration management of the user logic. With this fix: - the Lex_input_stream class now acts as a comment pre-processor, and either expands or ignore special comments on the fly. - MYSQLlex and sql_yacc.yy have been cleaned up to strictly use the public interface of Lex_input_stream. In particular, how the input stream accepts or rejects a character is private to Lex_input_stream, and the internal buffer pointers of that class are strictly private, and should not be tempered with during parsing. This caused many changes mostly in sql_lex.cc. During the code cleanup in case MY_LEX_NUMBER_IDENT, Bug 28127 (Some valid identifiers names are not parsed correctly) was found and fixed. By parsing special comments properly, and removing the function 'skip_rear_comments' [sic], Bug 26302 (MySQL server cuts off trailing "*/" from comments in SP/func) has been fixed as well.
2007-06-12 15:23:58 -06:00
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
lip->body_utf8_append(end_ptr);
m_body_utf8.length= lip->get_body_utf8_length();
m_body_utf8.str= thd->strmake(lip->get_body_utf8_str(), m_body_utf8.length);
trim_whitespace(thd->charset(), & m_body_utf8);
/*
Make the string of whole stored-program-definition query (in the
original character set).
*/
m_defstr.length= end_ptr - lip->get_cpp_buf();
m_defstr.str= thd->strmake(lip->get_cpp_buf(), m_defstr.length);
trim_whitespace(thd->charset(), & m_defstr);
}
static TYPELIB *
create_typelib(MEM_ROOT *mem_root, Create_field *field_def, List<String> *src)
{
TYPELIB *result= NULL;
CHARSET_INFO *cs= field_def->charset;
DBUG_ENTER("create_typelib");
if (src->elements)
{
result= (TYPELIB*) alloc_root(mem_root, sizeof(TYPELIB));
result->count= src->elements;
result->name= "";
if (!(result->type_names=(const char **)
alloc_root(mem_root,(sizeof(char *)+sizeof(int))*(result->count+1))))
DBUG_RETURN(0);
result->type_lengths= (uint*)(result->type_names + result->count+1);
List_iterator<String> it(*src);
String conv;
for (uint i=0; i < result->count; i++)
{
uint32 dummy;
uint length;
String *tmp= it++;
if (String::needs_conversion(tmp->length(), tmp->charset(),
cs, &dummy))
{
uint cnv_errs;
conv.copy(tmp->ptr(), tmp->length(), tmp->charset(), cs, &cnv_errs);
length= conv.length();
result->type_names[i]= (char*) strmake_root(mem_root, conv.ptr(),
length);
}
else
{
length= tmp->length();
result->type_names[i]= strmake_root(mem_root, tmp->ptr(), length);
}
// Strip trailing spaces.
length= cs->cset->lengthsp(cs, result->type_names[i], length);
result->type_lengths[i]= length;
((uchar *)result->type_names[i])[length]= '\0';
}
result->type_names[result->count]= 0;
result->type_lengths[result->count]= 0;
}
DBUG_RETURN(result);
}
int
sp_head::create(THD *thd)
{
DBUG_ENTER("sp_head::create");
DBUG_PRINT("info", ("type: %d name: %s params: %s body: %s",
m_type, m_name.str, m_params.str, m_body.str));
DBUG_RETURN(sp_create_routine(thd, m_type, this));
}
sp_head::~sp_head()
{
DBUG_ENTER("sp_head::~sp_head");
destroy();
delete m_next_cached_sp;
if (m_thd)
restore_thd_mem_root(m_thd);
DBUG_VOID_RETURN;
}
void
sp_head::destroy()
{
sp_instr *i;
LEX *lex;
DBUG_ENTER("sp_head::destroy");
DBUG_PRINT("info", ("name: %s", m_name.str));
for (uint ip = 0 ; (i = get_instr(ip)) ; ip++)
delete i;
delete_dynamic(&m_instr);
m_pcont->destroy();
free_items();
/*
If we have non-empty LEX stack then we just came out of parser with
error. Now we should delete all auxilary LEXes and restore original
THD::lex (In this case sp_head::restore_thd_mem_root() was not called
too, so m_thd points to the current thread context).
It is safe to not update LEX::ptr because further query string parsing
and execution will be stopped anyway.
*/
DBUG_ASSERT(m_lex.is_empty() || m_thd);
while ((lex= (LEX *)m_lex.pop()))
{
lex_end(m_thd->lex);
delete m_thd->lex;
m_thd->lex= lex;
}
hash_free(&m_sptabs);
hash_free(&m_sroutines);
DBUG_VOID_RETURN;
}
/*
This is only used for result fields from functions (both during
fix_length_and_dec() and evaluation).
*/
Field *
sp_head::create_result_field(uint field_max_length, const char *field_name,
TABLE *table)
{
uint field_length;
Field *field;
DBUG_ENTER("sp_head::create_result_field");
field_length= !m_return_field_def.length ?
field_max_length : m_return_field_def.length;
field= ::make_field(table->s, /* TABLE_SHARE ptr */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
(uchar*) 0, /* field ptr */
field_length, /* field [max] length */
(uchar*) "", /* null ptr */
0, /* null bit */
m_return_field_def.pack_flag,
m_return_field_def.sql_type,
m_return_field_def.charset,
m_return_field_def.geom_type,
Field::NONE, /* unreg check */
m_return_field_def.interval,
field_name ? field_name : (const char *) m_name.str);
2005-12-12 13:29:48 +03:00
if (field)
field->init(table);
DBUG_RETURN(field);
}
int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b)
{
return (int)((*a)->pos_in_query - (*b)->pos_in_query);
}
/*
StoredRoutinesBinlogging
This paragraph applies only to statement-based binlogging. Row-based
binlogging does not need anything special like this.
Top-down overview:
1. Statements
Statements that have is_update_query(stmt) == TRUE are written into the
binary log verbatim.
Examples:
UPDATE tbl SET tbl.x = spfunc_w_side_effects()
UPDATE tbl SET tbl.x=1 WHERE spfunc_w_side_effect_that_returns_false(tbl.y)
Statements that have is_update_query(stmt) == FALSE (e.g. SELECTs) are not
written into binary log. Instead we catch function calls the statement
makes and write it into binary log separately (see #3).
2. PROCEDURE calls
CALL statements are not written into binary log. Instead
* Any FUNCTION invocation (in SET, IF, WHILE, OPEN CURSOR and other SP
instructions) is written into binlog separately.
* Each statement executed in SP is binlogged separately, according to rules
in #1, with the exception that we modify query string: we replace uses
of SP local variables with NAME_CONST('spvar_name', <spvar-value>) calls.
This substitution is done in subst_spvars().
3. FUNCTION calls
In sp_head::execute_function(), we check
* If this function invocation is done from a statement that is written
into the binary log.
* If there were any attempts to write events to the binary log during
function execution (grep for start_union_events and stop_union_events)
If the answers are No and Yes, we write the function call into the binary
log as "SELECT spfunc(<param1value>, <param2value>, ...)"
4. Miscellaneous issues.
4.1 User variables.
When we call mysql_bin_log.write() for an SP statement, thd->user_var_events
must hold set<{var_name, value}> pairs for all user variables used during
the statement execution.
This set is produced by tracking user variable reads during statement
execution.
For SPs, this has the following implications:
1) thd->user_var_events may contain events from several SP statements and
needs to be valid after exection of these statements was finished. In
order to achieve that, we
* Allocate user_var_events array elements on appropriate mem_root (grep
for user_var_events_alloc).
* Use is_query_in_union() to determine if user_var_event is created.
2) We need to empty thd->user_var_events after we have wrote a function
call. This is currently done by making
reset_dynamic(&thd->user_var_events);
calls in several different places. (TODO cosider moving this into
mysql_bin_log.write() function)
4.2 Auto_increment storage in binlog
As we may write two statements to binlog from one single logical statement
(case of "SELECT func1(),func2()": it is binlogged as "SELECT func1()" and
then "SELECT func2()"), we need to reset auto_increment binlog variables
after each binlogged SELECT. Otherwise, the auto_increment value of the
first SELECT would be used for the second too.
*/
/*
Replace thd->query{_length} with a string that one can write to the binlog
or the query cache.
SYNOPSIS
subst_spvars()
thd Current thread.
instr Instruction (we look for Item_splocal instances in
instr->free_list)
query_str Original query string
DESCRIPTION
The binlog-suitable string is produced by replacing references to SP local
variables with NAME_CONST('sp_var_name', value) calls. To make this string
suitable for the query cache this function allocates some additional space
for the query cache flags.
RETURN
FALSE on success
thd->query{_length} either has been appropriately replaced or there
is no need for replacements.
TRUE out of memory error.
*/
static bool
subst_spvars(THD *thd, sp_instr *instr, LEX_STRING *query_str)
{
DBUG_ENTER("subst_spvars");
Dynamic_array<Item_splocal*> sp_vars_uses;
char *pbuf, *cur, buffer[512];
String qbuf(buffer, sizeof(buffer), &my_charset_bin);
int prev_pos, res, buf_len;
/* Find all instances of Item_splocal used in this statement */
for (Item *item= instr->free_list; item; item= item->next)
{
if (item->is_splocal())
{
Item_splocal *item_spl= (Item_splocal*)item;
if (item_spl->pos_in_query)
sp_vars_uses.append(item_spl);
}
}
if (!sp_vars_uses.elements())
DBUG_RETURN(FALSE);
/* Sort SP var refs by their occurences in the query */
sp_vars_uses.sort(cmp_splocal_locations);
/*
Construct a statement string where SP local var refs are replaced
with "NAME_CONST(name, value)"
*/
qbuf.length(0);
cur= query_str->str;
prev_pos= res= 0;
for (Item_splocal **splocal= sp_vars_uses.front();
splocal < sp_vars_uses.back(); splocal++)
{
Item *val;
char str_buffer[STRING_BUFFER_USUAL_SIZE];
String str_value_holder(str_buffer, sizeof(str_buffer),
&my_charset_latin1);
String *str_value;
/* append the text between sp ref occurences */
res|= qbuf.append(cur + prev_pos, (*splocal)->pos_in_query - prev_pos);
prev_pos= (*splocal)->pos_in_query + (*splocal)->len_in_query;
/* append the spvar substitute */
res|= qbuf.append(STRING_WITH_LEN(" NAME_CONST('"));
res|= qbuf.append((*splocal)->m_name.str, (*splocal)->m_name.length);
res|= qbuf.append(STRING_WITH_LEN("',"));
res|= (*splocal)->fix_fields(thd, (Item **) splocal);
if (res)
break;
val= (*splocal)->this_item();
DBUG_PRINT("info", ("print 0x%lx", (long) val));
str_value= sp_get_item_value(thd, val, &str_value_holder);
if (str_value)
res|= qbuf.append(*str_value);
else
res|= qbuf.append(STRING_WITH_LEN("NULL"));
res|= qbuf.append(')');
if (res)
break;
}
res|= qbuf.append(cur + prev_pos, query_str->length - prev_pos);
if (res)
DBUG_RETURN(TRUE);
/*
Allocate additional space at the end of the new query string for the
query_cache_send_result_to_client function.
*/
buf_len= qbuf.length() + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE + 1;
2007-07-29 21:12:54 +05:00
if ((pbuf= (char *) alloc_root(thd->mem_root, buf_len)))
{
memcpy(pbuf, qbuf.ptr(), qbuf.length());
pbuf[qbuf.length()]= 0;
}
else
DBUG_RETURN(TRUE);
thd->query= pbuf;
thd->query_length= qbuf.length();
DBUG_RETURN(FALSE);
}
/*
Return appropriate error about recursion limit reaching
SYNOPSIS
sp_head::recursion_level_error()
thd Thread handle
NOTE
For functions and triggers we return error about prohibited recursion.
For stored procedures we return about reaching recursion limit.
*/
void sp_head::recursion_level_error(THD *thd)
{
if (m_type == TYPE_ENUM_PROCEDURE)
{
my_error(ER_SP_RECURSION_LIMIT, MYF(0),
thd->variables.max_sp_recursion_depth,
m_name.str);
}
else
my_error(ER_SP_NO_RECURSION, MYF(0));
}
/*
Execute the routine. The main instruction jump loop is there
Assume the parameters already set.
RETURN
FALSE on success
TRUE on error
*/
bool
sp_head::execute(THD *thd)
{
DBUG_ENTER("sp_head::execute");
char saved_cur_db_name_buf[NAME_LEN+1];
LEX_STRING saved_cur_db_name=
{ saved_cur_db_name_buf, sizeof(saved_cur_db_name_buf) };
bool cur_db_changed= FALSE;
sp_rcontext *ctx;
bool err_status= FALSE;
uint ip= 0;
ulong save_sql_mode;
bool save_abort_on_warning;
Query_arena *old_arena;
/* per-instruction arena */
MEM_ROOT execute_mem_root;
Query_arena execute_arena(&execute_mem_root, INITIALIZED_FOR_SP),
backup_arena;
query_id_t old_query_id;
TABLE *old_derived_tables;
LEX *old_lex;
Item_change_list old_change_list;
String old_packet;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
Object_creation_ctx *saved_creation_ctx;
/* Use some extra margin for possible SP recursion and functions */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
if (check_stack_overrun(thd, 8 * STACK_MIN_SIZE, (uchar*)&old_packet))
DBUG_RETURN(TRUE);
/* init per-instruction memroot */
init_sql_alloc(&execute_mem_root, MEM_ROOT_BLOCK_SIZE, 0);
DBUG_ASSERT(!(m_flags & IS_INVOKED));
m_flags|= IS_INVOKED;
m_first_instance->m_first_free_instance= m_next_cached_sp;
if (m_next_cached_sp)
{
DBUG_PRINT("info",
("first free for 0x%lx ++: 0x%lx->0x%lx level: %lu flags %x",
(ulong)m_first_instance, (ulong) this,
(ulong) m_next_cached_sp,
m_next_cached_sp->m_recursion_level,
m_next_cached_sp->m_flags));
}
/*
Check that if there are not any instances after this one then
pointer to the last instance points on this instance or if there are
some instances after this one then recursion level of next instance
greater then recursion level of current instance on 1
*/
DBUG_ASSERT((m_next_cached_sp == 0 &&
m_first_instance->m_last_cached_sp == this) ||
(m_recursion_level + 1 == m_next_cached_sp->m_recursion_level));
2007-04-06 20:21:30 +04:00
/*
NOTE: The SQL Standard does not specify the context that should be
preserved for stored routines. However, at SAP/Walldorf meeting it was
decided that current database should be preserved.
*/
if (m_db.length &&
(err_status= mysql_opt_change_db(thd, &m_db, &saved_cur_db_name, FALSE,
&cur_db_changed)))
{
goto done;
}
if ((ctx= thd->spcont))
ctx->clear_handler();
thd->is_slave_error= 0;
old_arena= thd->stmt_arena;
/*
Switch query context. This has to be done early as this is sometimes
allocated trough sql_alloc
*/
saved_creation_ctx= m_creation_ctx->set_n_backup(thd);
/*
We have to save/restore this info when we are changing call level to
be able properly do close_thread_tables() in instructions.
*/
old_query_id= thd->query_id;
old_derived_tables= thd->derived_tables;
thd->derived_tables= 0;
save_sql_mode= thd->variables.sql_mode;
thd->variables.sql_mode= m_sql_mode;
save_abort_on_warning= thd->abort_on_warning;
thd->abort_on_warning= 0;
/*
It is also more efficient to save/restore current thd->lex once when
do it in each instruction
*/
old_lex= thd->lex;
/*
We should also save Item tree change list to avoid rollback something
too early in the calling query.
*/
old_change_list= thd->change_list;
thd->change_list.empty();
/*
Cursors will use thd->packet, so they may corrupt data which was prepared
for sending by upper level. OTOH cursors in the same routine can share this
buffer safely so let use use routine-local packet instead of having own
packet buffer for each cursor.
It is probably safe to use same thd->convert_buff everywhere.
*/
old_packet.swap(thd->packet);
/*
Switch to per-instruction arena here. We can do it since we cleanup
arena after every instruction.
*/
thd->set_n_backup_active_arena(&execute_arena, &backup_arena);
/*
Save callers arena in order to store instruction results and out
parameters in it later during sp_eval_func_item()
*/
thd->spcont->callers_arena= &backup_arena;
do
{
sp_instr *i;
uint hip; // Handler ip
i = get_instr(ip); // Returns NULL when we're done.
if (i == NULL)
break;
DBUG_PRINT("execute", ("Instruction %u", ip));
/* Don't change NOW() in FUNCTION or TRIGGER */
if (!thd->in_sub_stmt)
thd->set_time(); // Make current_time() et al work
/*
We have to set thd->stmt_arena before executing the instruction
to store in the instruction free_list all new items, created
during the first execution (for example expanding of '*' or the
items made during other permanent subquery transformations).
*/
thd->stmt_arena= i;
/*
Will write this SP statement into binlog separately
(TODO: consider changing the condition to "not inside event union")
*/
if (thd->prelocked_mode == NON_PRELOCKED)
thd->user_var_events_alloc= thd->mem_root;
err_status= i->execute(thd, &ip);
if (i->free_list)
cleanup_items(i->free_list);
/*
If we've set thd->user_var_events_alloc to mem_root of this SP
statement, clean all the events allocated in it.
*/
if (thd->prelocked_mode == NON_PRELOCKED)
{
reset_dynamic(&thd->user_var_events);
thd->user_var_events_alloc= NULL;//DEBUG
}
/* we should cleanup free_list and memroot, used by instruction */
thd->cleanup_after_query();
free_root(&execute_mem_root, MYF(0));
2005-08-11 15:58:15 +03:00
/*
Check if an exception has occurred and a handler has been found
Note: We have to check even if err_status == FALSE, since warnings (and
some errors) don't return a non-zero value. We also have to check even
if thd->killed != 0, since some errors return with this even when a
handler has been found (e.g. "bad data").
2005-08-11 15:58:15 +03:00
*/
if (ctx)
{
uint hf;
switch (ctx->found_handler(&hip, &hf)) {
case SP_HANDLER_NONE:
break;
case SP_HANDLER_CONTINUE:
thd->restore_active_arena(&execute_arena, &backup_arena);
thd->set_n_backup_active_arena(&execute_arena, &backup_arena);
ctx->push_hstack(i->get_cont_dest());
// Fall through
default:
ip= hip;
err_status= FALSE;
ctx->clear_handler();
ctx->enter_handler(hip);
thd->clear_error();
thd->is_fatal_error= 0;
thd->killed= THD::NOT_KILLED;
thd->mysys_var->abort= 0;
continue;
}
}
} while (!err_status && !thd->killed);
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
/* Restore query context. */
m_creation_ctx->restore_env(thd, saved_creation_ctx);
/* Restore arena. */
thd->restore_active_arena(&execute_arena, &backup_arena);
thd->spcont->pop_all_cursors(); // To avoid memory leaks after an error
/* Restore all saved */
old_packet.swap(thd->packet);
DBUG_ASSERT(thd->change_list.is_empty());
thd->change_list= old_change_list;
/* To avoid wiping out thd->change_list on old_change_list destruction */
old_change_list.empty();
thd->lex= old_lex;
thd->query_id= old_query_id;
DBUG_ASSERT(!thd->derived_tables);
thd->derived_tables= old_derived_tables;
thd->variables.sql_mode= save_sql_mode;
thd->abort_on_warning= save_abort_on_warning;
thd->stmt_arena= old_arena;
state= EXECUTED;
done:
DBUG_PRINT("info", ("err_status: %d killed: %d is_slave_error: %d report_error: %d",
err_status, thd->killed, thd->is_slave_error,
thd->is_error()));
if (thd->killed)
err_status= TRUE;
/*
If the DB has changed, the pointer has changed too, but the
original thd->db will then have been freed
*/
if (cur_db_changed && !thd->killed)
{
/*
Force switching back to the saved current database, because it may be
NULL. In this case, mysql_change_db() would generate an error.
*/
err_status|= mysql_change_db(thd, &saved_cur_db_name, TRUE);
}
m_flags&= ~IS_INVOKED;
DBUG_PRINT("info",
("first free for 0x%lx --: 0x%lx->0x%lx, level: %lu, flags %x",
(ulong) m_first_instance,
(ulong) m_first_instance->m_first_free_instance,
(ulong) this, m_recursion_level, m_flags));
/*
Check that we have one of following:
1) there are not free instances which means that this instance is last
in the list of instances (pointer to the last instance point on it and
ther are not other instances after this one in the list)
2) There are some free instances which mean that first free instance
should go just after this one and recursion level of that free instance
should be on 1 more then recursion level of this instance.
*/
DBUG_ASSERT((m_first_instance->m_first_free_instance == 0 &&
this == m_first_instance->m_last_cached_sp &&
m_next_cached_sp == 0) ||
(m_first_instance->m_first_free_instance != 0 &&
m_first_instance->m_first_free_instance == m_next_cached_sp &&
m_first_instance->m_first_free_instance->m_recursion_level ==
m_recursion_level + 1));
m_first_instance->m_first_free_instance= this;
DBUG_RETURN(err_status);
}
#ifndef NO_EMBEDDED_ACCESS_CHECKS
/*
set_routine_security_ctx() changes routine security context, and
checks if there is an EXECUTE privilege in new context. If there is
no EXECUTE privilege, it changes the context back and returns a
error.
SYNOPSIS
set_routine_security_ctx()
thd thread handle
sp stored routine to change the context for
is_proc TRUE is procedure, FALSE if function
save_ctx pointer to an old security context
RETURN
TRUE if there was a error, and the context wasn't changed.
FALSE if the context was changed.
*/
bool
set_routine_security_ctx(THD *thd, sp_head *sp, bool is_proc,
Security_context **save_ctx)
{
*save_ctx= 0;
if (sp->m_chistics->suid != SP_IS_NOT_SUID &&
sp->m_security_ctx.change_security_context(thd, &sp->m_definer_user,
&sp->m_definer_host,
&sp->m_db,
save_ctx))
return TRUE;
/*
If we changed context to run as another user, we need to check the
access right for the new context again as someone may have revoked
the right to use the procedure from this user.
TODO:
Cache if the definer has the right to use the object on the
first usage and only reset the cache if someone does a GRANT
statement that 'may' affect this.
*/
if (*save_ctx &&
check_routine_access(thd, EXECUTE_ACL,
sp->m_db.str, sp->m_name.str, is_proc, FALSE))
{
sp->m_security_ctx.restore_security_context(thd, *save_ctx);
*save_ctx= 0;
return TRUE;
}
return FALSE;
}
#endif // ! NO_EMBEDDED_ACCESS_CHECKS
/**
Execute trigger stored program.
Execute a trigger:
- changes security context for triggers;
- switch to new memroot;
- call sp_head::execute;
- restore old memroot;
- restores security context.
@param thd Thread context.
@param db_name Database name.
@param table_name Table name.
@param grant_info GRANT_INFO structure to be filled with information
about definer's privileges on subject table.
@return Error status.
@retval FALSE on success.
@retval TRUE on error.
*/
bool
sp_head::execute_trigger(THD *thd,
const LEX_STRING *db_name,
const LEX_STRING *table_name,
GRANT_INFO *grant_info)
{
sp_rcontext *octx = thd->spcont;
sp_rcontext *nctx = NULL;
bool err_status= FALSE;
MEM_ROOT call_mem_root;
Query_arena call_arena(&call_mem_root, Query_arena::INITIALIZED_FOR_SP);
Query_arena backup_arena;
DBUG_ENTER("sp_head::execute_trigger");
DBUG_PRINT("info", ("trigger %s", m_name.str));
#ifndef NO_EMBEDDED_ACCESS_CHECKS
Security_context *save_ctx= NULL;
if (m_chistics->suid != SP_IS_NOT_SUID &&
m_security_ctx.change_security_context(thd,
&m_definer_user,
&m_definer_host,
&m_db,
&save_ctx))
DBUG_RETURN(TRUE);
/*
Fetch information about table-level privileges for subject table into
GRANT_INFO instance. The access check itself will happen in
Item_trigger_field, where this information will be used along with
information about column-level privileges.
*/
fill_effective_table_privileges(thd,
grant_info,
db_name->str,
table_name->str);
/* Check that the definer has TRIGGER privilege on the subject table. */
if (!(grant_info->privilege & TRIGGER_ACL))
{
char priv_desc[128];
get_privilege_desc(priv_desc, sizeof(priv_desc), TRIGGER_ACL);
my_error(ER_TABLEACCESS_DENIED_ERROR, MYF(0), priv_desc,
thd->security_ctx->priv_user, thd->security_ctx->host_or_ip,
table_name->str);
m_security_ctx.restore_security_context(thd, save_ctx);
DBUG_RETURN(TRUE);
}
#endif // NO_EMBEDDED_ACCESS_CHECKS
/*
Prepare arena and memroot for objects which lifetime is whole
duration of trigger call (sp_rcontext, it's tables and items,
sp_cursor and Item_cache holders for case expressions). We can't
use caller's arena/memroot for those objects because in this case
some fixed amount of memory will be consumed for each trigger
invocation and so statements which involve lot of them will hog
memory.
TODO: we should create sp_rcontext once per command and reuse it
on subsequent executions of a trigger.
*/
init_sql_alloc(&call_mem_root, MEM_ROOT_BLOCK_SIZE, 0);
thd->set_n_backup_active_arena(&call_arena, &backup_arena);
if (!(nctx= new sp_rcontext(m_pcont, 0, octx)) ||
nctx->init(thd))
{
err_status= TRUE;
goto err_with_cleanup;
}
#ifndef DBUG_OFF
nctx->sp= this;
#endif
thd->spcont= nctx;
err_status= execute(thd);
err_with_cleanup:
thd->restore_active_arena(&call_arena, &backup_arena);
#ifndef NO_EMBEDDED_ACCESS_CHECKS
m_security_ctx.restore_security_context(thd, save_ctx);
#endif // NO_EMBEDDED_ACCESS_CHECKS
delete nctx;
call_arena.free_items();
free_root(&call_mem_root, MYF(0));
thd->spcont= octx;
if (thd->killed)
thd->send_kill_message();
DBUG_RETURN(err_status);
}
/*
Execute a function:
- evaluate parameters
- changes security context for SUID routines
- switch to new memroot
- call sp_head::execute
- restore old memroot
- evaluate the return value
- restores security context
SYNOPSIS
sp_head::execute_function()
thd Thread handle
argp Passed arguments (these are items from containing
statement?)
argcount Number of passed arguments. We need to check if this is
correct.
return_value_fld Save result here.
RETURN
FALSE on success
TRUE on error
*/
bool
sp_head::execute_function(THD *thd, Item **argp, uint argcount,
Field *return_value_fld)
{
ulonglong binlog_save_options;
bool need_binlog_call;
uint arg_no;
sp_rcontext *octx = thd->spcont;
sp_rcontext *nctx = NULL;
char buf[STRING_BUFFER_USUAL_SIZE];
String binlog_buf(buf, sizeof(buf), &my_charset_bin);
bool err_status= FALSE;
MEM_ROOT call_mem_root;
Query_arena call_arena(&call_mem_root, Query_arena::INITIALIZED_FOR_SP);
Query_arena backup_arena;
DBUG_ENTER("sp_head::execute_function");
DBUG_PRINT("info", ("function %s", m_name.str));
LINT_INIT(binlog_save_options);
/*
Check that the function is called with all specified arguments.
If it is not, use my_error() to report an error, or it will not terminate
the invoking query properly.
*/
if (argcount != m_pcont->context_var_count())
{
2005-08-11 15:58:15 +03:00
/*
Need to use my_error here, or it will not terminate the
2005-08-11 15:58:15 +03:00
invoking query properly.
*/
my_error(ER_SP_WRONG_NO_OF_ARGS, MYF(0),
"FUNCTION", m_qname.str, m_pcont->context_var_count(), argcount);
DBUG_RETURN(TRUE);
}
/*
Prepare arena and memroot for objects which lifetime is whole
duration of function call (sp_rcontext, it's tables and items,
sp_cursor and Item_cache holders for case expressions).
We can't use caller's arena/memroot for those objects because
in this case some fixed amount of memory will be consumed for
each function/trigger invocation and so statements which involve
lot of them will hog memory.
TODO: we should create sp_rcontext once per command and reuse
it on subsequent executions of a function/trigger.
*/
init_sql_alloc(&call_mem_root, MEM_ROOT_BLOCK_SIZE, 0);
thd->set_n_backup_active_arena(&call_arena, &backup_arena);
if (!(nctx= new sp_rcontext(m_pcont, return_value_fld, octx)) ||
nctx->init(thd))
{
thd->restore_active_arena(&call_arena, &backup_arena);
err_status= TRUE;
goto err_with_cleanup;
}
/*
We have to switch temporarily back to callers arena/memroot.
Function arguments belong to the caller and so the may reference
memory which they will allocate during calculation long after
this function call will be finished (e.g. in Item::cleanup()).
*/
thd->restore_active_arena(&call_arena, &backup_arena);
#ifndef DBUG_OFF
nctx->sp= this;
#endif
/* Pass arguments. */
for (arg_no= 0; arg_no < argcount; arg_no++)
{
/* Arguments must be fixed in Item_func_sp::fix_fields */
DBUG_ASSERT(argp[arg_no]->fixed);
if ((err_status= nctx->set_variable(thd, arg_no, &(argp[arg_no]))))
goto err_with_cleanup;
}
/*
If row-based binlogging, we don't need to binlog the function's call, let
each substatement be binlogged its way.
*/
need_binlog_call= mysql_bin_log.is_open() &&
WL#2977 and WL#2712 global and session-level variable to set the binlog format (row/statement), and new binlog format called "mixed" (which is statement-based except if only row-based is correct, in this cset it means if UDF or UUID is used; more cases could be added in later 5.1 release): SET GLOBAL|SESSION BINLOG_FORMAT=row|statement|mixed|default; the global default is statement unless cluster is enabled (then it's row) as in 5.1-alpha. It's not possible to use SET on this variable if a session is currently in row-based mode and has open temporary tables (because CREATE TEMPORARY TABLE was not binlogged so temp table is not known on slave), or if NDB is enabled (because NDB does not support such change on-the-fly, though it will later), of if in a stored function (see below). The added tests test the possibility or impossibility to SET, their effects, and the mixed mode, including in prepared statements and in stored procedures and functions. Caveats: a) The mixed mode will not work for stored functions: in mixed mode, a stored function will always be binlogged as one call and in a statement-based way (e.g. INSERT VALUES(myfunc()) or SELECT myfunc()). b) for the same reason, changing the thread's binlog format inside a stored function is refused with an error message. c) the same problems apply to triggers; implementing b) for triggers will be done later (will ask Dmitri). Additionally, as the binlog format is now changeable by each user for his session, I remove the implication which was done at startup, where row-based automatically set log-bin-trust-routine-creators to 1 (not possible anymore as a user can now switch to stmt-based and do nasty things again), and automatically set --innodb-locks-unsafe-for-binlog to 1 (was anyway theoretically incorrect as it disabled phantom protection). Plus fixes for compiler warnings.
2006-02-25 22:21:03 +01:00
(thd->options & OPTION_BIN_LOG) && !thd->current_stmt_binlog_row_based;
/*
Remember the original arguments for unrolled replication of functions
before they are changed by execution.
*/
if (need_binlog_call)
{
binlog_buf.length(0);
binlog_buf.append(STRING_WITH_LEN("SELECT "));
append_identifier(thd, &binlog_buf, m_db.str, m_db.length);
binlog_buf.append('.');
append_identifier(thd, &binlog_buf, m_name.str, m_name.length);
binlog_buf.append('(');
for (arg_no= 0; arg_no < argcount; arg_no++)
{
String str_value_holder;
String *str_value;
if (arg_no)
binlog_buf.append(',');
str_value= sp_get_item_value(thd, nctx->get_item(arg_no),
&str_value_holder);
if (str_value)
binlog_buf.append(*str_value);
else
binlog_buf.append(STRING_WITH_LEN("NULL"));
}
binlog_buf.append(')');
}
thd->spcont= nctx;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
Security_context *save_security_ctx;
if (set_routine_security_ctx(thd, this, FALSE, &save_security_ctx))
{
err_status= TRUE;
goto err_with_cleanup;
}
#endif
if (need_binlog_call)
{
2007-02-26 14:06:10 -05:00
query_id_t q;
reset_dynamic(&thd->user_var_events);
2007-02-26 14:06:10 -05:00
/*
In case of artificially constructed events for function calls
we have separate union for each such event and hence can't use
query_id of real calling statement as the start of all these
unions (this will break logic of replication of user-defined
variables). So we use artifical value which is guaranteed to
be greater than all query_id's of all statements belonging
to previous events/unions.
Possible alternative to this is logging of all function invocations
as one select and not resetting THD::user_var_events before
each invocation.
*/
VOID(pthread_mutex_lock(&LOCK_thread_count));
BUG#20141 "User-defined variables are not replicated properly for SF/Triggers in SBR mode." BUG#14914 "SP: Uses of session variables in routines are not always replicated" BUG#25167 "Dupl. usage of user-variables in trigger/function is not replicated correctly" This patch corrects a minor error in the previous patch for BUG#20141. This patch corrects an errant code change to sp_head.cc. The comments for the first patch follow: User-defined variables used inside of stored functions/triggers in statements which did not update tables directly were not replicated. We also had problems with replication of user-defined variables which were used in triggers (or stored functions called from table-updating statements) more than once. This patch addresses the first issue by enabling logging of all references to user-defined variables in triggers/stored functions and not only references from table-updating statements. The second issue stemmed from the fact that for user-defined variables used from triggers or stored functions called from table-updating statements we were writing binlog events for each reference instead of only one event for the first reference. This problem is already solved for stored functions called from non-updating statements with help of "event unioning" mechanism. So the patch simply extends this mechanism to the case affected. It also fixes small problem in this mechanism which caused wrong logging of references to user-variables in cases when non-updating statement called several stored functions which used the same variable and some of these function calls were omitted from binlog as they were not updating any tables.
2007-02-27 11:36:17 -05:00
q= global_query_id;
2007-02-26 14:06:10 -05:00
VOID(pthread_mutex_unlock(&LOCK_thread_count));
mysql_bin_log.start_union_events(thd, q + 1);
binlog_save_options= thd->options;
thd->options&= ~OPTION_BIN_LOG;
}
/*
Switch to call arena/mem_root so objects like sp_cursor or
Item_cache holders for case expressions can be allocated on it.
TODO: In future we should associate call arena/mem_root with
sp_rcontext and allocate all these objects (and sp_rcontext
itself) on it directly rather than juggle with arenas.
*/
thd->set_n_backup_active_arena(&call_arena, &backup_arena);
err_status= execute(thd);
thd->restore_active_arena(&call_arena, &backup_arena);
if (need_binlog_call)
{
mysql_bin_log.stop_union_events(thd);
thd->options= binlog_save_options;
if (thd->binlog_evt_union.unioned_events)
{
Query_log_event qinfo(thd, binlog_buf.ptr(), binlog_buf.length(),
thd->binlog_evt_union.unioned_events_trans, FALSE);
if (mysql_bin_log.write(&qinfo) &&
thd->binlog_evt_union.unioned_events_trans)
{
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
"Invoked ROUTINE modified a transactional table but MySQL "
"failed to reflect this change in the binary log");
}
reset_dynamic(&thd->user_var_events);
/* Forget those values, in case more function calls are binlogged: */
thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty();
}
}
if (!err_status)
{
/* We need result only in function but not in trigger */
if (!nctx->is_return_value_set())
{
my_error(ER_SP_NORETURNEND, MYF(0), m_name.str);
err_status= TRUE;
}
}
#ifndef NO_EMBEDDED_ACCESS_CHECKS
m_security_ctx.restore_security_context(thd, save_security_ctx);
#endif
err_with_cleanup:
delete nctx;
call_arena.free_items();
free_root(&call_mem_root, MYF(0));
thd->spcont= octx;
DBUG_RETURN(err_status);
}
/*
Execute a procedure.
SYNOPSIS
sp_head::execute_procedure()
thd Thread handle
args List of values passed as arguments.
DESCRIPTION
The function does the following steps:
- Set all parameters
- changes security context for SUID routines
- call sp_head::execute
- copy back values of INOUT and OUT parameters
- restores security context
RETURN
FALSE on success
TRUE on error
*/
bool
sp_head::execute_procedure(THD *thd, List<Item> *args)
{
bool err_status= FALSE;
uint params = m_pcont->context_var_count();
sp_rcontext *save_spcont, *octx;
sp_rcontext *nctx = NULL;
2006-03-01 03:49:31 +01:00
bool save_enable_slow_log= false;
bool save_log_general= false;
DBUG_ENTER("sp_head::execute_procedure");
DBUG_PRINT("info", ("procedure %s", m_name.str));
if (args->elements != params)
{
my_error(ER_SP_WRONG_NO_OF_ARGS, MYF(0), "PROCEDURE",
m_qname.str, params, args->elements);
DBUG_RETURN(TRUE);
}
save_spcont= octx= thd->spcont;
if (! octx)
{ // Create a temporary old context
if (!(octx= new sp_rcontext(m_pcont, NULL, octx)) ||
octx->init(thd))
{
delete octx; /* Delete octx if it was init() that failed. */
DBUG_RETURN(TRUE);
}
#ifndef DBUG_OFF
octx->sp= 0;
#endif
thd->spcont= octx;
/* set callers_arena to thd, for upper-level function to work */
thd->spcont->callers_arena= thd;
}
if (!(nctx= new sp_rcontext(m_pcont, NULL, octx)) ||
nctx->init(thd))
{
delete nctx; /* Delete nctx if it was init() that failed. */
thd->spcont= save_spcont;
DBUG_RETURN(TRUE);
}
#ifndef DBUG_OFF
nctx->sp= this;
#endif
if (params > 0)
{
List_iterator<Item> it_args(*args);
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
DBUG_PRINT("info",(" %.*s: eval args", (int) m_name.length, m_name.str));
for (uint i= 0 ; i < params ; i++)
{
Item *arg_item= it_args++;
if (!arg_item)
break;
sp_variable_t *spvar= m_pcont->find_variable(i);
if (!spvar)
continue;
if (spvar->mode != sp_param_in)
{
Settable_routine_parameter *srp=
arg_item->get_settable_routine_parameter();
if (!srp)
{
my_error(ER_SP_NOT_VAR_ARG, MYF(0), i+1, m_qname.str);
err_status= TRUE;
break;
}
srp->set_required_privilege(spvar->mode == sp_param_inout);
}
if (spvar->mode == sp_param_out)
{
Item_null *null_item= new Item_null();
if (!null_item ||
nctx->set_variable(thd, i, (Item **)&null_item))
{
err_status= TRUE;
break;
}
}
else
{
if (nctx->set_variable(thd, i, it_args.ref()))
{
err_status= TRUE;
break;
}
}
}
/*
Okay, got values for all arguments. Close tables that might be used by
arguments evaluation. If arguments evaluation required prelocking mode,
we'll leave it here.
*/
if (!thd->in_sub_stmt)
close_thread_tables(thd);
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
DBUG_PRINT("info",(" %.*s: eval args done",
(int) m_name.length, m_name.str));
}
if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log)
{
DBUG_PRINT("info", ("Disabling slow log for the execution"));
save_enable_slow_log= true;
thd->enable_slow_log= FALSE;
}
if (!(m_flags & LOG_GENERAL_LOG) && !(thd->options & OPTION_LOG_OFF))
{
DBUG_PRINT("info", ("Disabling general log for the execution"));
save_log_general= true;
/* disable this bit */
thd->options |= OPTION_LOG_OFF;
}
thd->spcont= nctx;
#ifndef NO_EMBEDDED_ACCESS_CHECKS
Security_context *save_security_ctx= 0;
if (!err_status)
err_status= set_routine_security_ctx(thd, this, TRUE, &save_security_ctx);
#endif
if (!err_status)
err_status= execute(thd);
if (save_log_general)
thd->options &= ~OPTION_LOG_OFF;
if (save_enable_slow_log)
thd->enable_slow_log= true;
/*
In the case when we weren't able to employ reuse mechanism for
OUT/INOUT paranmeters, we should reallocate memory. This
allocation should be done on the arena which will live through
all execution of calling routine.
*/
thd->spcont->callers_arena= octx->callers_arena;
if (!err_status && params > 0)
{
List_iterator<Item> it_args(*args);
2005-08-11 15:58:15 +03:00
/*
Copy back all OUT or INOUT values to the previous frame, or
set global user variables
*/
for (uint i= 0 ; i < params ; i++)
{
Item *arg_item= it_args++;
if (!arg_item)
break;
sp_variable_t *spvar= m_pcont->find_variable(i);
if (spvar->mode == sp_param_in)
continue;
Settable_routine_parameter *srp=
arg_item->get_settable_routine_parameter();
DBUG_ASSERT(srp);
if (srp->set_value(thd, octx, nctx->get_item_addr(i)))
{
err_status= TRUE;
break;
}
}
}
#ifndef NO_EMBEDDED_ACCESS_CHECKS
if (save_security_ctx)
m_security_ctx.restore_security_context(thd, save_security_ctx);
#endif
if (!save_spcont)
delete octx;
delete nctx;
thd->spcont= save_spcont;
DBUG_RETURN(err_status);
}
/**
@brief Reset lex during parsing, before we parse a sub statement.
@param thd Thread handler.
@return Error state
@retval true An error occurred.
@retval false Success.
*/
bool
sp_head::reset_lex(THD *thd)
{
2003-05-23 15:32:31 +02:00
DBUG_ENTER("sp_head::reset_lex");
LEX *sublex;
LEX *oldlex= thd->lex;
2003-05-23 15:32:31 +02:00
sublex= new (thd->mem_root)st_lex_local;
if (sublex == 0)
DBUG_RETURN(TRUE);
thd->lex= sublex;
(void)m_lex.push_front(oldlex);
2004-07-22 00:26:33 +02:00
/* Reset most stuff. */
lex_start(thd);
2004-07-22 00:26:33 +02:00
/* And keep the SP stuff too */
sublex->sphead= oldlex->sphead;
sublex->spcont= oldlex->spcont;
/* And trigger related stuff too */
sublex->trg_chistics= oldlex->trg_chistics;
sublex->trg_table_fields.empty();
sublex->sp_lex_in_use= FALSE;
/* Reset type info. */
sublex->charset= NULL;
sublex->length= NULL;
sublex->dec= NULL;
sublex->interval_list.empty();
sublex->type= 0;
DBUG_RETURN(FALSE);
}
// Restore lex during parsing, after we have parsed a sub statement.
void
sp_head::restore_lex(THD *thd)
{
2003-05-23 15:32:31 +02:00
DBUG_ENTER("sp_head::restore_lex");
LEX *sublex= thd->lex;
LEX *oldlex;
sublex->set_trg_event_type_for_tables();
oldlex= (LEX *)m_lex.pop();
if (! oldlex)
return; // Nothing to restore
2003-05-23 15:32:31 +02:00
oldlex->trg_table_fields.push_back(&sublex->trg_table_fields);
* Mixed replication mode * : 1) Fix for BUG#19630 "stored function inserting into two auto_increment breaks statement-based binlog": a stored function inserting into two such tables may fail to replicate (inserting wrong data in the slave's copy of the second table) if the slave's second table had an internal auto_increment counter different from master's. Because the auto_increment value autogenerated by master for the 2nd table does not go into binlog, only the first does, so the slave lacks information. To fix this, if running in mixed binlogging mode, if the stored function or trigger plans to update two different tables both having auto_increment columns, we switch to row-based for the whole function. We don't have a simple solution for statement-based binlogging mode, there the bug remains and will be documented as a known problem. Re-enabling rpl_switch_stm_row_mixed. 2) Fix for BUG#20630 "Mixed binlogging mode does not work with stored functions, triggers, views", which was a documented limitation (in mixed mode, we didn't detect that a stored function's execution needed row-based binlogging (due to some UUID() call for example); same for triggers, same for views (a view created from a SELECT UUID(), and doing INSERT INTO sometable SELECT theview; would not replicate row-based). This is implemented by, after parsing a routine's body, remembering in sp_head that this routine needs row-based binlogging. Then when this routine is used, the caller is marked to require row-based binlogging too. Same for views: when we parse a view and detect that its SELECT needs row-based binary logging, we mark the calling LEX as such. 3) Fix for BUG#20499 "mixed mode with temporary table breaks binlog": a temporary table containing e.g. UUID has its changes not binlogged, so any query updating a permanent table with data from the temporary table will run wrongly on slave. Solution: in mixed mode we don't switch back from row-based to statement-based when there exists temporary tables. 4) Attempt to test mysqlbinlog on a binlog generated by mysqlbinlog; impossible due to BUG#11312 and BUG#20329, but test is in place for when they are fixed.
2006-07-09 17:00:47 +02:00
/*
If this substatement needs row-based, the entire routine does too (we
cannot switch from statement-based to row-based only for this
substatement).
*/
if (sublex->is_stmt_unsafe())
* Mixed replication mode * : 1) Fix for BUG#19630 "stored function inserting into two auto_increment breaks statement-based binlog": a stored function inserting into two such tables may fail to replicate (inserting wrong data in the slave's copy of the second table) if the slave's second table had an internal auto_increment counter different from master's. Because the auto_increment value autogenerated by master for the 2nd table does not go into binlog, only the first does, so the slave lacks information. To fix this, if running in mixed binlogging mode, if the stored function or trigger plans to update two different tables both having auto_increment columns, we switch to row-based for the whole function. We don't have a simple solution for statement-based binlogging mode, there the bug remains and will be documented as a known problem. Re-enabling rpl_switch_stm_row_mixed. 2) Fix for BUG#20630 "Mixed binlogging mode does not work with stored functions, triggers, views", which was a documented limitation (in mixed mode, we didn't detect that a stored function's execution needed row-based binlogging (due to some UUID() call for example); same for triggers, same for views (a view created from a SELECT UUID(), and doing INSERT INTO sometable SELECT theview; would not replicate row-based). This is implemented by, after parsing a routine's body, remembering in sp_head that this routine needs row-based binlogging. Then when this routine is used, the caller is marked to require row-based binlogging too. Same for views: when we parse a view and detect that its SELECT needs row-based binary logging, we mark the calling LEX as such. 3) Fix for BUG#20499 "mixed mode with temporary table breaks binlog": a temporary table containing e.g. UUID has its changes not binlogged, so any query updating a permanent table with data from the temporary table will run wrongly on slave. Solution: in mixed mode we don't switch back from row-based to statement-based when there exists temporary tables. 4) Attempt to test mysqlbinlog on a binlog generated by mysqlbinlog; impossible due to BUG#11312 and BUG#20329, but test is in place for when they are fixed.
2006-07-09 17:00:47 +02:00
m_flags|= BINLOG_ROW_BASED_IF_MIXED;
/*
Add routines which are used by statement to respective set for
this routine.
*/
sp_update_sp_used_routines(&m_sroutines, &sublex->sroutines);
/*
Merge tables used by this statement (but not by its functions or
procedures) to multiset of tables used by this routine.
*/
merge_table_list(thd, sublex->query_tables, sublex);
if (! sublex->sp_lex_in_use)
{
lex_end(sublex);
delete sublex;
}
thd->lex= oldlex;
2003-05-23 15:32:31 +02:00
DBUG_VOID_RETURN;
}
void
sp_head::push_backpatch(sp_instr *i, sp_label_t *lab)
{
bp_t *bp= (bp_t *)sql_alloc(sizeof(bp_t));
if (bp)
{
bp->lab= lab;
bp->instr= i;
(void)m_backpatch.push_front(bp);
}
}
void
sp_head::backpatch(sp_label_t *lab)
{
bp_t *bp;
uint dest= instructions();
List_iterator_fast<bp_t> li(m_backpatch);
DBUG_ENTER("sp_head::backpatch");
while ((bp= li++))
{
if (bp->lab == lab)
{
DBUG_PRINT("info", ("backpatch: (m_ip %d, label 0x%lx <%s>) to dest %d",
bp->instr->m_ip, (ulong) lab, lab->name, dest));
bp->instr->backpatch(dest, lab->ctx);
}
}
DBUG_VOID_RETURN;
}
/*
Prepare an instance of Create_field for field creation (fill all necessary
attributes).
SYNOPSIS
sp_head::fill_field_definition()
thd [IN] Thread handle
lex [IN] Yacc parsing context
field_type [IN] Field type
field_def [OUT] An instance of Create_field to be filled
RETURN
FALSE on success
TRUE on error
*/
bool
sp_head::fill_field_definition(THD *thd, LEX *lex,
enum enum_field_types field_type,
Create_field *field_def)
{
LEX_STRING cmt = { 0, 0 };
uint unused1= 0;
int unused2= 0;
if (field_def->init(thd, (char*) "", field_type, lex->length, lex->dec,
lex->type, (Item*) 0, (Item*) 0, &cmt, 0,
&lex->interval_list,
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
lex->charset ? lex->charset :
thd->variables.collation_database,
lex->uint_geom_type))
return TRUE;
if (field_def->interval_list.elements)
field_def->interval= create_typelib(mem_root, field_def,
&field_def->interval_list);
sp_prepare_create_field(thd, field_def);
if (prepare_create_field(field_def, &unused1, &unused2, &unused2,
HA_CAN_GEOMETRY))
{
return TRUE;
}
return FALSE;
}
void
sp_head::new_cont_backpatch(sp_instr_opt_meta *i)
{
m_cont_level+= 1;
if (i)
{
/* Use the cont. destination slot to store the level */
i->m_cont_dest= m_cont_level;
(void)m_cont_backpatch.push_front(i);
}
}
void
sp_head::add_cont_backpatch(sp_instr_opt_meta *i)
{
i->m_cont_dest= m_cont_level;
(void)m_cont_backpatch.push_front(i);
}
void
sp_head::do_cont_backpatch()
{
uint dest= instructions();
uint lev= m_cont_level--;
sp_instr_opt_meta *i;
while ((i= m_cont_backpatch.head()) && i->m_cont_dest == lev)
{
i->m_cont_dest= dest;
(void)m_cont_backpatch.pop();
}
}
void
sp_head::set_info(longlong created, longlong modified,
st_sp_chistics *chistics, ulong sql_mode)
{
m_created= created;
m_modified= modified;
m_chistics= (st_sp_chistics *) memdup_root(mem_root, (char*) chistics,
sizeof(*chistics));
if (m_chistics->comment.length == 0)
m_chistics->comment.str= 0;
else
m_chistics->comment.str= strmake_root(mem_root,
m_chistics->comment.str,
m_chistics->comment.length);
m_sql_mode= sql_mode;
}
void
sp_head::set_definer(const char *definer, uint definerlen)
{
2006-09-27 19:21:29 +05:00
char user_name_holder[USERNAME_LENGTH + 1];
2006-09-28 18:00:44 +05:00
LEX_STRING user_name= { user_name_holder, USERNAME_LENGTH };
char host_name_holder[HOSTNAME_LENGTH + 1];
LEX_STRING host_name= { host_name_holder, HOSTNAME_LENGTH };
parse_user(definer, definerlen, user_name.str, &user_name.length,
host_name.str, &host_name.length);
set_definer(&user_name, &host_name);
}
void
sp_head::set_definer(const LEX_STRING *user_name, const LEX_STRING *host_name)
{
m_definer_user.str= strmake_root(mem_root, user_name->str, user_name->length);
m_definer_user.length= user_name->length;
m_definer_host.str= strmake_root(mem_root, host_name->str, host_name->length);
m_definer_host.length= host_name->length;
}
void
sp_head::reset_thd_mem_root(THD *thd)
{
DBUG_ENTER("sp_head::reset_thd_mem_root");
m_thd_root= thd->mem_root;
thd->mem_root= &main_mem_root;
DBUG_PRINT("info", ("mem_root 0x%lx moved to thd mem root 0x%lx",
(ulong) &mem_root, (ulong) &thd->mem_root));
free_list= thd->free_list; // Keep the old list
thd->free_list= NULL; // Start a new one
m_thd= thd;
DBUG_VOID_RETURN;
}
void
sp_head::restore_thd_mem_root(THD *thd)
{
DBUG_ENTER("sp_head::restore_thd_mem_root");
Item *flist= free_list; // The old list
set_query_arena(thd); // Get new free_list and mem_root
state= INITIALIZED_FOR_SP;
DBUG_PRINT("info", ("mem_root 0x%lx returned from thd mem root 0x%lx",
(ulong) &mem_root, (ulong) &thd->mem_root));
thd->free_list= flist; // Restore the old one
thd->mem_root= m_thd_root;
m_thd= NULL;
DBUG_VOID_RETURN;
}
/*
Check if a user has access right to a routine
SYNOPSIS
check_show_routine_access()
thd Thread handler
sp SP
full_access Set to 1 if the user has SELECT right to the
'mysql.proc' able or is the owner of the routine
RETURN
0 ok
1 error
*/
bool check_show_routine_access(THD *thd, sp_head *sp, bool *full_access)
{
TABLE_LIST tables;
bzero((char*) &tables,sizeof(tables));
tables.db= (char*) "mysql";
tables.table_name= tables.alias= (char*) "proc";
*full_access= (!check_table_access(thd, SELECT_ACL, &tables, 1) ||
(!strcmp(sp->m_definer_user.str,
thd->security_ctx->priv_user) &&
!strcmp(sp->m_definer_host.str,
thd->security_ctx->priv_host)));
if (!*full_access)
return check_some_routine_access(thd, sp->m_db.str, sp->m_name.str,
sp->m_type == TYPE_ENUM_PROCEDURE);
return 0;
}
/**
Implement SHOW CREATE statement for stored routines.
@param thd Thread context.
@param type Stored routine type
(TYPE_ENUM_PROCEDURE or TYPE_ENUM_FUNCTION)
@return Error status.
@retval FALSE on success
@retval TRUE on error
*/
bool
sp_head::show_create_routine(THD *thd, int type)
{
const char *col1_caption= type == TYPE_ENUM_PROCEDURE ?
"Procedure" : "Function";
const char *col3_caption= type == TYPE_ENUM_PROCEDURE ?
"Create Procedure" : "Create Function";
bool err_status;
Protocol *protocol= thd->protocol;
List<Item> fields;
LEX_STRING sql_mode;
bool full_access;
DBUG_ENTER("sp_head::show_create_routine");
DBUG_PRINT("info", ("routine %s", m_name.str));
DBUG_ASSERT(type == TYPE_ENUM_PROCEDURE ||
type == TYPE_ENUM_FUNCTION);
if (check_show_routine_access(thd, this, &full_access))
DBUG_RETURN(TRUE);
sys_var_thd_sql_mode::symbolic_mode_representation(
thd, m_sql_mode, &sql_mode);
/* Send header. */
fields.push_back(new Item_empty_string(col1_caption, NAME_CHAR_LEN));
fields.push_back(new Item_empty_string("sql_mode", sql_mode.length));
{
/*
NOTE: SQL statement field must be not less than 1024 in order not to
confuse old clients.
*/
Item_empty_string *stmt_fld=
new Item_empty_string(col3_caption,
max(m_defstr.length, 1024));
stmt_fld->maybe_null= TRUE;
fields.push_back(stmt_fld);
}
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
fields.push_back(new Item_empty_string("character_set_client",
MY_CS_NAME_SIZE));
fields.push_back(new Item_empty_string("collation_connection",
MY_CS_NAME_SIZE));
fields.push_back(new Item_empty_string("Database Collation",
MY_CS_NAME_SIZE));
if (protocol->send_fields(&fields,
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
{
DBUG_RETURN(TRUE);
}
/* Send data. */
protocol->prepare_for_resend();
protocol->store(m_name.str, m_name.length, system_charset_info);
protocol->store(sql_mode.str, sql_mode.length, system_charset_info);
if (full_access)
protocol->store(m_defstr.str, m_defstr.length,
m_creation_ctx->get_client_cs());
else
protocol->store_null();
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
protocol->store(m_creation_ctx->get_client_cs()->csname, system_charset_info);
protocol->store(m_creation_ctx->get_connection_cl()->name, system_charset_info);
protocol->store(m_creation_ctx->get_db_cl()->name, system_charset_info);
err_status= protocol->write();
if (!err_status)
send_eof(thd);
DBUG_RETURN(err_status);
}
/*
Add instruction to SP
SYNOPSIS
sp_head::add_instr()
instr Instruction
*/
void sp_head::add_instr(sp_instr *instr)
{
instr->free_list= m_thd->free_list;
m_thd->free_list= 0;
/*
Memory root of every instruction is designated for permanent
transformations (optimizations) made on the parsed tree during
the first execution. It points to the memory root of the
entire stored procedure, as their life span is equal.
*/
instr->mem_root= &main_mem_root;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
insert_dynamic(&m_instr, (uchar*)&instr);
}
/*
Do some minimal optimization of the code:
1) Mark used instructions
1.1) While doing this, shortcut jumps to jump instructions
2) Compact the code, removing unused instructions
This is the main mark and move loop; it relies on the following methods
in sp_instr and its subclasses:
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
opt_mark() Mark instruction as reachable
opt_shortcut_jump() Shortcut jumps to the final destination;
used by opt_mark().
opt_move() Update moved instruction
set_destination() Set the new destination (jump instructions only)
*/
void sp_head::optimize()
{
List<sp_instr> bp;
sp_instr *i;
uint src, dst;
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
opt_mark();
bp.empty();
src= dst= 0;
while ((i= get_instr(src)))
{
if (! i->marked)
{
delete i;
src+= 1;
}
else
{
if (src != dst)
{ // Move the instruction and update prev. jumps
sp_instr *ibp;
List_iterator_fast<sp_instr> li(bp);
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
set_dynamic(&m_instr, (uchar*)&i, dst);
while ((ibp= li++))
{
sp_instr_opt_meta *im= static_cast<sp_instr_opt_meta *>(ibp);
im->set_destination(src, dst);
}
}
i->opt_move(dst, &bp);
src+= 1;
dst+= 1;
}
}
m_instr.elements= dst;
bp.empty();
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
void sp_head::add_mark_lead(uint ip, List<sp_instr> *leads)
{
sp_instr *i= get_instr(ip);
if (i && ! i->marked)
leads->push_front(i);
}
void
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_head::opt_mark()
{
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
uint ip;
sp_instr *i;
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
List<sp_instr> leads;
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
/*
Forward flow analysis algorithm in the instruction graph:
- first, add the entry point in the graph (the first instruction) to the
'leads' list of paths to explore.
- while there are still leads to explore:
- pick one lead, and follow the path forward. Mark instruction reached.
Stop only if the end of the routine is reached, or the path converge
to code already explored (marked).
- while following a path, collect in the 'leads' list any fork to
another path (caused by conditional jumps instructions), so that these
paths can be explored as well.
*/
/* Add the entry point */
i= get_instr(0);
leads.push_front(i);
/* For each path of code ... */
while (leads.elements != 0)
{
i= leads.pop();
/* Mark the entire path, collecting new leads. */
while (i && ! i->marked)
{
ip= i->opt_mark(this, & leads);
i= get_instr(ip);
}
}
}
#ifndef DBUG_OFF
/*
Return the routine instructions as a result set.
Returns 0 if ok, !=0 on error.
*/
int
sp_head::show_routine_code(THD *thd)
{
Protocol *protocol= thd->protocol;
char buff[2048];
String buffer(buff, sizeof(buff), system_charset_info);
List<Item> field_list;
sp_instr *i;
bool full_access;
int res= 0;
uint ip;
DBUG_ENTER("sp_head::show_routine_code");
DBUG_PRINT("info", ("procedure: %s", m_name.str));
if (check_show_routine_access(thd, this, &full_access) || !full_access)
DBUG_RETURN(1);
field_list.push_back(new Item_uint("Pos", 9));
// 1024 is for not to confuse old clients
field_list.push_back(new Item_empty_string("Instruction",
max(buffer.length(), 1024)));
if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS |
Protocol::SEND_EOF))
DBUG_RETURN(1);
for (ip= 0; (i = get_instr(ip)) ; ip++)
{
/*
Consistency check. If these are different something went wrong
during optimization.
*/
if (ip != i->m_ip)
{
const char *format= "Instruction at position %u has m_ip=%u";
char tmp[sizeof(format) + 2*SP_INSTR_UINT_MAXLEN + 1];
sprintf(tmp, format, ip, i->m_ip);
/*
Since this is for debugging purposes only, we don't bother to
introduce a special error code for it.
*/
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, tmp);
}
protocol->prepare_for_resend();
protocol->store((longlong)ip);
buffer.set("", 0, system_charset_info);
i->print(&buffer);
protocol->store(buffer.ptr(), buffer.length(), system_charset_info);
if ((res= protocol->write()))
break;
}
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 21:34:54 +04:00
if (!res)
send_eof(thd);
DBUG_RETURN(res);
}
#endif // ifndef DBUG_OFF
/*
Prepare LEX and thread for execution of instruction, if requested open
and lock LEX's tables, execute instruction's core function, perform
cleanup afterwards.
SYNOPSIS
reset_lex_and_exec_core()
thd - thread context
nextp - out - next instruction
open_tables - if TRUE then check read access to tables in LEX's table
list and open and lock them (used in instructions which
need to calculate some expression and don't execute
complete statement).
sp_instr - instruction for which we prepare context, and which core
function execute by calling its exec_core() method.
NOTE
We are not saving/restoring some parts of THD which may need this because
we do this once for whole routine execution in sp_head::execute().
RETURN VALUE
0/non-0 - Success/Failure
*/
int
sp_lex_keeper::reset_lex_and_exec_core(THD *thd, uint *nextp,
bool open_tables, sp_instr* instr)
{
int res= 0;
DBUG_ENTER("reset_lex_and_exec_core");
(pushing for Andrei) Bug #27417 thd->no_trans_update.stmt lost value inside of SF-exec-stack Once had been set the flag might later got reset inside of a stored routine execution stack. The reason was in that there was no check if a new statement started at time of resetting. The artifact affects most of binlogable DML queries. Notice, that multi-update is wrapped up within bug@27716 fix, multi-delete bug@29136. Fixed with saving parent's statement flag of whether the statement modified non-transactional table, and unioning (merging) the value with that was gained in mysql_execute_command. Resettling thd->no_trans_update members into thd->transaction.`member`; Asserting code; Effectively the following properties are held. 1. At the end of a substatement thd->transaction.stmt.modified_non_trans_table reflects the fact if such a table got modified by the substatement. That also respects THD::really_abort_on_warnin() requirements. 2. Eventually thd->transaction.stmt.modified_non_trans_table will be computed as the union of the values of all invoked sub-statements. That fixes this bug#27417; Computing of thd->transaction.all.modified_non_trans_table is refined to base to the stmt's value for all the case including insert .. select statement which before the patch had an extra issue bug@28960. Minor issues are covered with mysql_load, mysql_delete, and binloggin of insert in to temp_table select. The supplied test verifies limitely, mostly asserts. The ultimate testing is defered for bug@13270, bug@23333.
2007-07-30 18:27:36 +03:00
/*
The flag is saved at the entry to the following substatement.
It's reset further in the common code part.
It's merged with the saved parent's value at the exit of this func.
*/
bool parent_modified_non_trans_table= thd->transaction.stmt.modified_non_trans_table;
thd->transaction.stmt.modified_non_trans_table= FALSE;
DBUG_ASSERT(!thd->derived_tables);
DBUG_ASSERT(thd->change_list.is_empty());
/*
Use our own lex.
We should not save old value since it is saved/restored in
sp_head::execute() when we are entering/leaving routine.
*/
thd->lex= m_lex;
VOID(pthread_mutex_lock(&LOCK_thread_count));
thd->query_id= next_query_id();
VOID(pthread_mutex_unlock(&LOCK_thread_count));
if (thd->prelocked_mode == NON_PRELOCKED)
{
/*
This statement will enter/leave prelocked mode on its own.
Entering prelocked mode changes table list and related members
of LEX, so we'll need to restore them.
*/
if (lex_query_tables_own_last)
{
/*
We've already entered/left prelocked mode with this statement.
Attach the list of tables that need to be prelocked and mark m_lex
as having such list attached.
*/
*lex_query_tables_own_last= prelocking_tables;
m_lex->mark_as_requiring_prelocking(lex_query_tables_own_last);
}
}
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
reinit_stmt_before_use(thd, m_lex);
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
if (open_tables)
res= instr->exec_open_and_lock_tables(thd, m_lex->query_tables);
if (!res)
{
res= instr->exec_core(thd, nextp);
DBUG_PRINT("info",("exec_core returned: %d", res));
}
m_lex->unit.cleanup();
thd->proc_info="closing tables";
close_thread_tables(thd);
2005-08-11 15:58:15 +03:00
thd->proc_info= 0;
if (m_lex->query_tables_own_last)
{
/*
We've entered and left prelocking mode when executing statement
stored in m_lex.
m_lex->query_tables(->next_global)* list now has a 'tail' - a list
of tables that are added for prelocking. (If this is the first
execution, the 'tail' was added by open_tables(), otherwise we've
attached it above in this function).
Now we'll save the 'tail', and detach it.
*/
lex_query_tables_own_last= m_lex->query_tables_own_last;
prelocking_tables= *lex_query_tables_own_last;
*lex_query_tables_own_last= NULL;
m_lex->mark_as_requiring_prelocking(NULL);
}
thd->rollback_item_tree_changes();
/* Update the state of the active arena. */
thd->stmt_arena->state= Query_arena::EXECUTED;
(pushing for Andrei) Bug #27417 thd->no_trans_update.stmt lost value inside of SF-exec-stack Once had been set the flag might later got reset inside of a stored routine execution stack. The reason was in that there was no check if a new statement started at time of resetting. The artifact affects most of binlogable DML queries. Notice, that multi-update is wrapped up within bug@27716 fix, multi-delete bug@29136. Fixed with saving parent's statement flag of whether the statement modified non-transactional table, and unioning (merging) the value with that was gained in mysql_execute_command. Resettling thd->no_trans_update members into thd->transaction.`member`; Asserting code; Effectively the following properties are held. 1. At the end of a substatement thd->transaction.stmt.modified_non_trans_table reflects the fact if such a table got modified by the substatement. That also respects THD::really_abort_on_warnin() requirements. 2. Eventually thd->transaction.stmt.modified_non_trans_table will be computed as the union of the values of all invoked sub-statements. That fixes this bug#27417; Computing of thd->transaction.all.modified_non_trans_table is refined to base to the stmt's value for all the case including insert .. select statement which before the patch had an extra issue bug@28960. Minor issues are covered with mysql_load, mysql_delete, and binloggin of insert in to temp_table select. The supplied test verifies limitely, mostly asserts. The ultimate testing is defered for bug@13270, bug@23333.
2007-07-30 18:27:36 +03:00
/*
Merge here with the saved parent's values
what is needed from the substatement gained
*/
thd->transaction.stmt.modified_non_trans_table |= parent_modified_non_trans_table;
/*
Unlike for PS we should not call Item's destructors for newly created
items after execution of each instruction in stored routine. This is
because SP often create Item (like Item_int, Item_string etc...) when
they want to store some value in local variable, pass return value and
etc... So their life time should be longer than one instruction.
cleanup_items() is called in sp_head::execute()
*/
DBUG_RETURN(res || thd->is_error());
}
2005-08-11 15:58:15 +03:00
/*
sp_instr class functions
*/
int sp_instr::exec_open_and_lock_tables(THD *thd, TABLE_LIST *tables)
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
{
int result;
/*
Check whenever we have access to tables for this statement
and open and lock them before executing instructions core function.
*/
if (check_table_access(thd, SELECT_ACL, tables, 0)
|| open_and_lock_tables(thd, tables))
result= -1;
else
result= 0;
return result;
}
uint sp_instr::get_cont_dest()
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
{
return (m_ip+1);
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
}
int sp_instr::exec_core(THD *thd, uint *nextp)
{
DBUG_ASSERT(0);
return 0;
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_stmt class functions
*/
int
sp_instr_stmt::execute(THD *thd, uint *nextp)
{
char *query;
uint32 query_length;
int res;
DBUG_ENTER("sp_instr_stmt::execute");
DBUG_PRINT("info", ("command: %d", m_lex_keeper.sql_command()));
query= thd->query;
query_length= thd->query_length;
if (!(res= alloc_query(thd, m_query.str, m_query.length)) &&
!(res=subst_spvars(thd, this, &m_query)))
{
/*
(the order of query cache and subst_spvars calls is irrelevant because
queries with SP vars can't be cached)
*/
if (unlikely((thd->options & OPTION_LOG_OFF)==0))
general_log_write(thd, COM_QUERY, thd->query, thd->query_length);
if (query_cache_send_result_to_client(thd,
thd->query, thd->query_length) <= 0)
{
res= m_lex_keeper.reset_lex_and_exec_core(thd, nextp, FALSE, this);
if (thd->main_da.is_eof())
net_end_statement(thd);
query_cache_end_of_result(thd);
if (!res && unlikely(thd->enable_slow_log))
log_slow_statement(thd);
}
else
*nextp= m_ip+1;
thd->query= query;
thd->query_length= query_length;
if (!thd->is_error())
thd->main_da.reset_diagnostics_area();
}
DBUG_RETURN(res);
}
void
sp_instr_stmt::print(String *str)
{
uint i, len;
/* stmt CMD "..." */
if (str->reserve(SP_STMT_PRINT_MAXLEN+SP_INSTR_UINT_MAXLEN+8))
return;
str->qs_append(STRING_WITH_LEN("stmt "));
str->qs_append((uint)m_lex_keeper.sql_command());
str->qs_append(STRING_WITH_LEN(" \""));
len= m_query.length;
/*
Print the query string (but not too much of it), just to indicate which
statement it is.
*/
if (len > SP_STMT_PRINT_MAXLEN)
len= SP_STMT_PRINT_MAXLEN-3;
/* Copy the query string and replace '\n' with ' ' in the process */
for (i= 0 ; i < len ; i++)
{
char c= m_query.str[i];
if (c == '\n')
c= ' ';
str->qs_append(c);
}
if (m_query.length > SP_STMT_PRINT_MAXLEN)
str->qs_append(STRING_WITH_LEN("...")); /* Indicate truncated string */
str->qs_append('"');
}
int
sp_instr_stmt::exec_core(THD *thd, uint *nextp)
{
int res= mysql_execute_command(thd);
*nextp= m_ip+1;
return res;
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_set class functions
*/
int
sp_instr_set::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_set::execute");
DBUG_PRINT("info", ("offset: %u", m_offset));
DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
}
2005-08-11 15:58:15 +03:00
int
sp_instr_set::exec_core(THD *thd, uint *nextp)
{
int res= thd->spcont->set_variable(thd, m_offset, &m_value);
if (res && thd->spcont->found_handler_here())
{
/*
Failed to evaluate the value, and a handler has been found. Reset the
variable to NULL.
*/
if (thd->spcont->set_variable(thd, m_offset, 0))
{
/* If this also failed, let's abort. */
sp_rcontext *spcont= thd->spcont;
thd->spcont= NULL; /* Avoid handlers */
my_error(ER_OUT_OF_RESOURCES, MYF(0));
spcont->clear_handler();
thd->spcont= spcont;
}
}
*nextp = m_ip+1;
return res;
}
void
sp_instr_set::print(String *str)
{
/* set name@offset ... */
int rsrv = SP_INSTR_UINT_MAXLEN+6;
sp_variable_t *var = m_ctx->find_variable(m_offset);
/* 'var' should always be non-null, but just in case... */
if (var)
rsrv+= var->name.length;
if (str->reserve(rsrv))
return;
str->qs_append(STRING_WITH_LEN("set "));
if (var)
{
str->qs_append(var->name.str, var->name.length);
str->qs_append('@');
}
str->qs_append(m_offset);
str->qs_append(' ');
m_value->print(str);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_set_trigger_field class functions
*/
int
sp_instr_set_trigger_field::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_set_trigger_field::execute");
DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
}
int
sp_instr_set_trigger_field::exec_core(THD *thd, uint *nextp)
{
const int res= (trigger_field->set_value(thd, &value) ? -1 : 0);
*nextp = m_ip+1;
return res;
}
void
sp_instr_set_trigger_field::print(String *str)
{
str->append(STRING_WITH_LEN("set_trigger_field "));
trigger_field->print(str);
str->append(STRING_WITH_LEN(":="));
value->print(str);
}
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
/*
sp_instr_opt_meta
*/
uint sp_instr_opt_meta::get_cont_dest()
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
{
return m_cont_dest;
Bug#8407 (Stored functions/triggers ignore exception handler) Bug 18914 (Calling certain SPs from triggers fail) Bug 20713 (Functions will not not continue for SQLSTATE VALUE '42S02') Bug 21825 (Incorrect message error deleting records in a table with a trigger for inserting) Bug 22580 (DROP TABLE in nested stored procedure causes strange dependency error) Bug 25345 (Cursors from Functions) This fix resolves a long standing issue originally reported with bug 8407, which affect the behavior of Stored Procedures, Stored Functions and Trigger in many different ways, causing symptoms reported by all the bugs listed. In all cases, the root cause of the problem traces back to 8407 and how the server locks tables involved with sub statements. Prior to this fix, the implementation of stored routines would: - compute the transitive closure of all the tables referenced by a top level statement - open and lock all the tables involved - execute the top level statement "transitive closure of tables" means collecting: - all the tables, - all the stored functions, - all the views, - all the table triggers - all the stored procedures involved, and recursively inspect these objects definition to find more references to more objects, until the list of every object referenced does not grow any more. This mechanism is known as "pre-locking" tables before execution. The motivation for locking all the tables (possibly) used at once is to prevent dead locks. One problem with this approach is that, if the execution path the code really takes during runtime does not use a given table, and if the table is missing, the server would not execute the statement. This in particular has a major impact on triggers, since a missing table referenced by an update/delete trigger would prevent an insert trigger to run. Another problem is that stored routines might define SQL exception handlers to deal with missing tables, but the server implementation would never give user code a chance to execute this logic, since the routine is never executed when a missing table cause the pre-locking code to fail. With this fix, the internal implementation of the pre-locking code has been relaxed of some constraints, so that failure to open a table does not necessarily prevent execution of a stored routine. In particular, the pre-locking mechanism is now behaving as follows: 1) the first step, to compute the transitive closure of all the tables possibly referenced by a statement, is unchanged. 2) the next step, which is to open all the tables involved, only attempts to open the tables added by the pre-locking code, but silently fails without reporting any error or invoking any exception handler is the table is not present. This is achieved by trapping internal errors with Prelock_error_handler 3) the locking step only locks tables that were successfully opened. 4) when executing sub statements, the list of tables used by each statements is evaluated as before. The tables needed by the sub statement are expected to be already opened and locked. Statement referencing tables that were not opened in step 2) will fail to find the table in the open list, and only at this point will execution of the user code fail. 5) when a runtime exception is raised at 4), the instruction continuation destination (the next instruction to execute in case of SQL continue handlers) is evaluated. This is achieved with sp_instr::exec_open_and_lock_tables() 6) if a user exception handler is present in the stored routine, that handler is invoked as usual, so that ER_NO_SUCH_TABLE exceptions can be trapped by stored routines. If no handler exists, then the runtime execution will fail as expected. With all these changes, a side effect is that view security is impacted, in two different ways. First, a view defined as "select stored_function()", where the stored function references a table that may not exist, is considered valid. The rationale is that, because the stored function might trap exceptions during execution and still return a valid result, there is no way to decide when the view is created if a missing table really cause the view to be invalid. Secondly, testing for existence of tables is now done later during execution. View security, which consist of trapping errors and return a generic ER_VIEW_INVALID (to prevent disclosing information) was only implemented at very specific phases covering *opening* tables, but not covering the runtime execution. Because of this existing limitation, errors that were previously trapped and converted into ER_VIEW_INVALID are not trapped, causing table names to be reported to the user. This change is exposing an existing problem, which is independent and will be resolved separately.
2007-03-05 19:42:07 -07:00
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_jump class functions
*/
int
sp_instr_jump::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_jump::execute");
DBUG_PRINT("info", ("destination: %u", m_dest));
*nextp= m_dest;
DBUG_RETURN(0);
}
void
sp_instr_jump::print(String *str)
{
/* jump dest */
if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
return;
str->qs_append(STRING_WITH_LEN("jump "));
str->qs_append(m_dest);
}
uint
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_instr_jump::opt_mark(sp_head *sp, List<sp_instr> *leads)
{
m_dest= opt_shortcut_jump(sp, this);
if (m_dest != m_ip+1) /* Jumping to following instruction? */
marked= 1;
m_optdest= sp->get_instr(m_dest);
return m_dest;
}
uint
sp_instr_jump::opt_shortcut_jump(sp_head *sp, sp_instr *start)
{
uint dest= m_dest;
sp_instr *i;
while ((i= sp->get_instr(dest)))
{
uint ndest;
if (start == i || this == i)
break;
ndest= i->opt_shortcut_jump(sp, start);
if (ndest == dest)
break;
dest= ndest;
}
return dest;
}
void
sp_instr_jump::opt_move(uint dst, List<sp_instr> *bp)
{
if (m_dest > m_ip)
bp->push_back(this); // Forward
else if (m_optdest)
m_dest= m_optdest->m_ip; // Backward
m_ip= dst;
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_jump_if_not class functions
*/
int
sp_instr_jump_if_not::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_jump_if_not::execute");
DBUG_PRINT("info", ("destination: %u", m_dest));
DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
}
int
sp_instr_jump_if_not::exec_core(THD *thd, uint *nextp)
{
Item *it;
int res;
it= sp_prepare_func_item(thd, &m_expr);
if (! it)
{
res= -1;
}
else
{
res= 0;
if (! it->val_bool())
*nextp = m_dest;
else
*nextp = m_ip+1;
}
return res;
}
2005-08-11 15:58:15 +03:00
void
sp_instr_jump_if_not::print(String *str)
{
/* jump_if_not dest(cont) ... */
if (str->reserve(2*SP_INSTR_UINT_MAXLEN+14+32)) // Add some for the expr. too
return;
str->qs_append(STRING_WITH_LEN("jump_if_not "));
str->qs_append(m_dest);
str->qs_append('(');
str->qs_append(m_cont_dest);
str->qs_append(STRING_WITH_LEN(") "));
m_expr->print(str);
}
2005-08-11 15:58:15 +03:00
uint
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_instr_jump_if_not::opt_mark(sp_head *sp, List<sp_instr> *leads)
{
sp_instr *i;
marked= 1;
if ((i= sp->get_instr(m_dest)))
{
m_dest= i->opt_shortcut_jump(sp, this);
m_optdest= sp->get_instr(m_dest);
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp->add_mark_lead(m_dest, leads);
if ((i= sp->get_instr(m_cont_dest)))
{
m_cont_dest= i->opt_shortcut_jump(sp, this);
m_cont_optdest= sp->get_instr(m_cont_dest);
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp->add_mark_lead(m_cont_dest, leads);
return m_ip+1;
}
void
sp_instr_jump_if_not::opt_move(uint dst, List<sp_instr> *bp)
{
/*
cont. destinations may point backwards after shortcutting jumps
during the mark phase. If it's still pointing forwards, only
push this for backpatching if sp_instr_jump::opt_move() will not
do it (i.e. if the m_dest points backwards).
*/
if (m_cont_dest > m_ip)
{ // Forward
if (m_dest < m_ip)
bp->push_back(this);
}
else if (m_cont_optdest)
m_cont_dest= m_cont_optdest->m_ip; // Backward
/* This will take care of m_dest and m_ip */
sp_instr_jump::opt_move(dst, bp);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_freturn class functions
*/
int
sp_instr_freturn::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_freturn::execute");
DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
}
int
sp_instr_freturn::exec_core(THD *thd, uint *nextp)
{
/*
Change <next instruction pointer>, so that this will be the last
instruction in the stored function.
*/
*nextp= UINT_MAX;
/*
Evaluate the value of return expression and store it in current runtime
context.
NOTE: It's necessary to evaluate result item right here, because we must
do it in scope of execution the current context/block.
*/
return thd->spcont->set_return_value(thd, &m_value);
}
void
sp_instr_freturn::print(String *str)
{
/* freturn type expr... */
if (str->reserve(1024+8+32)) // Add some for the expr. too
return;
str->qs_append(STRING_WITH_LEN("freturn "));
str->qs_append((uint)m_type);
str->qs_append(' ');
m_value->print(str);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_hpush_jump class functions
*/
int
sp_instr_hpush_jump::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_hpush_jump::execute");
List_iterator_fast<sp_cond_type_t> li(m_cond);
sp_cond_type_t *p;
while ((p= li++))
thd->spcont->push_handler(p, m_ip+1, m_type, m_frame);
*nextp= m_dest;
DBUG_RETURN(0);
}
void
sp_instr_hpush_jump::print(String *str)
{
/* hpush_jump dest fsize type */
if (str->reserve(SP_INSTR_UINT_MAXLEN*2 + 21))
return;
str->qs_append(STRING_WITH_LEN("hpush_jump "));
str->qs_append(m_dest);
str->qs_append(' ');
str->qs_append(m_frame);
switch (m_type) {
case SP_HANDLER_NONE:
str->qs_append(STRING_WITH_LEN(" NONE")); // This would be a bug
break;
case SP_HANDLER_EXIT:
str->qs_append(STRING_WITH_LEN(" EXIT"));
break;
case SP_HANDLER_CONTINUE:
str->qs_append(STRING_WITH_LEN(" CONTINUE"));
break;
case SP_HANDLER_UNDO:
str->qs_append(STRING_WITH_LEN(" UNDO"));
break;
default:
// This would be a bug as well
str->qs_append(STRING_WITH_LEN(" UNKNOWN:"));
str->qs_append(m_type);
}
}
uint
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_instr_hpush_jump::opt_mark(sp_head *sp, List<sp_instr> *leads)
{
sp_instr *i;
marked= 1;
if ((i= sp->get_instr(m_dest)))
{
m_dest= i->opt_shortcut_jump(sp, this);
m_optdest= sp->get_instr(m_dest);
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp->add_mark_lead(m_dest, leads);
return m_ip+1;
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_hpop class functions
*/
int
sp_instr_hpop::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_hpop::execute");
thd->spcont->pop_handlers(m_count);
*nextp= m_ip+1;
DBUG_RETURN(0);
}
void
sp_instr_hpop::print(String *str)
{
/* hpop count */
if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
return;
str->qs_append(STRING_WITH_LEN("hpop "));
str->qs_append(m_count);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_hreturn class functions
*/
int
sp_instr_hreturn::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_hreturn::execute");
if (m_dest)
*nextp= m_dest;
else
{
*nextp= thd->spcont->pop_hstack();
}
thd->spcont->exit_handler();
DBUG_RETURN(0);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_hreturn::print(String *str)
{
/* hreturn framesize dest */
if (str->reserve(SP_INSTR_UINT_MAXLEN*2 + 9))
return;
str->qs_append(STRING_WITH_LEN("hreturn "));
str->qs_append(m_frame);
if (m_dest)
{
str->qs_append(' ');
str->qs_append(m_dest);
}
}
2005-08-11 15:58:15 +03:00
uint
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_instr_hreturn::opt_mark(sp_head *sp, List<sp_instr> *leads)
{
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
marked= 1;
if (m_dest)
{
/*
This is an EXIT handler; next instruction step is in m_dest.
*/
return m_dest;
}
/*
This is a CONTINUE handler; next instruction step will come from
the handler stack and not from opt_mark.
*/
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
return UINT_MAX;
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_cpush class functions
*/
int
sp_instr_cpush::execute(THD *thd, uint *nextp)
{
Query_arena backup_arena;
DBUG_ENTER("sp_instr_cpush::execute");
/*
We should create cursors in the callers arena, as
it could be (and usually is) used in several instructions.
*/
thd->set_n_backup_active_arena(thd->spcont->callers_arena, &backup_arena);
thd->spcont->push_cursor(&m_lex_keeper, this);
thd->restore_active_arena(thd->spcont->callers_arena, &backup_arena);
*nextp= m_ip+1;
DBUG_RETURN(0);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_cpush::print(String *str)
{
LEX_STRING n;
my_bool found= m_ctx->find_cursor(m_cursor, &n);
/* cpush name@offset */
uint rsrv= SP_INSTR_UINT_MAXLEN+7;
if (found)
rsrv+= n.length;
if (str->reserve(rsrv))
return;
str->qs_append(STRING_WITH_LEN("cpush "));
if (found)
{
str->qs_append(n.str, n.length);
str->qs_append('@');
}
str->qs_append(m_cursor);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_cpop class functions
*/
int
sp_instr_cpop::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_cpop::execute");
thd->spcont->pop_cursors(m_count);
*nextp= m_ip+1;
DBUG_RETURN(0);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_cpop::print(String *str)
{
/* cpop count */
if (str->reserve(SP_INSTR_UINT_MAXLEN+5))
return;
str->qs_append(STRING_WITH_LEN("cpop "));
str->qs_append(m_count);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_copen class functions
*/
int
sp_instr_copen::execute(THD *thd, uint *nextp)
{
/*
We don't store a pointer to the cursor in the instruction to be
able to reuse the same instruction among different threads in future.
*/
sp_cursor *c= thd->spcont->get_cursor(m_cursor);
int res;
DBUG_ENTER("sp_instr_copen::execute");
if (! c)
res= -1;
else
{
sp_lex_keeper *lex_keeper= c->get_lex_keeper();
Query_arena *old_arena= thd->stmt_arena;
/*
Get the Query_arena from the cpush instruction, which contains
the free_list of the query, so new items (if any) are stored in
the right free_list, and we can cleanup after each open.
*/
thd->stmt_arena= c->get_instr();
res= lex_keeper->reset_lex_and_exec_core(thd, nextp, FALSE, this);
/* Cleanup the query's items */
if (thd->stmt_arena->free_list)
cleanup_items(thd->stmt_arena->free_list);
thd->stmt_arena= old_arena;
/*
Work around the fact that errors in selects are not returned properly
(but instead converted into a warning), so if a condition handler
caught, we have lost the result code.
*/
if (!res)
{
uint dummy1, dummy2;
if (thd->spcont->found_handler(&dummy1, &dummy2))
res= -1;
}
/* TODO: Assert here that we either have an error or a cursor */
}
DBUG_RETURN(res);
}
int
sp_instr_copen::exec_core(THD *thd, uint *nextp)
{
sp_cursor *c= thd->spcont->get_cursor(m_cursor);
int res= c->open(thd);
*nextp= m_ip+1;
return res;
}
void
sp_instr_copen::print(String *str)
{
LEX_STRING n;
my_bool found= m_ctx->find_cursor(m_cursor, &n);
/* copen name@offset */
uint rsrv= SP_INSTR_UINT_MAXLEN+7;
if (found)
rsrv+= n.length;
if (str->reserve(rsrv))
return;
str->qs_append(STRING_WITH_LEN("copen "));
if (found)
{
str->qs_append(n.str, n.length);
str->qs_append('@');
}
str->qs_append(m_cursor);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_cclose class functions
*/
int
sp_instr_cclose::execute(THD *thd, uint *nextp)
{
sp_cursor *c= thd->spcont->get_cursor(m_cursor);
int res;
DBUG_ENTER("sp_instr_cclose::execute");
if (! c)
res= -1;
else
res= c->close(thd);
*nextp= m_ip+1;
DBUG_RETURN(res);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_cclose::print(String *str)
{
LEX_STRING n;
my_bool found= m_ctx->find_cursor(m_cursor, &n);
/* cclose name@offset */
uint rsrv= SP_INSTR_UINT_MAXLEN+8;
if (found)
rsrv+= n.length;
if (str->reserve(rsrv))
return;
str->qs_append(STRING_WITH_LEN("cclose "));
if (found)
{
str->qs_append(n.str, n.length);
str->qs_append('@');
}
str->qs_append(m_cursor);
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_cfetch class functions
*/
int
sp_instr_cfetch::execute(THD *thd, uint *nextp)
{
sp_cursor *c= thd->spcont->get_cursor(m_cursor);
int res;
Query_arena backup_arena;
DBUG_ENTER("sp_instr_cfetch::execute");
res= c ? c->fetch(thd, &m_varlist) : -1;
*nextp= m_ip+1;
DBUG_RETURN(res);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_cfetch::print(String *str)
{
List_iterator_fast<struct sp_variable> li(m_varlist);
sp_variable_t *pv;
LEX_STRING n;
my_bool found= m_ctx->find_cursor(m_cursor, &n);
/* cfetch name@offset vars... */
uint rsrv= SP_INSTR_UINT_MAXLEN+8;
if (found)
rsrv+= n.length;
if (str->reserve(rsrv))
return;
str->qs_append(STRING_WITH_LEN("cfetch "));
if (found)
{
str->qs_append(n.str, n.length);
str->qs_append('@');
}
str->qs_append(m_cursor);
while ((pv= li++))
{
if (str->reserve(pv->name.length+SP_INSTR_UINT_MAXLEN+2))
return;
str->qs_append(' ');
str->qs_append(pv->name.str, pv->name.length);
str->qs_append('@');
str->qs_append(pv->offset);
}
}
2005-08-11 15:58:15 +03:00
/*
sp_instr_error class functions
*/
int
sp_instr_error::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_error::execute");
2004-11-12 14:34:00 +02:00
my_message(m_errcode, ER(m_errcode), MYF(0));
*nextp= m_ip+1;
DBUG_RETURN(-1);
}
2005-08-11 15:58:15 +03:00
void
sp_instr_error::print(String *str)
{
/* error code */
if (str->reserve(SP_INSTR_UINT_MAXLEN+6))
return;
str->qs_append(STRING_WITH_LEN("error "));
str->qs_append(m_errcode);
}
/**************************************************************************
sp_instr_set_case_expr class implementation
**************************************************************************/
int
sp_instr_set_case_expr::execute(THD *thd, uint *nextp)
{
DBUG_ENTER("sp_instr_set_case_expr::execute");
DBUG_RETURN(m_lex_keeper.reset_lex_and_exec_core(thd, nextp, TRUE, this));
}
int
sp_instr_set_case_expr::exec_core(THD *thd, uint *nextp)
{
int res= thd->spcont->set_case_expr(thd, m_case_expr_id, &m_case_expr);
if (res &&
!thd->spcont->get_case_expr(m_case_expr_id) &&
thd->spcont->found_handler_here())
{
/*
Failed to evaluate the value, the case expression is still not
initialized, and a handler has been found. Set to NULL so we can continue.
*/
Item *null_item= new Item_null();
if (!null_item ||
thd->spcont->set_case_expr(thd, m_case_expr_id, &null_item))
{
/* If this also failed, we have to abort. */
sp_rcontext *spcont= thd->spcont;
thd->spcont= NULL; /* Avoid handlers */
my_error(ER_OUT_OF_RESOURCES, MYF(0));
spcont->clear_handler();
thd->spcont= spcont;
}
}
else
*nextp= m_ip+1;
return res;
}
void
sp_instr_set_case_expr::print(String *str)
{
/* set_case_expr (cont) id ... */
str->reserve(2*SP_INSTR_UINT_MAXLEN+18+32); // Add some extra for expr too
str->qs_append(STRING_WITH_LEN("set_case_expr ("));
str->qs_append(m_cont_dest);
str->qs_append(STRING_WITH_LEN(") "));
str->qs_append(m_case_expr_id);
str->qs_append(' ');
m_case_expr->print(str);
}
uint
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp_instr_set_case_expr::opt_mark(sp_head *sp, List<sp_instr> *leads)
{
sp_instr *i;
marked= 1;
if ((i= sp->get_instr(m_cont_dest)))
{
m_cont_dest= i->opt_shortcut_jump(sp, this);
m_cont_optdest= sp->get_instr(m_cont_dest);
}
Bug#19194 (Right recursion in parser for CASE causes excessive stack usage, limitation) Note to the reviewer ==================== Warning: reviewing this patch is somewhat involved. Due to the nature of several issues all affecting the same area, fixing separately each issue is not practical, since each fix can not be implemented and tested independently. In particular, the issues with - rule recursion - nested case statements - forward jump resolution (backpatch list) are tightly coupled (see below). Definitions =========== The expression CASE expr WHEN expr THEN expr WHEN expr THEN expr ... END is a "Simple Case Expression". The expression CASE WHEN expr THEN expr WHEN expr THEN expr ... END is a "Searched Case Expression". The statement CASE expr WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Simple Case Statement". The statement CASE WHEN expr THEN stmts WHEN expr THEN stmts ... END CASE is a "Searched Case Statement". A "Left Recursive" rule is like list: element | list element ; A "Right Recursive" rule is like list: element | element list ; Left and right recursion produces the same language, the difference only affects the *order* in which the text is parsed. In a descendant parser (usually written manually), right recursion works very well, and is typically implemented with a while loop. In an ascendant parser (yacc/bison) left recursion works very well, and is implemented naturally by the parser stack. In both cases, using the wrong type or recursion is very bad and should be avoided, as it causes technical issues with the parser implementation. Before this change ================== The "Simple Case Expression" and "Searched Case Expression" were both implemented by the "when_list" and "when_list2" rules, which are left recursive (ok). These rules, however, used lex->when_list instead of using the parser stack, which is more complex that necessary, and potentially dangerous because of other rules using THD::reset_lex. The "Simple Case Statement" and "Searched Case Statements" were implemented by the "sp_case", "sp_whens" and in part by "sp_proc_stmt" rules. Both cases were right recursive (bad). The grammar involved was convoluted, and is assumed to be the results of tweaks to get the code generation to work, but is not what someone would naturally write. In addition, using a common rule for both "Simple" and "Searched" case statements was implemented with sp_head::m_flags |= IN_SIMPLE_CASE, which is a flag and not a stack, and therefore does not take into account *nested* case statements. This leads to incorrect generated code, and either a server crash or an incorrect result. With regards to the backpatch mechanism, a *different* backpatch list was created for each jump from "WHEN expr THEN stmt" to "END CASE", which relied on the grammar to be right recursive. This is a mis-use of the backpatch list, since this list can resolve multiple references to the same target at once. The optimizer algorithm used to detect dead code in the "assembly" SQL instructions, implemented by sp_head::opt_mark(uint ip), was recursive in some cases (a conditional jump pointing forward to another conditional jump). In case of specially crafted code, like - a long list of "IF expr THEN stmt END IF" - a long CASE statement this would actually cause a server crash with a stack overflow. In general, having a stack that grows proportionally with user data (the SQL code given by the client in a CREATE PROCEDURE) is to be avoided. In debug builds only, creating a SP / SF / Trigger which had a significant amount of code would spend --literally-- several minutes in sp_head::create, because of the debug code involved with DBUG_PRINT("info", ("Code %s ... There are several issues with this code: - in a CASE with 5 000 WHEN, there are 15 000 instructions generated, which create a sting representation of the code which is 500 000 bytes long, - using a String instead of an io stream causes performances to degrade to a total server freeze, as time is spent doing realloc of a buffer always too short, - Printing a 500 000 long string in the debug log is too verbose, - Generating this string even when DBUG_PRINT is off is useless, - Having code that potentially can affect the server behavior, used with #ifdef / #endif is useful in some cases, but is also a bad practice. After this change ================= "Case Expressions" (both simple and searched) have been simplified to not use LEX::when_list, which has been removed. Considering all the issues affecting case statements, the grammar for these has been totally re written. The existing actions, used to generate "assembly" sp_inst* code, have been preserved but moved in the new grammar, with the following changes: a) Bison rules are no longer shared between "Simple" and "Searched" case statements, because a stack instead of a flag is required to handle them. Nested statements are handled naturally by the parser stack, which by definition uses the correct rule in the correct context. Nested statements of the opposite type (simple vs searched) works correctly. The flag sp_head::IN_SIMPLE_CASE is no longer used. This is a step towards resolution of WL#2999, which correctly identified that temporary parsing flags do not belong to sp_head. The code in the action is shared by mean of the case_stmt_action_xxx() helpers. b) The backpatch mechanism, used to resolve forward jumps in the generated code, has been changed to: - create a label for the instruction following 'END CASE', - register each jump at the end of a "WHEN expr THEN stmt" in a *unique* backpatch list associated with the 'END CASE' label - resolve all the forward jumps for this label at once. In addition, the code involving backpatch has been commented, so that a reader can now understand by reading matching "Registering" and "Resolving" comments how the forward jumps are resolved and what target they resolve to, as this is far from evident when reading the code alone. The implementation of sp_head::opt_mark() has been revised to avoid recursive calls from jump instructions, and instead add the jump location to the list of paths to explore during the flow analysis of the instruction graph, with a call to sp_head::add_mark_lead(). In addition, the flow analysis will stop if an instruction has already been marked as reachable, which the previous code failed to do in the recursive case. sp_head::opt_mark() is now private, to prevent new calls to this method from being introduced. The debug code present in sp_head::create() has been removed. Considering that SHOW PROCEDURE CODE is also available in debug builds, and can be used anytime regardless of the trace level, as opposed to "CREATE PROCEDURE" time and only if the trace was on, removing the code actually makes debugging easier (usable trace). Tests have been written to cover the parser overflow (big CASE), and to cover nested CASE statements.
2006-11-17 12:14:29 -07:00
sp->add_mark_lead(m_cont_dest, leads);
return m_ip+1;
}
void
sp_instr_set_case_expr::opt_move(uint dst, List<sp_instr> *bp)
{
if (m_cont_dest > m_ip)
bp->push_back(this); // Forward
else if (m_cont_optdest)
m_cont_dest= m_cont_optdest->m_ip; // Backward
m_ip= dst;
}
2005-08-11 15:58:15 +03:00
/* ------------------------------------------------------------------ */
/*
Structure that represent all instances of one table
in optimized multi-set of tables used by routine.
*/
typedef struct st_sp_table
{
/*
Multi-set key:
db_name\0table_name\0alias\0 - for normal tables
db_name\0table_name\0 - for temporary tables
Note that in both cases we don't take last '\0' into account when
we count length of key.
*/
LEX_STRING qname;
uint db_length, table_name_length;
bool temp; /* true if corresponds to a temporary table */
thr_lock_type lock_type; /* lock type used for prelocking */
uint lock_count;
uint query_lock_count;
uint8 trg_event_map;
} SP_TABLE;
uchar *sp_table_key(const uchar *ptr, size_t *plen, my_bool first)
{
SP_TABLE *tab= (SP_TABLE *)ptr;
*plen= tab->qname.length;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
return (uchar *)tab->qname.str;
}
/*
Merge the list of tables used by some query into the multi-set of
tables used by routine.
SYNOPSIS
merge_table_list()
thd - thread context
table - table list
lex_for_tmp_check - LEX of the query for which we are merging
table list.
NOTE
This method will use LEX provided to check whenever we are creating
temporary table and mark it as such in target multi-set.
RETURN VALUE
TRUE - Success
FALSE - Error
*/
bool
sp_head::merge_table_list(THD *thd, TABLE_LIST *table, LEX *lex_for_tmp_check)
{
SP_TABLE *tab;
if (lex_for_tmp_check->sql_command == SQLCOM_DROP_TABLE &&
lex_for_tmp_check->drop_temporary)
return TRUE;
for (uint i= 0 ; i < m_sptabs.records ; i++)
{
tab= (SP_TABLE *)hash_element(&m_sptabs, i);
tab->query_lock_count= 0;
}
for (; table ; table= table->next_global)
if (!table->derived && !table->schema_table)
{
char tname[(NAME_LEN + 1) * 3]; // db\0table\0alias\0
uint tlen, alen;
tlen= table->db_length;
memcpy(tname, table->db, tlen);
tname[tlen++]= '\0';
memcpy(tname+tlen, table->table_name, table->table_name_length);
tlen+= table->table_name_length;
tname[tlen++]= '\0';
alen= strlen(table->alias);
memcpy(tname+tlen, table->alias, alen);
tlen+= alen;
tname[tlen]= '\0';
A fix and a test case for Bug#21483 "Server abort or deadlock on INSERT DELAYED with another implicit insert" Also fixes and adds test cases for bugs: 20497 "Trigger with INSERT DELAYED causes Error 1165" 21714 "Wrong NEW.value and server abort on INSERT DELAYED to a table with a trigger". Post-review fixes. Problem: In MySQL INSERT DELAYED is a way to pipe all inserts into a given table through a dedicated thread. This is necessary for simplistic storage engines like MyISAM, which do not have internal concurrency control or threading and thus can not achieve efficient INSERT throughput without support from SQL layer. DELAYED INSERT works as follows: For every distinct table, which can accept DELAYED inserts and has pending data to insert, a dedicated thread is created to write data to disk. All user connection threads that attempt to delayed-insert into this table interact with the dedicated thread in producer/consumer fashion: all records to-be inserted are pushed into a queue of the dedicated thread, which fetches the records and writes them. In this design, client connection threads never open or lock the delayed insert table. This functionality was introduced in version 3.23 and does not take into account existence of triggers, views, or pre-locking. E.g. if INSERT DELAYED is called from a stored function, which, in turn, is called from another stored function that uses the delayed table, a deadlock can occur, because delayed locking by-passes pre-locking. Besides: * the delayed thread works directly with the subject table through the storage engine API and does not invoke triggers * even if it was patched to invoke triggers, if triggers, in turn, used other tables, the delayed thread would have to open and lock involved tables (use pre-locking). * even if it was patched to use pre-locking, without deadlock detection the delayed thread could easily lock out user connection threads in case when the same table is used both in a trigger and on the right side of the insert query: the delayed thread would not release locks until all inserts are complete, and user connection can not complete inserts without having locks on the tables used on the right side of the query. Solution: These considerations suggest two general alternatives for the future of INSERT DELAYED: * it is considered a full-fledged alternative to normal INSERT * it is regarded as an optimisation that is only relevant for simplistic engines. Since we missed our chance to provide complete support of new features when 5.0 was in development, the first alternative currently renders infeasible. However, even the second alternative, which is to detect new features and convert DELAYED insert into a normal insert, is not easy to implement. The catch-22 is that we don't know if the subject table has triggers or is a view before we open it, and we only open it in the delayed thread. We don't know if the query involves pre-locking until we have opened all tables, and we always first create the delayed thread, and only then open the remaining tables. This patch detects the problematic scenarios and converts DELAYED INSERT to a normal INSERT using the following approach: * if the statement is executed under pre-locking (e.g. from within a stored function or trigger) or the right side may require pre-locking, we detect the situation before creating a delayed insert thread and convert the statement to a conventional INSERT. * if the subject table is a view or has triggers, we shutdown the delayed thread and convert the statement to a conventional INSERT.
2007-05-16 09:51:05 +04:00
/*
Upgrade the lock type because this table list will be used
only in pre-locked mode, in which DELAYED inserts are always
converted to normal inserts.
*/
if (table->lock_type == TL_WRITE_DELAYED)
table->lock_type= TL_WRITE;
/*
We ignore alias when we check if table was already marked as temporary
(and therefore should not be prelocked). Otherwise we will erroneously
treat table with same name but with different alias as non-temporary.
*/
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
if ((tab= (SP_TABLE *)hash_search(&m_sptabs, (uchar *)tname, tlen)) ||
((tab= (SP_TABLE *)hash_search(&m_sptabs, (uchar *)tname,
tlen - alen - 1)) &&
tab->temp))
{
if (tab->lock_type < table->lock_type)
tab->lock_type= table->lock_type; // Use the table with the highest lock type
tab->query_lock_count++;
if (tab->query_lock_count > tab->lock_count)
tab->lock_count++;
tab->trg_event_map|= table->trg_event_map;
}
else
{
if (!(tab= (SP_TABLE *)thd->calloc(sizeof(SP_TABLE))))
return FALSE;
if (lex_for_tmp_check->sql_command == SQLCOM_CREATE_TABLE &&
lex_for_tmp_check->query_tables == table &&
lex_for_tmp_check->create_info.options & HA_LEX_CREATE_TMP_TABLE)
{
tab->temp= TRUE;
tab->qname.length= tlen - alen - 1;
}
else
tab->qname.length= tlen;
tab->qname.str= (char*) thd->memdup(tname, tab->qname.length + 1);
if (!tab->qname.str)
return FALSE;
tab->table_name_length= table->table_name_length;
tab->db_length= table->db_length;
tab->lock_type= table->lock_type;
tab->lock_count= tab->query_lock_count= 1;
tab->trg_event_map= table->trg_event_map;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 12:59:39 +03:00
my_hash_insert(&m_sptabs, (uchar *)tab);
}
}
return TRUE;
}
/*
Add tables used by routine to the table list.
SYNOPSIS
add_used_tables_to_table_list()
thd [in] Thread context
query_tables_last_ptr [in/out] Pointer to the next_global member of
last element of the list where tables
will be added (or to its root).
belong_to_view [in] Uppermost view which uses this routine,
0 if none.
DESCRIPTION
Converts multi-set of tables used by this routine to table list and adds
this list to the end of table list specified by 'query_tables_last_ptr'.
Elements of list will be allocated in PS memroot, so this list will be
persistent between PS executions.
RETURN VALUE
TRUE - if some elements were added, FALSE - otherwise.
*/
bool
sp_head::add_used_tables_to_table_list(THD *thd,
TABLE_LIST ***query_tables_last_ptr,
TABLE_LIST *belong_to_view)
{
uint i;
Query_arena *arena, backup;
bool result= FALSE;
DBUG_ENTER("sp_head::add_used_tables_to_table_list");
/*
Use persistent arena for table list allocation to be PS/SP friendly.
Note that we also have to copy database/table names and alias to PS/SP
memory since current instance of sp_head object can pass away before
next execution of PS/SP for which tables are added to prelocking list.
This will be fixed by introducing of proper invalidation mechanism
once new TDC is ready.
*/
arena= thd->activate_stmt_arena_if_needed(&backup);
for (i=0 ; i < m_sptabs.records ; i++)
{
char *tab_buff, *key_buff;
TABLE_LIST *table;
SP_TABLE *stab= (SP_TABLE *)hash_element(&m_sptabs, i);
if (stab->temp)
continue;
if (!(tab_buff= (char *)thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST)) *
stab->lock_count)) ||
!(key_buff= (char*)thd->memdup(stab->qname.str,
stab->qname.length + 1)))
DBUG_RETURN(FALSE);
for (uint j= 0; j < stab->lock_count; j++)
{
table= (TABLE_LIST *)tab_buff;
table->db= key_buff;
table->db_length= stab->db_length;
table->table_name= table->db + table->db_length + 1;
table->table_name_length= stab->table_name_length;
table->alias= table->table_name + table->table_name_length + 1;
table->lock_type= stab->lock_type;
table->cacheable_table= 1;
table->prelocking_placeholder= 1;
table->belong_to_view= belong_to_view;
table->trg_event_map= stab->trg_event_map;
/* Everyting else should be zeroed */
**query_tables_last_ptr= table;
table->prev_global= *query_tables_last_ptr;
*query_tables_last_ptr= &table->next_global;
tab_buff+= ALIGN_SIZE(sizeof(TABLE_LIST));
result= TRUE;
}
}
if (arena)
thd->restore_active_arena(arena, &backup);
DBUG_RETURN(result);
}
2005-08-11 15:58:15 +03:00
/*
2005-08-11 15:58:15 +03:00
Simple function for adding an explicetly named (systems) table to
the global table list, e.g. "mysql", "proc".
*/
TABLE_LIST *
sp_add_to_query_tables(THD *thd, LEX *lex,
const char *db, const char *name,
thr_lock_type locktype)
{
TABLE_LIST *table;
if (!(table= (TABLE_LIST *)thd->calloc(sizeof(TABLE_LIST))))
{
thd->fatal_error();
return NULL;
}
table->db_length= strlen(db);
table->db= thd->strmake(db, table->db_length);
table->table_name_length= strlen(name);
table->table_name= thd->strmake(name, table->table_name_length);
table->alias= thd->strdup(name);
table->lock_type= locktype;
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 18:52:22 +03:00
table->select_lex= lex->current_select;
table->cacheable_table= 1;
lex->add_to_query_tables(table);
return table;
}