mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-7231 Field ROUTINE_DEFINITION in INFORMATION_SCHEMA.ROUTINES
contains broken procedure body when used shielding quotes inside.
This commit is contained in:
parent
28a36f617f
commit
ff2d92b17d
8 changed files with 516 additions and 45 deletions
|
@ -533,6 +533,7 @@ struct my_charset_handler_st
|
|||
|
||||
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
|
||||
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
|
||||
extern MY_CHARSET_HANDLER my_charset_utf8_handler;
|
||||
|
||||
|
||||
/*
|
||||
|
@ -889,6 +890,18 @@ uint32 my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
|||
const char *from, uint32 from_length,
|
||||
CHARSET_INFO *from_cs, uint *errors);
|
||||
|
||||
/**
|
||||
An extended version of my_convert(), to pass non-default mb_wc() and wc_mb().
|
||||
For example, String::copy_printable() which is used in
|
||||
Protocol::store_warning() uses this to escape control
|
||||
and non-convertable characters.
|
||||
*/
|
||||
uint32 my_convert_using_func(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
||||
my_charset_conv_wc_mb mb_wc,
|
||||
const char *from, uint32 from_length,
|
||||
CHARSET_INFO *from_cs,
|
||||
my_charset_conv_mb_wc wc_mb,
|
||||
uint *errors);
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
Bad byte sequences as well as characters that cannot be
|
||||
|
|
|
@ -10259,5 +10259,146 @@ Warnings:
|
|||
Note 1003 select `test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`c` = 'A')
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-7231 Field ROUTINE_DEFINITION in INFORMATION_SCHEMA.`ROUTINES` contains broken procedure body when used shielding quotes inside.
|
||||
#
|
||||
CREATE PROCEDURE p1()
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """", '\'', "\"";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT _binary'test';
|
||||
SELECT _binary'test\0';
|
||||
SELECT N'''', N"""", N'\'', N"\"";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END$$
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='p1';
|
||||
ROUTINE_DEFINITION
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """", '''', """";
|
||||
SELECT '<tab>\t<tab>\t<tab>';
|
||||
SELECT '<nl>\n<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT 'test';
|
||||
SELECT 'test\0';
|
||||
SELECT N'''', N"""", N'''', N"""";
|
||||
SELECT N'<tab>\t<tab>\t<tab>';
|
||||
SELECT N'<nl>\n<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='p1';
|
||||
body_utf8
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """", '''', """";
|
||||
SELECT '<tab>\t<tab>\t<tab>';
|
||||
SELECT '<nl>\n<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT 'test';
|
||||
SELECT 'test\0';
|
||||
SELECT N'''', N"""", N'''', N"""";
|
||||
SELECT N'<tab>\t<tab>\t<tab>';
|
||||
SELECT N'<nl>\n<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END
|
||||
DROP PROCEDURE p1;
|
||||
SET @@SQL_MODE='NO_BACKSLASH_ESCAPES';
|
||||
CREATE PROCEDURE p1()
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT _binary'test';
|
||||
SELECT _binary'test\0';
|
||||
SELECT N'''', N"""";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END$$
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='p1';
|
||||
ROUTINE_DEFINITION
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT 'test';
|
||||
SELECT 'test\0';
|
||||
SELECT N'''', N"""";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='p1';
|
||||
body_utf8
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT 'test';
|
||||
SELECT 'test\0';
|
||||
SELECT N'''', N"""";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END
|
||||
DROP PROCEDURE p1;
|
||||
SET @@SQL_MODE=default;
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
|
|
|
@ -3382,5 +3382,19 @@ SET NAMES utf8mb4;
|
|||
SELECT * FROM `test😁😁test`;
|
||||
ERROR HY000: Invalid utf8mb4 character string: 'test\xF0\x9F\x98\x81\xF0\x9F\x98\x81test'
|
||||
#
|
||||
# MDEV-7231 Field ROUTINE_DEFINITION in INFORMATION_SCHEMA.`ROUTINES` contains broken procedure body when used shielding quotes inside.
|
||||
#
|
||||
SET NAMES utf8mb4;
|
||||
CREATE FUNCTION f1() RETURNS TEXT CHARACTER SET utf8mb4
|
||||
RETURN CONCAT('😎','x😎','😎y','x😎y');
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='f1';
|
||||
ROUTINE_DEFINITION
|
||||
RETURN CONCAT('?','x?','?y','x?y')
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='f1';
|
||||
body_utf8
|
||||
RETURN CONCAT('?','x?','?y','x?y')
|
||||
DROP FUNCTION f1;
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
|
|
|
@ -1871,6 +1871,82 @@ SELECT * FROM t1 WHERE c>=_utf8'a' COLLATE utf8_general_ci AND c='A';
|
|||
DROP TABLE t1;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-7231 Field ROUTINE_DEFINITION in INFORMATION_SCHEMA.`ROUTINES` contains broken procedure body when used shielding quotes inside.
|
||||
--echo #
|
||||
DELIMITER $$;
|
||||
CREATE PROCEDURE p1()
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """", '\'', "\"";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT _binary'test';
|
||||
SELECT _binary'test\0';
|
||||
SELECT N'''', N"""", N'\'', N"\"";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END$$
|
||||
DELIMITER ;$$
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='p1';
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='p1';
|
||||
DROP PROCEDURE p1;
|
||||
|
||||
SET @@SQL_MODE='NO_BACKSLASH_ESCAPES';
|
||||
DELIMITER $$;
|
||||
CREATE PROCEDURE p1()
|
||||
BEGIN
|
||||
SELECT CONCAT('ABC = ''',1,''''), CONCAT('ABC = ',2);
|
||||
SELECT '''', """";
|
||||
SELECT '<tab> <tab>\t<tab>';
|
||||
SELECT '<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT 'test';
|
||||
SELECT 'tëst';
|
||||
SELECT 'test\0';
|
||||
SELECT 'tëst\0';
|
||||
SELECT _binary'test';
|
||||
SELECT _binary'test\0';
|
||||
SELECT N'''', N"""";
|
||||
SELECT N'<tab> <tab>\t<tab>';
|
||||
SELECT N'<nl>
|
||||
<nl>\n<nl>';
|
||||
SELECT N'test';
|
||||
SELECT N'tëst';
|
||||
SELECT N'test\0';
|
||||
SELECT N'tëst\0';
|
||||
END$$
|
||||
DELIMITER ;$$
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='p1';
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='p1';
|
||||
DROP PROCEDURE p1;
|
||||
SET @@SQL_MODE=default;
|
||||
|
||||
|
||||
# TODO: Uncomment the below test when we fix:
|
||||
# MDEV-9623 INFORMATION_SCHEMA.ROUTINES.ROUTINE_DEFINITION does not handle binary literals well
|
||||
#
|
||||
#SET NAMES binary;
|
||||
#CREATE FUNCTION f1() RETURNS TEXT RETURN CONCAT('i','й');
|
||||
#SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
#WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='f1';
|
||||
#SELECT body_utf8 FROM mysql.proc WHERE name='f1';
|
||||
#DROP FUNCTION f1;
|
||||
#SET NAMES utf8;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
|
|
@ -1904,6 +1904,18 @@ SET NAMES utf8mb4;
|
|||
--error ER_INVALID_CHARACTER_STRING
|
||||
SELECT * FROM `test😁😁test`;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-7231 Field ROUTINE_DEFINITION in INFORMATION_SCHEMA.`ROUTINES` contains broken procedure body when used shielding quotes inside.
|
||||
--echo #
|
||||
# Non-BMP characters should be replaced with '?' in ROUTINE_DEFINITION/body_utf8
|
||||
SET NAMES utf8mb4;
|
||||
CREATE FUNCTION f1() RETURNS TEXT CHARACTER SET utf8mb4
|
||||
RETURN CONCAT('😎','x😎','😎y','x😎y');
|
||||
SELECT ROUTINE_DEFINITION FROM INFORMATION_SCHEMA.ROUTINES
|
||||
WHERE ROUTINE_SCHEMA='test' AND SPECIFIC_NAME ='f1';
|
||||
SELECT body_utf8 FROM mysql.proc WHERE name='f1';
|
||||
DROP FUNCTION f1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
|
255
sql/sql_lex.cc
255
sql/sql_lex.cc
|
@ -324,9 +324,7 @@ void Lex_input_stream::body_utf8_start(THD *thd, const char *begin_ptr)
|
|||
DBUG_ASSERT(begin_ptr);
|
||||
DBUG_ASSERT(m_cpp_buf <= begin_ptr && begin_ptr <= m_cpp_buf + m_buf_length);
|
||||
|
||||
uint body_utf8_length=
|
||||
(m_buf_length / thd->variables.character_set_client->mbminlen) *
|
||||
my_charset_utf8_bin.mbmaxlen;
|
||||
uint body_utf8_length= get_body_utf8_maximum_length(thd);
|
||||
|
||||
m_body_utf8= (char *) thd->alloc(body_utf8_length + 1);
|
||||
m_body_utf8_ptr= m_body_utf8;
|
||||
|
@ -335,6 +333,22 @@ void Lex_input_stream::body_utf8_start(THD *thd, const char *begin_ptr)
|
|||
m_cpp_utf8_processed_ptr= begin_ptr;
|
||||
}
|
||||
|
||||
|
||||
/**
  Calculate how many bytes to reserve for the utf8 copy of the query body.

  The result is the buffer length divided by mbminlen of the client
  character set, multiplied by mbmaxlen of utf8, and then doubled
  to leave room for escaping.
*/
uint Lex_input_stream::get_body_utf8_maximum_length(THD *thd)
{
  /*
    Escaping can make a string literal longer:
    1a. The character string '<TAB>' can become '\t',
        which is a growth from 3 bytes to 4 bytes.
    1b. The character string '1000 times <TAB>' becomes 2002 bytes
        instead of 1002 bytes (quotes included), i.e. it grows
        by a factor of slightly less than 2.
    So a multiplier of "2" should safely cover what escaping needs.
  */
  const uint client_mbminlen= thd->variables.character_set_client->mbminlen;
  const uint utf8_mbmaxlen= my_charset_utf8_bin.mbmaxlen;
  const uint escaping_factor= 2;
  return (m_buf_length / client_mbminlen) * utf8_mbmaxlen * escaping_factor;
}
|
||||
|
||||
|
||||
/**
|
||||
@brief The operation appends unprocessed part of pre-processed buffer till
|
||||
the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to end_ptr.
|
||||
|
@ -402,15 +416,15 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
|
|||
operation.
|
||||
*/
|
||||
|
||||
void Lex_input_stream::body_utf8_append_literal(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
CHARSET_INFO *txt_cs,
|
||||
const char *end_ptr)
|
||||
void Lex_input_stream::body_utf8_append_ident(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
const char *end_ptr)
|
||||
{
|
||||
if (!m_cpp_utf8_processed_ptr)
|
||||
return;
|
||||
|
||||
LEX_STRING utf_txt;
|
||||
CHARSET_INFO *txt_cs= thd->charset();
|
||||
|
||||
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
|
||||
{
|
||||
|
@ -434,6 +448,189 @@ void Lex_input_stream::body_utf8_append_literal(THD *thd,
|
|||
m_cpp_utf8_processed_ptr= end_ptr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
extern "C" {
|
||||
|
||||
/**
|
||||
Escape a character. Consequently puts "escape" and "wc" characters into
|
||||
the destination utf8 string.
|
||||
@param cs - the character set (utf8)
|
||||
@param escape - the escape character (backslash, single quote, double quote)
|
||||
@param wc - the character to be escaped
|
||||
@param str - the destination string
|
||||
@param end - the end of the destination string
|
||||
@returns - a code according to the wc_mb() convension.
|
||||
*/
|
||||
int my_wc_mb_utf8_with_escape(CHARSET_INFO *cs, my_wc_t escape, my_wc_t wc,
|
||||
uchar *str, uchar *end)
|
||||
{
|
||||
DBUG_ASSERT(escape > 0);
|
||||
if (str + 1 >= end)
|
||||
return MY_CS_TOOSMALL2; // Not enough space, need at least two bytes.
|
||||
*str= escape;
|
||||
int cnvres= my_charset_utf8_handler.wc_mb(cs, wc, str + 1, end);
|
||||
if (cnvres > 0)
|
||||
return cnvres + 1; // The character was normally put
|
||||
if (cnvres == MY_CS_ILUNI)
|
||||
return MY_CS_ILUNI; // Could not encode "wc" (e.g. non-BMP character)
|
||||
DBUG_ASSERT(cnvres <= MY_CS_TOOSMALL);
|
||||
return cnvres - 1; // Not enough space
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Optionally escape a character.
|
||||
If "escape" is non-zero, then both "escape" and "wc" are put to
|
||||
the destination string. Otherwise, only "wc" is put.
|
||||
@param cs - the character set (utf8)
|
||||
@param wc - the character to be optionally escaped
|
||||
@param escape - the escape character, or 0
|
||||
@param ewc - the escaped replacement of "wc" (e.g. 't' for '\t')
|
||||
@param str - the destination string
|
||||
@param end - the end of the destination string
|
||||
@returns - a code according to the wc_mb() conversion.
|
||||
*/
|
||||
int my_wc_mb_utf8_opt_escape(CHARSET_INFO *cs,
|
||||
my_wc_t wc, my_wc_t escape, my_wc_t ewc,
|
||||
uchar *str, uchar *end)
|
||||
{
|
||||
return escape ? my_wc_mb_utf8_with_escape(cs, escape, ewc, str, end) :
|
||||
my_charset_utf8_handler.wc_mb(cs, wc, str, end);
|
||||
}
|
||||
|
||||
/**
|
||||
Encode a character with optional backlash escaping and quote escaping.
|
||||
Quote marks are escaped using another quote mark.
|
||||
Additionally, if "escape" is non-zero, then special characters are
|
||||
also escaped using "escape".
|
||||
Otherwise (if "escape" is zero, e.g. in case of MODE_NO_BACKSLASH_ESCAPES),
|
||||
then special characters are not escaped and handled as normal characters.
|
||||
|
||||
@param cs - the character set (utf8)
|
||||
@param wc - the character to be encoded
|
||||
@param str - the destination string
|
||||
@param end - the end of the destination string
|
||||
@param sep - the string delimiter (e.g. ' or ")
|
||||
@param escape - the escape character (backslash, or 0)
|
||||
@returns - a code according to the wc_mb() convension.
|
||||
*/
|
||||
int my_wc_mb_utf8_escape(CHARSET_INFO *cs, my_wc_t wc, uchar *str, uchar *end,
|
||||
my_wc_t sep, my_wc_t escape)
|
||||
{
|
||||
DBUG_ASSERT(escape == 0 || escape == '\\');
|
||||
DBUG_ASSERT(sep == '"' || sep == '\'');
|
||||
switch (wc) {
|
||||
case 0: return my_wc_mb_utf8_opt_escape(cs, wc, escape, '0', str, end);
|
||||
case '\t': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 't', str, end);
|
||||
case '\r': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'r', str, end);
|
||||
case '\n': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'n', str, end);
|
||||
case '\032': return my_wc_mb_utf8_opt_escape(cs, wc, escape, 'Z', str, end);
|
||||
case '\'':
|
||||
case '\"':
|
||||
if (wc == sep)
|
||||
return my_wc_mb_utf8_with_escape(cs, wc, wc, str, end);
|
||||
}
|
||||
return my_charset_utf8_handler.wc_mb(cs, wc, str, end); // No escaping needed
|
||||
}
|
||||
|
||||
|
||||
/** wc_mb() compatible routines for all sql_mode and delimiter combinations */
|
||||
int my_wc_mb_utf8_escape_single_quote_and_backslash(CHARSET_INFO *cs,
|
||||
my_wc_t wc,
|
||||
uchar *str, uchar *end)
|
||||
{
|
||||
return my_wc_mb_utf8_escape(cs, wc, str, end, '\'', '\\');
|
||||
}
|
||||
|
||||
|
||||
/** Delimiter is a double quote, escape character is a backslash. */
int my_wc_mb_utf8_escape_double_quote_and_backslash(CHARSET_INFO *cs,
                                                    my_wc_t wc,
                                                    uchar *str, uchar *end)
{
  const my_wc_t delimiter= '"';
  const my_wc_t backslash= '\\';
  return my_wc_mb_utf8_escape(cs, wc, str, end, delimiter, backslash);
}
|
||||
|
||||
|
||||
/** Delimiter is a single quote, no escape character (0 is passed). */
int my_wc_mb_utf8_escape_single_quote(CHARSET_INFO *cs, my_wc_t wc,
                                      uchar *str, uchar *end)
{
  const my_wc_t delimiter= '\'';
  const my_wc_t no_escape= 0;
  return my_wc_mb_utf8_escape(cs, wc, str, end, delimiter, no_escape);
}
|
||||
|
||||
|
||||
/** Delimiter is a double quote, no escape character (0 is passed). */
int my_wc_mb_utf8_escape_double_quote(CHARSET_INFO *cs, my_wc_t wc,
                                      uchar *str, uchar *end)
{
  const my_wc_t delimiter= '"';
  const my_wc_t no_escape= 0;
  return my_wc_mb_utf8_escape(cs, wc, str, end, delimiter, no_escape);
}
|
||||
|
||||
}; // End of extern "C"
|
||||
|
||||
|
||||
/**
|
||||
Get an escaping function, depending on the current sql_mode and the
|
||||
string separator.
|
||||
*/
|
||||
my_charset_conv_wc_mb
|
||||
Lex_input_stream::get_escape_func(THD *thd, my_wc_t sep) const
|
||||
{
|
||||
return thd->backslash_escapes() ?
|
||||
(sep == '"' ? my_wc_mb_utf8_escape_double_quote_and_backslash:
|
||||
my_wc_mb_utf8_escape_single_quote_and_backslash) :
|
||||
(sep == '"' ? my_wc_mb_utf8_escape_double_quote:
|
||||
my_wc_mb_utf8_escape_single_quote);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Append a text literal to the end of m_body_utf8.
|
||||
The string is escaped according to the current sql_mode and the
|
||||
string delimiter (e.g. ' or ").
|
||||
|
||||
@param thd - current THD
|
||||
@param txt - the string to be appended to m_body_utf8.
|
||||
Note, the string must be already unescaped.
|
||||
@param cs - the character set of the string
|
||||
@param end_ptr - m_cpp_utf8_processed_ptr will be set to this value
|
||||
(see body_utf8_append_ident for details)
|
||||
@param sep - the string delimiter (single or double quote)
|
||||
*/
|
||||
void Lex_input_stream::body_utf8_append_escape(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
CHARSET_INFO *cs,
|
||||
const char *end_ptr,
|
||||
my_wc_t sep)
|
||||
{
|
||||
DBUG_ASSERT(sep == '\'' || sep == '"');
|
||||
if (!m_cpp_utf8_processed_ptr)
|
||||
return;
|
||||
uint errors;
|
||||
/**
|
||||
We previously alloced m_body_utf8 to be able to store the query with all
|
||||
strings properly escaped. See get_body_utf8_maximum_length().
|
||||
So here we have guaranteedly enough space to append any string literal
|
||||
with escaping. Passing txt->length*2 as "available space" is always safe.
|
||||
For better safety purposes we could calculate get_body_utf8_maximum_length()
|
||||
every time we append a string, but this would affect performance negatively,
|
||||
so let's check that we don't get beyond the allocated buffer in
|
||||
debug build only.
|
||||
*/
|
||||
DBUG_ASSERT(m_body_utf8 + get_body_utf8_maximum_length(thd) >=
|
||||
m_body_utf8_ptr + txt->length * 2);
|
||||
uint32 cnv_length= my_convert_using_func(m_body_utf8_ptr, txt->length * 2,
|
||||
&my_charset_utf8_general_ci,
|
||||
get_escape_func(thd, sep),
|
||||
txt->str, txt->length,
|
||||
cs, cs->cset->mb_wc,
|
||||
&errors);
|
||||
m_body_utf8_ptr+= cnv_length;
|
||||
*m_body_utf8_ptr= 0;
|
||||
m_cpp_utf8_processed_ptr= end_ptr;
|
||||
}
|
||||
|
||||
|
||||
void Lex_input_stream::add_digest_token(uint token, LEX_YYSTYPE yylval)
|
||||
{
|
||||
if (m_digest != NULL)
|
||||
|
@ -797,14 +994,14 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
|
|||
Fix sometimes to do only one scan of the string
|
||||
*/
|
||||
|
||||
bool Lex_input_stream::get_text(LEX_STRING *dst, int pre_skip, int post_skip)
|
||||
bool Lex_input_stream::get_text(LEX_STRING *dst, uint sep,
|
||||
int pre_skip, int post_skip)
|
||||
{
|
||||
reg1 uchar c,sep;
|
||||
reg1 uchar c;
|
||||
uint found_escape=0;
|
||||
CHARSET_INFO *cs= m_thd->charset();
|
||||
|
||||
tok_bitmap= 0;
|
||||
sep= yyGetLast(); // String should end with this
|
||||
while (! eof())
|
||||
{
|
||||
c= yyGet();
|
||||
|
@ -1169,6 +1366,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
return((int) c);
|
||||
|
||||
case MY_LEX_IDENT_OR_NCHAR:
|
||||
{
|
||||
uint sep;
|
||||
if (lip->yyPeek() != '\'')
|
||||
{
|
||||
state= MY_LEX_IDENT;
|
||||
|
@ -1176,14 +1375,20 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
}
|
||||
/* Found N'string' */
|
||||
lip->yySkip(); // Skip '
|
||||
if (lip->get_text(&yylval->lex_str, 2, 1))
|
||||
if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 2, 1))
|
||||
{
|
||||
state= MY_LEX_CHAR; // Read char by char
|
||||
break;
|
||||
}
|
||||
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
lip->body_utf8_append_escape(thd, &yylval->lex_str,
|
||||
national_charset_info,
|
||||
lip->m_cpp_text_end, sep);
|
||||
|
||||
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
||||
return(NCHAR_STRING);
|
||||
|
||||
}
|
||||
case MY_LEX_IDENT_OR_HEX:
|
||||
if (lip->yyPeek() == '\'')
|
||||
{ // Found x'hex-number'
|
||||
|
@ -1286,8 +1491,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
|
||||
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
|
||||
lip->m_cpp_text_end);
|
||||
lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
|
||||
|
||||
return(result_state); // IDENT or IDENT_QUOTED
|
||||
|
||||
|
@ -1391,8 +1595,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
|
||||
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
|
||||
lip->m_cpp_text_end);
|
||||
lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
|
||||
|
||||
return(result_state);
|
||||
|
||||
|
@ -1435,8 +1638,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
|
||||
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
|
||||
lip->m_cpp_text_end);
|
||||
lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
|
||||
|
||||
return(IDENT_QUOTED);
|
||||
}
|
||||
|
@ -1541,23 +1743,23 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
}
|
||||
/* " used for strings */
|
||||
case MY_LEX_STRING: // Incomplete text string
|
||||
if (lip->get_text(&yylval->lex_str, 1, 1))
|
||||
{
|
||||
uint sep;
|
||||
if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 1, 1))
|
||||
{
|
||||
state= MY_LEX_CHAR; // Read char by char
|
||||
break;
|
||||
}
|
||||
|
||||
CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs;
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
|
||||
lip->body_utf8_append_literal(thd, &yylval->lex_str,
|
||||
lip->m_underscore_cs ? lip->m_underscore_cs : cs,
|
||||
lip->m_cpp_text_end);
|
||||
|
||||
lip->body_utf8_append_escape(thd, &yylval->lex_str, strcs,
|
||||
lip->m_cpp_text_end, sep);
|
||||
lip->m_underscore_cs= NULL;
|
||||
|
||||
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
||||
return(TEXT_STRING);
|
||||
|
||||
}
|
||||
case MY_LEX_COMMENT: // Comment
|
||||
lex->select_lex.options|= OPTION_FOUND_COMMENT;
|
||||
while ((c = lip->yyGet()) != '\n' && c) ;
|
||||
|
@ -1806,8 +2008,7 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
|||
|
||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||
|
||||
lip->body_utf8_append_literal(thd, &yylval->lex_str, cs,
|
||||
lip->m_cpp_text_end);
|
||||
lip->body_utf8_append_ident(thd, &yylval->lex_str, lip->m_cpp_text_end);
|
||||
|
||||
return(result_state);
|
||||
}
|
||||
|
|
|
@ -1807,6 +1807,7 @@ class Lex_input_stream
|
|||
{
|
||||
size_t unescape(CHARSET_INFO *cs, char *to,
|
||||
const char *str, const char *end, int sep);
|
||||
my_charset_conv_wc_mb get_escape_func(THD *thd, my_wc_t sep) const;
|
||||
public:
|
||||
Lex_input_stream()
|
||||
{
|
||||
|
@ -2077,14 +2078,23 @@ public:
|
|||
return (uint) (m_body_utf8_ptr - m_body_utf8);
|
||||
}
|
||||
|
||||
/**
|
||||
Get the maximum length of the utf8-body buffer.
|
||||
The utf8 body can grow because of the character set conversion and escaping.
|
||||
*/
|
||||
uint get_body_utf8_maximum_length(THD *thd);
|
||||
|
||||
void body_utf8_start(THD *thd, const char *begin_ptr);
|
||||
void body_utf8_append(const char *ptr);
|
||||
void body_utf8_append(const char *ptr, const char *end_ptr);
|
||||
void body_utf8_append_literal(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
CHARSET_INFO *txt_cs,
|
||||
const char *end_ptr);
|
||||
|
||||
void body_utf8_append_ident(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
const char *end_ptr);
|
||||
void body_utf8_append_escape(THD *thd,
|
||||
const LEX_STRING *txt,
|
||||
CHARSET_INFO *txt_cs,
|
||||
const char *end_ptr,
|
||||
my_wc_t sep);
|
||||
/** Current thread. */
|
||||
THD *m_thd;
|
||||
|
||||
|
@ -2105,7 +2115,7 @@ public:
|
|||
/** LALR(2) resolution, value of the look ahead token.*/
|
||||
LEX_YYSTYPE lookahead_yylval;
|
||||
|
||||
bool get_text(LEX_STRING *to, int pre_skip, int post_skip);
|
||||
bool get_text(LEX_STRING *to, uint sep, int pre_skip, int post_skip);
|
||||
|
||||
void add_digest_token(uint token, LEX_YYSTYPE yylval);
|
||||
|
||||
|
|
|
@ -1030,19 +1030,18 @@ my_charset_is_ascii_compatible(CHARSET_INFO *cs)
|
|||
@return Number of bytes copied to 'to' string
|
||||
*/
|
||||
|
||||
static uint32
|
||||
my_convert_internal(char *to, uint32 to_length,
|
||||
CHARSET_INFO *to_cs,
|
||||
const char *from, uint32 from_length,
|
||||
CHARSET_INFO *from_cs, uint *errors)
|
||||
uint32
|
||||
my_convert_using_func(char *to, uint32 to_length,
|
||||
CHARSET_INFO *to_cs, my_charset_conv_wc_mb wc_mb,
|
||||
const char *from, uint32 from_length,
|
||||
CHARSET_INFO *from_cs, my_charset_conv_mb_wc mb_wc,
|
||||
uint *errors)
|
||||
{
|
||||
int cnvres;
|
||||
my_wc_t wc;
|
||||
const uchar *from_end= (const uchar*) from + from_length;
|
||||
char *to_start= to;
|
||||
uchar *to_end= (uchar*) to + to_length;
|
||||
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
|
||||
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
|
||||
uint error_count= 0;
|
||||
|
||||
while (1)
|
||||
|
@ -1119,8 +1118,11 @@ my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
|||
immediately switch to slow mb_wc->wc_mb method.
|
||||
*/
|
||||
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
|
||||
return my_convert_internal(to, to_length, to_cs,
|
||||
from, from_length, from_cs, errors);
|
||||
return my_convert_using_func(to, to_length,
|
||||
to_cs, to_cs->cset->wc_mb,
|
||||
from, from_length,
|
||||
from_cs, from_cs->cset->mb_wc,
|
||||
errors);
|
||||
|
||||
length= length2= MY_MIN(to_length, from_length);
|
||||
|
||||
|
@ -1152,9 +1154,11 @@ my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
|||
uint32 copied_length= length2 - length;
|
||||
to_length-= copied_length;
|
||||
from_length-= copied_length;
|
||||
return copied_length + my_convert_internal(to, to_length, to_cs,
|
||||
from, from_length, from_cs,
|
||||
errors);
|
||||
return copied_length + my_convert_using_func(to, to_length, to_cs,
|
||||
to_cs->cset->wc_mb,
|
||||
from, from_length, from_cs,
|
||||
from_cs->cset->mb_wc,
|
||||
errors);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue