mirror of
https://github.com/MariaDB/server.git
synced 2026-04-23 00:35:32 +02:00
Preparatory refactoring for:
MDEV-6218 Wrong result of CHAR_LENGTH(non-BMP-character) with 3-byte utf8
- Moving get_text() as a method to Lex_input_stream.
- Moving the unescaping part into a separate function; this piece of code will most likely go to /strings later.
- Removing Lex_input_stream::yytoklen, as it's not needed any more.
This commit is contained in:
parent
01d7da6785
commit
50eee60504
3 changed files with 108 additions and 90 deletions
|
|
@ -2960,6 +2960,10 @@ public:
|
||||||
return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES |
|
return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES |
|
||||||
MODE_STRICT_ALL_TABLES));
|
MODE_STRICT_ALL_TABLES));
|
||||||
}
|
}
|
||||||
|
inline bool backslash_escapes() const
|
||||||
|
{
|
||||||
|
return !MY_TEST(variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES);
|
||||||
|
}
|
||||||
inline my_time_t query_start() { query_start_used=1; return start_time; }
|
inline my_time_t query_start() { query_start_used=1; return start_time; }
|
||||||
inline ulong query_start_sec_part()
|
inline ulong query_start_sec_part()
|
||||||
{ query_start_sec_part_used=1; return start_time_sec_part; }
|
{ query_start_sec_part_used=1; return start_time_sec_part; }
|
||||||
|
|
|
||||||
188
sql/sql_lex.cc
188
sql/sql_lex.cc
|
|
@ -281,7 +281,6 @@ void
|
||||||
Lex_input_stream::reset(char *buffer, unsigned int length)
|
Lex_input_stream::reset(char *buffer, unsigned int length)
|
||||||
{
|
{
|
||||||
yylineno= 1;
|
yylineno= 1;
|
||||||
yytoklen= 0;
|
|
||||||
yylval= NULL;
|
yylval= NULL;
|
||||||
lookahead_token= -1;
|
lookahead_token= -1;
|
||||||
lookahead_yylval= NULL;
|
lookahead_yylval= NULL;
|
||||||
|
|
@ -641,7 +640,7 @@ static LEX_STRING get_token(Lex_input_stream *lip, uint skip, uint length)
|
||||||
{
|
{
|
||||||
LEX_STRING tmp;
|
LEX_STRING tmp;
|
||||||
lip->yyUnget(); // ptr points now after last token char
|
lip->yyUnget(); // ptr points now after last token char
|
||||||
tmp.length=lip->yytoklen=length;
|
tmp.length= length;
|
||||||
tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length);
|
tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length);
|
||||||
|
|
||||||
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
|
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
|
||||||
|
|
@ -665,7 +664,7 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
|
||||||
const char *from, *end;
|
const char *from, *end;
|
||||||
char *to;
|
char *to;
|
||||||
lip->yyUnget(); // ptr points now after last token char
|
lip->yyUnget(); // ptr points now after last token char
|
||||||
tmp.length= lip->yytoklen=length;
|
tmp.length= length;
|
||||||
tmp.str=(char*) lip->m_thd->alloc(tmp.length+1);
|
tmp.str=(char*) lip->m_thd->alloc(tmp.length+1);
|
||||||
from= lip->get_tok_start() + skip;
|
from= lip->get_tok_start() + skip;
|
||||||
to= tmp.str;
|
to= tmp.str;
|
||||||
|
|
@ -687,135 +686,152 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
my_unescape(CHARSET_INFO *cs, char *to, const char *str, const char *end,
|
||||||
|
int sep, bool backslash_escapes)
|
||||||
|
{
|
||||||
|
char *start= to;
|
||||||
|
for ( ; str != end ; str++)
|
||||||
|
{
|
||||||
|
#ifdef USE_MB
|
||||||
|
int l;
|
||||||
|
if (use_mb(cs) && (l= my_ismbchar(cs, str, end)))
|
||||||
|
{
|
||||||
|
while (l--)
|
||||||
|
*to++ = *str++;
|
||||||
|
str--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (backslash_escapes && *str == '\\' && str + 1 != end)
|
||||||
|
{
|
||||||
|
switch(*++str) {
|
||||||
|
case 'n':
|
||||||
|
*to++='\n';
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
*to++= '\t';
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
*to++ = '\r';
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
*to++ = '\b';
|
||||||
|
break;
|
||||||
|
case '0':
|
||||||
|
*to++= 0; // Ascii null
|
||||||
|
break;
|
||||||
|
case 'Z': // ^Z must be escaped on Win32
|
||||||
|
*to++='\032';
|
||||||
|
break;
|
||||||
|
case '_':
|
||||||
|
case '%':
|
||||||
|
*to++= '\\'; // remember prefix for wildcard
|
||||||
|
/* Fall through */
|
||||||
|
default:
|
||||||
|
*to++= *str;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*str == sep)
|
||||||
|
*to++= *str++; // Two ' or "
|
||||||
|
else
|
||||||
|
*to++ = *str;
|
||||||
|
}
|
||||||
|
*to= 0;
|
||||||
|
return to - start;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t
|
||||||
|
Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
|
||||||
|
const char *str, const char *end,
|
||||||
|
int sep)
|
||||||
|
{
|
||||||
|
return my_unescape(cs, to, str, end, sep, m_thd->backslash_escapes());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Return an unescaped text literal without quotes
|
Return an unescaped text literal without quotes
|
||||||
Fix sometimes to do only one scan of the string
|
Fix sometimes to do only one scan of the string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static char *get_text(Lex_input_stream *lip, int pre_skip, int post_skip)
|
bool Lex_input_stream::get_text(LEX_STRING *dst, int pre_skip, int post_skip)
|
||||||
{
|
{
|
||||||
reg1 uchar c,sep;
|
reg1 uchar c,sep;
|
||||||
uint found_escape=0;
|
uint found_escape=0;
|
||||||
CHARSET_INFO *cs= lip->m_thd->charset();
|
CHARSET_INFO *cs= m_thd->charset();
|
||||||
|
|
||||||
lip->tok_bitmap= 0;
|
tok_bitmap= 0;
|
||||||
sep= lip->yyGetLast(); // String should end with this
|
sep= yyGetLast(); // String should end with this
|
||||||
while (! lip->eof())
|
while (! eof())
|
||||||
{
|
{
|
||||||
c= lip->yyGet();
|
c= yyGet();
|
||||||
lip->tok_bitmap|= c;
|
tok_bitmap|= c;
|
||||||
#ifdef USE_MB
|
#ifdef USE_MB
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
if (use_mb(cs) &&
|
if (use_mb(cs) &&
|
||||||
(l = my_ismbchar(cs,
|
(l = my_ismbchar(cs,
|
||||||
lip->get_ptr() -1,
|
get_ptr() -1,
|
||||||
lip->get_end_of_query()))) {
|
get_end_of_query()))) {
|
||||||
lip->skip_binary(l-1);
|
skip_binary(l-1);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (c == '\\' &&
|
if (c == '\\' &&
|
||||||
!(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))
|
!(m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))
|
||||||
{ // Escaped character
|
{ // Escaped character
|
||||||
found_escape=1;
|
found_escape=1;
|
||||||
if (lip->eof())
|
if (eof())
|
||||||
return 0;
|
return true;
|
||||||
lip->yySkip();
|
yySkip();
|
||||||
}
|
}
|
||||||
else if (c == sep)
|
else if (c == sep)
|
||||||
{
|
{
|
||||||
if (c == lip->yyGet()) // Check if two separators in a row
|
if (c == yyGet()) // Check if two separators in a row
|
||||||
{
|
{
|
||||||
found_escape=1; // duplicate. Remember for delete
|
found_escape=1; // duplicate. Remember for delete
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
lip->yyUnget();
|
yyUnget();
|
||||||
|
|
||||||
/* Found end. Unescape and return string */
|
/* Found end. Unescape and return string */
|
||||||
const char *str, *end;
|
const char *str, *end;
|
||||||
char *start;
|
|
||||||
|
|
||||||
str= lip->get_tok_start();
|
str= get_tok_start();
|
||||||
end= lip->get_ptr();
|
end= get_ptr();
|
||||||
/* Extract the text from the token */
|
/* Extract the text from the token */
|
||||||
str += pre_skip;
|
str += pre_skip;
|
||||||
end -= post_skip;
|
end -= post_skip;
|
||||||
DBUG_ASSERT(end >= str);
|
DBUG_ASSERT(end >= str);
|
||||||
|
|
||||||
if (!(start= (char*) lip->m_thd->alloc((uint) (end-str)+1)))
|
if (!(dst->str= (char*) m_thd->alloc((uint) (end - str) + 1)))
|
||||||
return (char*) ""; // Sql_alloc has set error flag
|
{
|
||||||
|
dst->str= (char*) ""; // Sql_alloc has set error flag
|
||||||
|
dst->length= 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
lip->m_cpp_text_start= lip->get_cpp_tok_start() + pre_skip;
|
m_cpp_text_start= get_cpp_tok_start() + pre_skip;
|
||||||
lip->m_cpp_text_end= lip->get_cpp_ptr() - post_skip;
|
m_cpp_text_end= get_cpp_ptr() - post_skip;
|
||||||
|
|
||||||
if (!found_escape)
|
if (!found_escape)
|
||||||
{
|
{
|
||||||
lip->yytoklen=(uint) (end-str);
|
memcpy(dst->str, str, dst->length= (end - str));
|
||||||
memcpy(start,str,lip->yytoklen);
|
dst->str[dst->length]= 0;
|
||||||
start[lip->yytoklen]=0;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char *to;
|
dst->length= unescape(cs, dst->str, str, end, sep);
|
||||||
|
|
||||||
for (to=start ; str != end ; str++)
|
|
||||||
{
|
|
||||||
#ifdef USE_MB
|
|
||||||
int l;
|
|
||||||
if (use_mb(cs) &&
|
|
||||||
(l = my_ismbchar(cs, str, end))) {
|
|
||||||
while (l--)
|
|
||||||
*to++ = *str++;
|
|
||||||
str--;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (!(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) &&
|
|
||||||
*str == '\\' && str+1 != end)
|
|
||||||
{
|
|
||||||
switch(*++str) {
|
|
||||||
case 'n':
|
|
||||||
*to++='\n';
|
|
||||||
break;
|
|
||||||
case 't':
|
|
||||||
*to++= '\t';
|
|
||||||
break;
|
|
||||||
case 'r':
|
|
||||||
*to++ = '\r';
|
|
||||||
break;
|
|
||||||
case 'b':
|
|
||||||
*to++ = '\b';
|
|
||||||
break;
|
|
||||||
case '0':
|
|
||||||
*to++= 0; // Ascii null
|
|
||||||
break;
|
|
||||||
case 'Z': // ^Z must be escaped on Win32
|
|
||||||
*to++='\032';
|
|
||||||
break;
|
|
||||||
case '_':
|
|
||||||
case '%':
|
|
||||||
*to++= '\\'; // remember prefix for wildcard
|
|
||||||
/* Fall through */
|
|
||||||
default:
|
|
||||||
*to++= *str;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (*str == sep)
|
|
||||||
*to++= *str++; // Two ' or "
|
|
||||||
else
|
|
||||||
*to++ = *str;
|
|
||||||
}
|
|
||||||
*to=0;
|
|
||||||
lip->yytoklen=(uint) (to-start);
|
|
||||||
}
|
}
|
||||||
return start;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0; // unexpected end of query
|
return true; // unexpected end of query
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1122,12 +1138,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
||||||
}
|
}
|
||||||
/* Found N'string' */
|
/* Found N'string' */
|
||||||
lip->yySkip(); // Skip '
|
lip->yySkip(); // Skip '
|
||||||
if (!(yylval->lex_str.str = get_text(lip, 2, 1)))
|
if (lip->get_text(&yylval->lex_str, 2, 1))
|
||||||
{
|
{
|
||||||
state= MY_LEX_CHAR; // Read char by char
|
state= MY_LEX_CHAR; // Read char by char
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
yylval->lex_str.length= lip->yytoklen;
|
|
||||||
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
||||||
return(NCHAR_STRING);
|
return(NCHAR_STRING);
|
||||||
|
|
||||||
|
|
@ -1488,12 +1503,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
|
||||||
}
|
}
|
||||||
/* " used for strings */
|
/* " used for strings */
|
||||||
case MY_LEX_STRING: // Incomplete text string
|
case MY_LEX_STRING: // Incomplete text string
|
||||||
if (!(yylval->lex_str.str = get_text(lip, 1, 1)))
|
if (lip->get_text(&yylval->lex_str, 1, 1))
|
||||||
{
|
{
|
||||||
state= MY_LEX_CHAR; // Read char by char
|
state= MY_LEX_CHAR; // Read char by char
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
yylval->lex_str.length=lip->yytoklen;
|
|
||||||
|
|
||||||
lip->body_utf8_append(lip->m_cpp_text_start);
|
lip->body_utf8_append(lip->m_cpp_text_start);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1804,6 +1804,8 @@ enum enum_comment_state
|
||||||
|
|
||||||
class Lex_input_stream
|
class Lex_input_stream
|
||||||
{
|
{
|
||||||
|
size_t unescape(CHARSET_INFO *cs, char *to,
|
||||||
|
const char *str, const char *end, int sep);
|
||||||
public:
|
public:
|
||||||
Lex_input_stream()
|
Lex_input_stream()
|
||||||
{
|
{
|
||||||
|
|
@ -2088,9 +2090,6 @@ public:
|
||||||
/** Current line number. */
|
/** Current line number. */
|
||||||
uint yylineno;
|
uint yylineno;
|
||||||
|
|
||||||
/** Length of the last token parsed. */
|
|
||||||
uint yytoklen;
|
|
||||||
|
|
||||||
/** Interface with bison, value of the last token parsed. */
|
/** Interface with bison, value of the last token parsed. */
|
||||||
LEX_YYSTYPE yylval;
|
LEX_YYSTYPE yylval;
|
||||||
|
|
||||||
|
|
@ -2105,6 +2104,7 @@ public:
|
||||||
/** LALR(2) resolution, value of the look ahead token.*/
|
/** LALR(2) resolution, value of the look ahead token.*/
|
||||||
LEX_YYSTYPE lookahead_yylval;
|
LEX_YYSTYPE lookahead_yylval;
|
||||||
|
|
||||||
|
bool get_text(LEX_STRING *to, int pre_skip, int post_skip);
|
||||||
private:
|
private:
|
||||||
/** Pointer to the current position in the raw input stream. */
|
/** Pointer to the current position in the raw input stream. */
|
||||||
char *m_ptr;
|
char *m_ptr;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue