mirror of
https://github.com/MariaDB/server.git
synced 2025-01-17 20:42:30 +01:00
Bug#16233: XML: ExtractValue() fails with special characters
ExtractValue didn't understand tag and attribute names consisting of "tricky" national letters (e.g. Latin accented letters). This happened because the XPath lexer recognized only the basic Latin letters a..z as part of an identifier. Fixed to recognize all letters by means of the new "full ctype" which was added recently.
This commit is contained in:
parent
ba5d08f340
commit
df2d425afd
4 changed files with 70 additions and 39 deletions
|
@ -615,3 +615,26 @@ select extractValue('<e>1</e>','last()');
|
|||
ERROR HY000: XPATH syntax error: ''
|
||||
select extractValue('<e><a>1</a></e>','/e/');
|
||||
ERROR HY000: XPATH syntax error: ''
|
||||
set names utf8;
|
||||
select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
|
||||
extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r')
|
||||
r
|
||||
select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
|
||||
extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ')
|
||||
Ñ
|
||||
select extractValue('<Ñ r="r"/>','/Ñ/@r');
|
||||
extractValue('<Ñ r="r"/>','/Ñ/@r')
|
||||
r
|
||||
select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
|
||||
extractValue('<r Ñ="Ñ"/>','/r/@Ñ')
|
||||
Ñ
|
||||
DROP PROCEDURE IF EXISTS p2;
|
||||
CREATE PROCEDURE p2 ()
|
||||
BEGIN
|
||||
DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
|
||||
SELECT EXTRACTVALUE(p,'/Ñ/r');
|
||||
END//
|
||||
CALL p2();
|
||||
EXTRACTVALUE(p,'/Ñ/r')
|
||||
A
|
||||
DROP PROCEDURE p2;
|
||||
|
|
|
@ -295,3 +295,23 @@ select extractValue('<e>1</e>','last()');
|
|||
--error 1105
|
||||
select extractValue('<e><a>1</a></e>','/e/');
|
||||
|
||||
#
|
||||
# Bug#16233: XML: ExtractValue() fails with special characters
|
||||
#
|
||||
set names utf8;
|
||||
select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
|
||||
select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
|
||||
select extractValue('<Ñ r="r"/>','/Ñ/@r');
|
||||
select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
|
||||
--disable_warnings
|
||||
DROP PROCEDURE IF EXISTS p2;
|
||||
--enable_warnings
|
||||
DELIMITER //;
|
||||
CREATE PROCEDURE p2 ()
|
||||
BEGIN
|
||||
DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
|
||||
SELECT EXTRACTVALUE(p,'/Ñ/r');
|
||||
END//
|
||||
DELIMITER ;//
|
||||
CALL p2();
|
||||
DROP PROCEDURE p2;
|
||||
|
|
|
@ -1304,30 +1304,6 @@ my_xpath_init(MY_XPATH *xpath)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Some ctype-alike helper functions. Note, we cannot
|
||||
reuse cs->ident_map[], because in Xpath, unlike in SQL,
|
||||
dash character is a valid identifier part.
|
||||
*/
|
||||
static int
|
||||
my_xident_beg(int c)
|
||||
{
|
||||
return (((c) >= 'a' && (c) <= 'z') ||
|
||||
((c) >= 'A' && (c) <= 'Z') ||
|
||||
((c) == '_'));
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
my_xident_body(int c)
|
||||
{
|
||||
return (((c) >= 'a' && (c) <= 'z') ||
|
||||
((c) >= 'A' && (c) <= 'Z') ||
|
||||
((c) >= '0' && (c) <= '9') ||
|
||||
((c)=='-') || ((c) == '_'));
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
my_xdigit(int c)
|
||||
{
|
||||
|
@ -1350,7 +1326,7 @@ static void
|
|||
my_xpath_lex_scan(MY_XPATH *xpath,
|
||||
MY_XPATH_LEX *lex, const char *beg, const char *end)
|
||||
{
|
||||
int ch;
|
||||
int ch, ctype, length;
|
||||
for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces
|
||||
lex->beg= beg;
|
||||
|
||||
|
@ -1360,20 +1336,20 @@ my_xpath_lex_scan(MY_XPATH *xpath,
|
|||
lex->term= MY_XPATH_LEX_EOF; // end of line reached
|
||||
return;
|
||||
}
|
||||
ch= *beg++;
|
||||
|
||||
if (ch > 0 && ch < 128 && simpletok[ch])
|
||||
|
||||
// Check ident, or a function call, or a keyword
|
||||
if ((length= xpath->cs->cset->ctype(xpath->cs, &ctype,
|
||||
(const uchar*) beg,
|
||||
(const uchar*) end)) > 0 &&
|
||||
((ctype & (_MY_L | _MY_U)) || *beg == '_'))
|
||||
{
|
||||
// a token consisting of one character found
|
||||
lex->end= beg;
|
||||
lex->term= ch;
|
||||
return;
|
||||
}
|
||||
|
||||
if (my_xident_beg(ch)) // ident, or a function call, or a keyword
|
||||
{
|
||||
// scan until the end of the identifier
|
||||
for ( ; beg < end && my_xident_body(*beg); beg++);
|
||||
// scan until the end of the identifier
|
||||
for (beg+= length;
|
||||
(length= xpath->cs->cset->ctype(xpath->cs, &ctype,
|
||||
(const uchar*) beg,
|
||||
(const uchar*) end)) > 0 &&
|
||||
((ctype & (_MY_L | _MY_U | _MY_NMR)) || *beg == '_' || *beg == '-') ;
|
||||
beg+= length) /* no op */;
|
||||
lex->end= beg;
|
||||
|
||||
// check if a function call
|
||||
|
@ -1388,6 +1364,18 @@ my_xpath_lex_scan(MY_XPATH *xpath,
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
ch= *beg++;
|
||||
|
||||
if (ch > 0 && ch < 128 && simpletok[ch])
|
||||
{
|
||||
// a token consisting of one character found
|
||||
lex->end= beg;
|
||||
lex->term= ch;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (my_xdigit(ch)) // a sequence of digits
|
||||
{
|
||||
for ( ; beg < end && my_xdigit(*beg) ; beg++);
|
||||
|
|
|
@ -1362,7 +1362,7 @@ int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
|
|||
*ctype= 0;
|
||||
return MY_CS_TOOSMALL;
|
||||
}
|
||||
*ctype= cs->ctype[*s];
|
||||
*ctype= cs->ctype[*s + 1];
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue