Bug#16233: XML: ExtractValue() fails with special characters

ExtractValue didn't understand tag and attribute names
consisting of "tricky" national letters (e.g. latin accented letters).
It happened because the XPath lex parser recognized only basic
latin letters a..z as a part of an identifier.

Fixed to recognize all letters by means of new "full ctype" which
was added recently.
This commit is contained in:
bar@mysql.com 2006-04-11 13:25:02 +05:00
parent ba5d08f340
commit df2d425afd
4 changed files with 70 additions and 39 deletions

View file

@ -615,3 +615,26 @@ select extractValue('<e>1</e>','last()');
ERROR HY000: XPATH syntax error: '' ERROR HY000: XPATH syntax error: ''
select extractValue('<e><a>1</a></e>','/e/'); select extractValue('<e><a>1</a></e>','/e/');
ERROR HY000: XPATH syntax error: '' ERROR HY000: XPATH syntax error: ''
set names utf8;
select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r')
r
select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ')
Ñ
select extractValue('<Ñ r="r"/>','/Ñ/@r');
extractValue('<Ñ r="r"/>','/Ñ/@r')
r
select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
extractValue('<r Ñ="Ñ"/>','/r/@Ñ')
Ñ
DROP PROCEDURE IF EXISTS p2;
CREATE PROCEDURE p2 ()
BEGIN
DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
SELECT EXTRACTVALUE(p,'/Ñ/r');
END//
CALL p2();
EXTRACTVALUE(p,'/Ñ/r')
A
DROP PROCEDURE p2;

View file

@ -295,3 +295,23 @@ select extractValue('<e>1</e>','last()');
--error 1105 --error 1105
select extractValue('<e><a>1</a></e>','/e/'); select extractValue('<e><a>1</a></e>','/e/');
#
# Bug#16233: XML: ExtractValue() fails with special characters
#
set names utf8;
select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
select extractValue('<Ñ r="r"/>','/Ñ/@r');
select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
--disable_warnings
DROP PROCEDURE IF EXISTS p2;
--enable_warnings
DELIMITER //;
CREATE PROCEDURE p2 ()
BEGIN
DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
SELECT EXTRACTVALUE(p,'/Ñ/r');
END//
DELIMITER ;//
CALL p2();
DROP PROCEDURE p2;

View file

@ -1304,30 +1304,6 @@ my_xpath_init(MY_XPATH *xpath)
} }
/*
Some ctype-alike helper functions. Note, we cannot
reuse cs->ident_map[], because in Xpath, unlike in SQL,
dash character is a valid identifier part.
*/
/*
  Tell whether a character may start an XPath identifier.

  c       Character code to classify.

  Returns 1 for an ASCII letter ('a'..'z', 'A'..'Z') or an underscore,
  0 for anything else.
*/
static int
my_xident_beg(int c)
{
  if (c == '_')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  return c >= 'a' && c <= 'z';
}
/*
  Tell whether a character may continue an XPath identifier.

  c       Character code to classify.

  Returns 1 for an ASCII letter, an ASCII digit, a dash or an underscore,
  0 for anything else. The dash is accepted here because in XPath,
  unlike in SQL, it is a valid identifier part.
*/
static int
my_xident_body(int c)
{
  if (c == '_' || c == '-')
    return 1;
  if (c >= '0' && c <= '9')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  return c >= 'a' && c <= 'z';
}
static int static int
my_xdigit(int c) my_xdigit(int c)
{ {
@ -1350,7 +1326,7 @@ static void
my_xpath_lex_scan(MY_XPATH *xpath, my_xpath_lex_scan(MY_XPATH *xpath,
MY_XPATH_LEX *lex, const char *beg, const char *end) MY_XPATH_LEX *lex, const char *beg, const char *end)
{ {
int ch; int ch, ctype, length;
for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces
lex->beg= beg; lex->beg= beg;
@ -1360,20 +1336,20 @@ my_xpath_lex_scan(MY_XPATH *xpath,
lex->term= MY_XPATH_LEX_EOF; // end of line reached lex->term= MY_XPATH_LEX_EOF; // end of line reached
return; return;
} }
ch= *beg++;
// Check ident, or a function call, or a keyword
if (ch > 0 && ch < 128 && simpletok[ch]) if ((length= xpath->cs->cset->ctype(xpath->cs, &ctype,
(const uchar*) beg,
(const uchar*) end)) > 0 &&
((ctype & (_MY_L | _MY_U)) || *beg == '_'))
{ {
// a token consisting of one character found // scan until the end of the identifier
lex->end= beg; for (beg+= length;
lex->term= ch; (length= xpath->cs->cset->ctype(xpath->cs, &ctype,
return; (const uchar*) beg,
} (const uchar*) end)) > 0 &&
((ctype & (_MY_L | _MY_U | _MY_NMR)) || *beg == '_' || *beg == '-') ;
if (my_xident_beg(ch)) // ident, or a function call, or a keyword beg+= length) /* no op */;
{
// scan until the end of the identifier
for ( ; beg < end && my_xident_body(*beg); beg++);
lex->end= beg; lex->end= beg;
// check if a function call // check if a function call
@ -1388,6 +1364,18 @@ my_xpath_lex_scan(MY_XPATH *xpath,
return; return;
} }
ch= *beg++;
if (ch > 0 && ch < 128 && simpletok[ch])
{
// a token consisting of one character found
lex->end= beg;
lex->term= ch;
return;
}
if (my_xdigit(ch)) // a sequence of digits if (my_xdigit(ch)) // a sequence of digits
{ {
for ( ; beg < end && my_xdigit(*beg) ; beg++); for ( ; beg < end && my_xdigit(*beg) ; beg++);

View file

@ -1362,7 +1362,7 @@ int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
*ctype= 0; *ctype= 0;
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL;
} }
*ctype= cs->ctype[*s]; *ctype= cs->ctype[*s + 1];
return 1; return 1;
} }