Bug #16171518 - LOAD XML DOES NOT HANDLE EMPTY ELEMENTS

DESCRIPTION
===========
The MySQL LOAD XML command is unable to handle empty XML
tags, i.e. <row><tag/></row>. The behaviour is also wrong
(and different from the above) when there is a space in the
empty tag, i.e. <row><tag /></row>.

ANALYSIS
========
In read_xml(), when we encounter a close tag ('/'), we
decrease the 'level' blindly, which is wrong. When it is an
empty tag without a space (the succeeding char is '>'), we
need to skip the decrement. In other words, whenever we hit
a close tag ('/'), decrease the 'level' only when (i) it is
not an empty tag without a space, i.e. <tag/>, or (ii) it is
of the format <row col="val" .../>.

FIX
===
The switch case for '/' is modified. We've removed the
blind decrement of 'level'; we now do it only when the tag is
not an empty tag without a space. We also set 'in_tag' to
false to let the program know that we're done reading the
current tag (required for the format <row col="val" .../>).
This commit is contained in:
Shishir Jaiswal 2015-08-18 12:24:27 +05:30
parent 93ac0eb1c4
commit ee02650bac
5 changed files with 121 additions and 2 deletions

View file

@ -93,3 +93,30 @@ a b
216 !&bb b;
3 !b3
DROP TABLE t1;
#
# Bug#16171518 LOAD XML DOES NOT HANDLE EMPTY ELEMENTS
#
CREATE TABLE t1 (col1 VARCHAR(3), col2 VARCHAR(3), col3 VARCHAR(3), col4 VARCHAR(4));
LOAD XML INFILE '../../std_data/bug16171518_1.dat' INTO TABLE t1;
SELECT * FROM t1 ORDER BY col1, col2, col3, col4;
col1 col2 col3 col4
0bc def ghi jkl
1no NULL pqr stu
2BC DEF GHI JKL
3NO NULL PQR STU
4bc def ghi jkl
5no pqr stu vwx
6BC DEF NULL JKL
7NO PQR STU VWX
8bc def ghi NULL
9kl NULL mno pqr
ABC DEF NULL JKL
MNO NULL STU VWX
DROP TABLE t1;
CREATE TABLE t1 (col1 VARCHAR(3), col2 VARCHAR(3), col3 INTEGER);
LOAD XML INFILE '../../std_data/bug16171518_2.dat' INTO TABLE t1;
SELECT * FROM t1 ORDER BY col1, col2, col3;
col1 col2 col3
ABC DEF NULL
GHI NULL 123
DROP TABLE t1;

View file

@ -0,0 +1,59 @@
<test_rows>
<row>
<col1>0bc</col1>
<col2>def</col2>
<col3>ghi</col3>
<col4>jkl</col4>
</row>
<row>
<col1>1no</col1>
<col2/>
<col3>pqr</col3>
<col4>stu</col4>
</row>
<row>
<col1>2BC</col1>
<col2>DEF</col2>
<col3>GHI</col3>
<col4>JKL</col4>
</row>
<row>
<col1>3NO</col1>
<col2 />
<col3>PQR</col3>
<col4>STU</col4>
</row>
<row col1="4bc" col2="def" col3="ghi" col4="jkl"/>
<row col1="5no" col2="pqr" col3="stu" col4="vwx" />
<row>
<field name='col1'>6BC</field>
<field name='col2'>DEF</field>
<field name='col3'></field>
<field name='col4'>JKL</field>
</row>
<row>
<field name='col1'>7NO</field>
<field name='col2'>PQR</field>
<field name='col3'>STU</field>
<field name='col4'>VWX</field>
</row>
<row>
<col1>8bc</col1>
<col2>def</col2>
<col3>ghi</col3>
<col4 />
</row>
<row>
<col1>9kl</col1>
<col2/>
<col3>mno</col3>
<col4>pqr</col4>
</row>
<row col1="ABC" col2="DEF" col3="" col4="JKL"/>
<row col1="MNO" col2="" col3="STU" col4="VWX"/>
</test_rows>

View file

@ -0,0 +1,12 @@
<test_rows>
<row>
<col1>ABC</col1>
<col2>DEF</col2>
<col3 />
</row>
<row>
<col1>GHI</col1>
<col2 />
<col3>123</col3>
</row>
</test_rows>

View file

@ -116,3 +116,17 @@ LOAD XML INFILE '../../std_data/loadxml.dat' INTO TABLE t1
ROWS IDENTIFIED BY '<row>' (a,@b) SET b=concat('!',@b);
SELECT * FROM t1 ORDER BY a;
DROP TABLE t1;
--echo #
--echo # Bug#16171518 LOAD XML DOES NOT HANDLE EMPTY ELEMENTS
--echo #
# Case 1: load bug16171518_1.dat into a table with four string columns.
# The data file mixes <colN>value</colN> child elements, empty elements
# written both as <tag/> and <tag />, attribute-style rows
# (<row col1="..." .../>) and <field name='colN'> elements.
CREATE TABLE t1 (col1 VARCHAR(3), col2 VARCHAR(3), col3 VARCHAR(3), col4 VARCHAR(4));
LOAD XML INFILE '../../std_data/bug16171518_1.dat' INTO TABLE t1;
# Order by every column so the recorded result does not depend on row order.
SELECT * FROM t1 ORDER BY col1, col2, col3, col4;
DROP TABLE t1;
# Case 2: load bug16171518_2.dat, where the empty element (<tag /> form)
# appears once for the INTEGER column col3 and once for the string column col2.
CREATE TABLE t1 (col1 VARCHAR(3), col2 VARCHAR(3), col3 INTEGER);
LOAD XML INFILE '../../std_data/bug16171518_2.dat' INTO TABLE t1;
# Order by every column so the recorded result does not depend on row order.
SELECT * FROM t1 ORDER BY col1, col2, col3;
DROP TABLE t1;

View file

@ -1,5 +1,5 @@
/*
Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1987,8 +1987,15 @@ int READ_INFO::read_xml()
break;
case '/': /* close tag */
level--;
chr= my_tospace(GET);
/* Decrease the 'level' only when (i) It's not an */
/* (without space) empty tag i.e. <tag/> or, (ii) */
/* It is of format <row col="val" .../> */
if(chr != '>' || in_tag)
{
level--;
in_tag= false;
}
if(chr != '>') /* if this is an empty tag <tag /> */
tag.length(0); /* we should keep tag value */
while(chr != '>' && chr != my_b_EOF)