mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
The CSV format has always relied on numbers being quoted, which doesn't always happen. This fixes that so that numbers can now be unquoted (and the output does this as well so that the log takes up less space).
mysql-test/r/csv.result: Fixed test case for format change. Added new test for floating point numbers. mysql-test/t/csv.test: Fixed test for format change. Added test case for floating point numbers. storage/csv/ha_tina.cc: Updated code to handle numbers as raw unquoted types in CSV
This commit is contained in:
parent
20f0c7a65f
commit
8bd11441fd
3 changed files with 134 additions and 70 deletions
|
@ -5210,16 +5210,32 @@ create table bug22080_3 (id int,string varchar(64)) Engine=CSV;
|
|||
insert into bug22080_1 values(1,'string');
|
||||
insert into bug22080_1 values(2,'string');
|
||||
insert into bug22080_1 values(3,'string');
|
||||
"1","string"
|
||||
1,"string"
|
||||
2","string"
|
||||
"3","string"
|
||||
3,"string"
|
||||
check table bug22080_2;
|
||||
Table Op Msg_type Msg_text
|
||||
test.bug22080_2 check error Corrupt
|
||||
"1","string"
|
||||
"2",string"
|
||||
"3","string"
|
||||
1,"string"
|
||||
2,"string"
|
||||
3,"string"
|
||||
check table bug22080_3;
|
||||
Table Op Msg_type Msg_text
|
||||
test.bug22080_3 check error Corrupt
|
||||
drop tables bug22080_1,bug22080_2,bug22080_3;
|
||||
create table float_test (id float,string varchar(64)) Engine=CSV;
|
||||
insert into float_test values(1.0,'string');
|
||||
insert into float_test values(2.23,'serg.g');
|
||||
insert into float_test values(0.03,'string');
|
||||
insert into float_test values(0.19,'string');
|
||||
insert into float_test values(.67,'string');
|
||||
insert into float_test values(9.67,'string');
|
||||
select * from float_test;
|
||||
id string
|
||||
1 string
|
||||
2.23 serg.g
|
||||
0.03 string
|
||||
0.19 string
|
||||
0.67 string
|
||||
9.67 string
|
||||
drop table float_test;
|
||||
|
|
|
@ -1595,7 +1595,7 @@ insert into bug22080_1 values(2,'string');
|
|||
insert into bug22080_1 values(3,'string');
|
||||
|
||||
# Corrupt the file as described in the bug report
|
||||
--exec sed -e 's/"2"/2"/' $MYSQLTEST_VARDIR/master-data/test/bug22080_1.CSV > $MYSQLTEST_VARDIR/master-data/test/bug22080_2.CSV
|
||||
--exec sed -e 's/2/2"/' $MYSQLTEST_VARDIR/master-data/test/bug22080_1.CSV > $MYSQLTEST_VARDIR/master-data/test/bug22080_2.CSV
|
||||
--exec sed -e 's/2","/2",/' $MYSQLTEST_VARDIR/master-data/test/bug22080_1.CSV > $MYSQLTEST_VARDIR/master-data/test/bug22080_3.CSV
|
||||
|
||||
--exec cat $MYSQLTEST_VARDIR/master-data/test/bug22080_2.CSV
|
||||
|
@ -1605,3 +1605,17 @@ check table bug22080_2;
|
|||
check table bug22080_3;
|
||||
|
||||
drop tables bug22080_1,bug22080_2,bug22080_3;
|
||||
|
||||
#
|
||||
# Testing float type
|
||||
#
|
||||
create table float_test (id float,string varchar(64)) Engine=CSV;
|
||||
insert into float_test values(1.0,'string');
|
||||
insert into float_test values(2.23,'serg.g');
|
||||
insert into float_test values(0.03,'string');
|
||||
insert into float_test values(0.19,'string');
|
||||
insert into float_test values(.67,'string');
|
||||
insert into float_test values(9.67,'string');
|
||||
select * from float_test;
|
||||
|
||||
drop table float_test;
|
||||
|
|
|
@ -462,6 +462,7 @@ int ha_tina::encode_quote(byte *buf)
|
|||
|
||||
my_bitmap_map *org_bitmap= dbug_tmp_use_all_columns(table, table->read_set);
|
||||
buffer.length(0);
|
||||
|
||||
for (Field **field=table->field ; *field ; field++)
|
||||
{
|
||||
const char *ptr;
|
||||
|
@ -478,50 +479,58 @@ int ha_tina::encode_quote(byte *buf)
|
|||
buffer.append(STRING_WITH_LEN("\"\","));
|
||||
continue;
|
||||
}
|
||||
else
|
||||
|
||||
(*field)->val_str(&attribute,&attribute);
|
||||
|
||||
if ((*field)->str_needs_quotes())
|
||||
{
|
||||
(*field)->val_str(&attribute,&attribute);
|
||||
ptr= attribute.ptr();
|
||||
end_ptr= attribute.length() + ptr;
|
||||
|
||||
buffer.append('"');
|
||||
|
||||
while (ptr < end_ptr)
|
||||
{
|
||||
if (*ptr == '"')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('"');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\r')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('r');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\\')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('\\');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\n')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('n');
|
||||
*ptr++;
|
||||
}
|
||||
else
|
||||
buffer.append(*ptr++);
|
||||
}
|
||||
buffer.append('"');
|
||||
}
|
||||
|
||||
buffer.append('"');
|
||||
|
||||
while (ptr < end_ptr)
|
||||
else
|
||||
{
|
||||
if (*ptr == '"')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('"');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\r')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('r');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\\')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('\\');
|
||||
*ptr++;
|
||||
}
|
||||
else if (*ptr == '\n')
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append('n');
|
||||
*ptr++;
|
||||
}
|
||||
else
|
||||
buffer.append(*ptr++);
|
||||
buffer.append(attribute);
|
||||
}
|
||||
buffer.append('"');
|
||||
|
||||
buffer.append(',');
|
||||
}
|
||||
// Remove the comma, add a line feed
|
||||
buffer.length(buffer.length() - 1);
|
||||
buffer.append('\n');
|
||||
|
||||
//buffer.replace(buffer.length(), 0, "\n", 1);
|
||||
|
||||
dbug_tmp_restore_column_map(table->read_set, org_bitmap);
|
||||
|
@ -601,47 +610,72 @@ int ha_tina::find_current_row(byte *buf)
|
|||
buffer.length(0);
|
||||
if (curr_offset < end_offset &&
|
||||
file_buff->get_value(curr_offset) == '"')
|
||||
curr_offset++; // Incrementpast the first quote
|
||||
else
|
||||
goto err;
|
||||
for(;curr_offset < end_offset; curr_offset++)
|
||||
{
|
||||
// Need to convert line feeds!
|
||||
if (file_buff->get_value(curr_offset) == '"' &&
|
||||
((file_buff->get_value(curr_offset + 1) == ',') ||
|
||||
(curr_offset == end_offset -1 )))
|
||||
curr_offset++; // Incrementpast the first quote
|
||||
|
||||
for(;curr_offset < end_offset; curr_offset++)
|
||||
{
|
||||
curr_offset+= 2; // Move past the , and the "
|
||||
break;
|
||||
}
|
||||
if (file_buff->get_value(curr_offset) == '\\' &&
|
||||
curr_offset != (end_offset - 1))
|
||||
{
|
||||
curr_offset++;
|
||||
if (file_buff->get_value(curr_offset) == 'r')
|
||||
buffer.append('\r');
|
||||
else if (file_buff->get_value(curr_offset) == 'n' )
|
||||
buffer.append('\n');
|
||||
else if ((file_buff->get_value(curr_offset) == '\\') ||
|
||||
(file_buff->get_value(curr_offset) == '"'))
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
else /* This could only happed with an externally created file */
|
||||
// Need to convert line feeds!
|
||||
if (file_buff->get_value(curr_offset) == '"' &&
|
||||
((file_buff->get_value(curr_offset + 1) == ',') ||
|
||||
(curr_offset == end_offset -1 )))
|
||||
{
|
||||
buffer.append('\\');
|
||||
curr_offset+= 2; // Move past the , and the "
|
||||
break;
|
||||
}
|
||||
if (file_buff->get_value(curr_offset) == '\\' &&
|
||||
curr_offset != (end_offset - 1))
|
||||
{
|
||||
curr_offset++;
|
||||
if (file_buff->get_value(curr_offset) == 'r')
|
||||
buffer.append('\r');
|
||||
else if (file_buff->get_value(curr_offset) == 'n' )
|
||||
buffer.append('\n');
|
||||
else if ((file_buff->get_value(curr_offset) == '\\') ||
|
||||
(file_buff->get_value(curr_offset) == '"'))
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
else /* This could only happed with an externally created file */
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
}
|
||||
}
|
||||
else // ordinary symbol
|
||||
{
|
||||
/*
|
||||
We are at final symbol and no last quote was found =>
|
||||
we are working with a damaged file.
|
||||
*/
|
||||
if (curr_offset == end_offset - 1)
|
||||
goto err;
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
}
|
||||
}
|
||||
else // ordinary symbol
|
||||
}
|
||||
else if (my_isdigit(system_charset_info,
|
||||
file_buff->get_value(curr_offset)))
|
||||
{
|
||||
for(;curr_offset < end_offset; curr_offset++)
|
||||
{
|
||||
/*
|
||||
We are at final symbol and no last quote was found =>
|
||||
we are working with a damaged file.
|
||||
*/
|
||||
if (curr_offset == end_offset - 1)
|
||||
if (file_buff->get_value(curr_offset) == ',')
|
||||
{
|
||||
curr_offset+= 1; // Move past the ,
|
||||
break;
|
||||
}
|
||||
|
||||
if (my_isdigit(system_charset_info, file_buff->get_value(curr_offset)))
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
else if (file_buff->get_value(curr_offset) == '.')
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
else
|
||||
goto err;
|
||||
buffer.append(file_buff->get_value(curr_offset));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bitmap_is_set(table->read_set, (*field)->field_index))
|
||||
(*field)->store(buffer.ptr(), buffer.length(), system_charset_info);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue