MDEV-34883 LOAD DATA INFILE with geometry data fails

We write each field using the field's own data charset, so we should read it
back using that same field charset.
This commit is contained in:
Oleksandr Byelkin 2024-10-16 13:28:20 +02:00
parent c00145de58
commit 600c42ea86
3 changed files with 112 additions and 5 deletions

View file

@ -686,3 +686,44 @@ SELECT * FROM t1;
id
1
DROP TABLE t1;
#
# MDEV-34883: LOAD DATA INFILE with geometry data fails
#
CREATE OR REPLACE TABLE t1 (
p point NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'));
SELECT * FROM t1 INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv';
Warnings:
Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
CREATE OR REPLACE TABLE t2 LIKE t1;
LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2;
drop table t1, t2;
CREATE OR REPLACE TABLE t1 (
p point NOT NULL,
chr char(20)
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'),"їєі");
SELECT * FROM t1 INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv';
Warnings:
Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead
CREATE OR REPLACE TABLE t2 LIKE t1;
LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET latin1;
select ST_AsText(p), chr from t1;
ST_AsText(p) chr
POINT(37.646944 -75.761111) їєі
# incorrect string (but correct geom)
select ST_AsText(p), chr from t2;
ST_AsText(p) chr
POINT(37.646944 -75.761111) їєі
delete from t2;
LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET utf8mb4;
select ST_AsText(p), chr from t1;
ST_AsText(p) chr
POINT(37.646944 -75.761111) їєі
# all is correct
select ST_AsText(p), chr from t2;
ST_AsText(p) chr
POINT(37.646944 -75.761111) їєі
drop table t1, t2;
# End of 10.5 tests

View file

@ -832,3 +832,61 @@ CREATE TABLE t1 (id integer not null auto_increment primary key);
LOAD DATA INFILE '../../std_data/loaddata/nl.txt' INTO TABLE t1 FIELDS TERMINATED BY '';
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # MDEV-34883: LOAD DATA INFILE with geometry data fails
--echo #
# Scenario 1: a table holding only a POINT column is dumped with
# SELECT ... INTO OUTFILE and reloaded into an identical table with
# LOAD DATA INFILE and no explicit CHARACTER SET clause.
# The LOAD DATA statement is expected to complete without an error.
CREATE OR REPLACE TABLE t1 (
p point NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'));
# The dump statement runs with the ps2 protocol switched off
# (NOTE(review): presumably because writing the outfile must not be
# executed a second time - confirm). replace_result masks the
# machine-specific directory so the recorded output stays stable.
--disable_ps2_protocol
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval SELECT * FROM t1 INTO OUTFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv';
--enable_ps2_protocol
CREATE OR REPLACE TABLE t2 LIKE t1;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2;
# Remove the dump file - scenario 2 below writes the same path again.
--remove_file '$MYSQLTEST_VARDIR/tmp/t1.tsv'
drop table t1, t2;
# Scenario 2: the same round trip with an extra CHAR column holding
# non-ASCII text. The file is loaded twice, each time with a different
# explicit CHARACTER SET clause.
CREATE OR REPLACE TABLE t1 (
p point NOT NULL,
chr char(20)
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'),"їєі");
--disable_ps2_protocol
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval SELECT * FROM t1 INTO OUTFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv';
--enable_ps2_protocol
CREATE OR REPLACE TABLE t2 LIKE t1;
# Load with CHARACTER SET latin1: the geometry column must still come
# back intact, while the text column is expected to be misread.
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET latin1;
# t1 is the reference data and t2 is the reloaded copy.
select ST_AsText(p), chr from t1;
--echo # incorrect string (but correct geom)
select ST_AsText(p), chr from t2;
# Intentionally unfiltered: empty t2 completely before the second load.
delete from t2;
# Load with CHARACTER SET utf8mb4: both columns are expected to match t1.
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET utf8mb4;
select ST_AsText(p), chr from t1;
--echo # all is correct
select ST_AsText(p), chr from t2;
# Clean up the dump file and both tables.
--remove_file '$MYSQLTEST_VARDIR/tmp/t1.tsv'
drop table t1, t2;
--echo # End of 10.5 tests

View file

@ -245,7 +245,7 @@ public:
String &field_term,String &line_start,String &line_term,
String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
~READ_INFO();
int read_field();
int read_field(CHARSET_INFO *cs);
int read_fixed_length(void);
int next_line(void);
char unescape(char chr);
@ -1124,7 +1124,15 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
{
uint length;
uchar *pos;
if (read_info.read_field())
CHARSET_INFO *cs;
/*
Binary data (e.g. geometry) must not be handled as text: read such a
field with my_charset_bin instead of read_info.charset().
*/
if(item->charset_for_protocol() == &my_charset_bin)
cs= &my_charset_bin;
else
cs= read_info.charset();
if (read_info.read_field(cs))
break;
/* If this line is to be skipped we don't want to fill field or var */
@ -1497,7 +1505,7 @@ inline bool READ_INFO::terminator(const uchar *ptr, uint length)
must make sure to use escapes properly.
*/
int READ_INFO::read_field()
int READ_INFO::read_field(CHARSET_INFO *cs)
{
int chr,found_enclosed_char;
@ -1533,7 +1541,7 @@ int READ_INFO::read_field()
for (;;)
{
// Make sure we have enough space for the longest multi-byte character.
while (data.length() + charset()->mbmaxlen <= data.alloced_length())
while (data.length() + cs->mbmaxlen <= data.alloced_length())
{
chr = GET;
if (chr == my_b_EOF)
@ -1619,7 +1627,7 @@ int READ_INFO::read_field()
}
}
data.append(chr);
if (charset()->use_mb() && read_mbtail(&data))
if (cs->use_mb() && read_mbtail(&data))
goto found_eof;
}
/*