From 600c42ea8619b361388b32db35c9eb51f7385adb Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Wed, 16 Oct 2024 13:28:20 +0200 Subject: [PATCH] MDEV-34883 LOAD DATA INFILE with geometry data fails We write field using field data charset, so we should read it using the field charset. --- mysql-test/main/loaddata.result | 41 +++++++++++++++++++++++ mysql-test/main/loaddata.test | 58 +++++++++++++++++++++++++++++++++ sql/sql_load.cc | 18 +++++++--- 3 files changed, 112 insertions(+), 5 deletions(-) diff --git a/mysql-test/main/loaddata.result b/mysql-test/main/loaddata.result index d00eb034505..a8ac29fdfa0 100644 --- a/mysql-test/main/loaddata.result +++ b/mysql-test/main/loaddata.result @@ -686,3 +686,44 @@ SELECT * FROM t1; id 1 DROP TABLE t1; +# +# MDEV-34883: LOAD DATA INFILE with geometry data fails +# +CREATE OR REPLACE TABLE t1 ( +p point NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)')); +SELECT * FROM t1 INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv'; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' instead +CREATE OR REPLACE TABLE t2 LIKE t1; +LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2; +drop table t1, t2; +CREATE OR REPLACE TABLE t1 ( +p point NOT NULL, +chr char(20) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'),"їєі"); +SELECT * FROM t1 INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv'; +Warnings: +Warning 1287 '<select expression> INTO <destination>;' is deprecated and will be removed in a future release. Please use 'SELECT <select list> INTO <destination> FROM...' 
instead +CREATE OR REPLACE TABLE t2 LIKE t1; +LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET latin1; +select ST_AsText(p), chr from t1; +ST_AsText(p) chr +POINT(37.646944 -75.761111) їєі +# incorrect string (but correct geom) +select ST_AsText(p), chr from t2; +ST_AsText(p) chr +POINT(37.646944 -75.761111) їєі +delete from t2; +LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET utf8mb4; +select ST_AsText(p), chr from t1; +ST_AsText(p) chr +POINT(37.646944 -75.761111) їєі +# all is correct +select ST_AsText(p), chr from t2; +ST_AsText(p) chr +POINT(37.646944 -75.761111) їєі +drop table t1, t2; +# End of 10.5 tests diff --git a/mysql-test/main/loaddata.test b/mysql-test/main/loaddata.test index ef1496787d4..e88a9b3cf01 100644 --- a/mysql-test/main/loaddata.test +++ b/mysql-test/main/loaddata.test @@ -832,3 +832,61 @@ CREATE TABLE t1 (id integer not null auto_increment primary key); LOAD DATA INFILE '../../std_data/loaddata/nl.txt' INTO TABLE t1 FIELDS TERMINATED BY ''; SELECT * FROM t1; DROP TABLE t1; + + +--echo # +--echo # MDEV-34883: LOAD DATA INFILE with geometry data fails +--echo # + +CREATE OR REPLACE TABLE t1 ( + p point NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)')); + +--disable_ps2_protocol +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval SELECT * FROM t1 INTO OUTFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv'; +--enable_ps2_protocol + +CREATE OR REPLACE TABLE t2 LIKE t1; + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2; + +--remove_file '$MYSQLTEST_VARDIR/tmp/t1.tsv' +drop table t1, t2; + + +CREATE OR REPLACE TABLE t1 ( + p point NOT NULL, + chr char(20) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +INSERT INTO t1 VALUES (GeomFromText('POINT(37.646944 -75.761111)'),"їєі"); +--disable_ps2_protocol 
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval SELECT * FROM t1 INTO OUTFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv'; +--enable_ps2_protocol + +CREATE OR REPLACE TABLE t2 LIKE t1; + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET latin1; + +select ST_AsText(p), chr from t1; +--echo # incorrect string (but correct geom) +select ST_AsText(p), chr from t2; + +delete from t2; + +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval LOAD DATA INFILE '$MYSQLTEST_VARDIR/tmp/t1.tsv' INTO TABLE t2 CHARACTER SET utf8mb4; + +select ST_AsText(p), chr from t1; +--echo # all is correct +select ST_AsText(p), chr from t2; + +--remove_file '$MYSQLTEST_VARDIR/tmp/t1.tsv' +drop table t1, t2; + +--echo # End of 10.5 tests diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 7329a7d2db5..3ccb6f48994 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -245,7 +245,7 @@ public: String &field_term,String &line_start,String &line_term, String &enclosed,int escape,bool get_it_from_net, bool is_fifo); ~READ_INFO(); - int read_field(); + int read_field(CHARSET_INFO *cs); int read_fixed_length(void); int next_line(void); char unescape(char chr); @@ -1124,7 +1124,15 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, { uint length; uchar *pos; - if (read_info.read_field()) + CHARSET_INFO *cs; + /* + Avoiding of handling binary data as a text + */ + if(item->charset_for_protocol() == &my_charset_bin) + cs= &my_charset_bin; + else + cs= read_info.charset(); + if (read_info.read_field(cs)) break; /* If this line is to be skipped we don't want to fill field or var */ @@ -1497,7 +1505,7 @@ inline bool READ_INFO::terminator(const uchar *ptr, uint length) must make sure to use escapes properly. 
*/ -int READ_INFO::read_field() +int READ_INFO::read_field(CHARSET_INFO *cs) { int chr,found_enclosed_char; @@ -1533,7 +1541,7 @@ int READ_INFO::read_field() for (;;) { // Make sure we have enough space for the longest multi-byte character. - while (data.length() + charset()->mbmaxlen <= data.alloced_length()) + while (data.length() + cs->mbmaxlen <= data.alloced_length()) { chr = GET; if (chr == my_b_EOF) @@ -1619,7 +1627,7 @@ int READ_INFO::read_field() } } data.append(chr); - if (charset()->use_mb() && read_mbtail(&data)) + if (cs->use_mb() && read_mbtail(&data)) goto found_eof; } /*