diff --git a/mysql-test/r/ctype_eucjpms.result b/mysql-test/r/ctype_eucjpms.result index cdb28cd0fdd..ad9666d0c86 100755 --- a/mysql-test/r/ctype_eucjpms.result +++ b/mysql-test/r/ctype_eucjpms.result @@ -9819,3 +9819,9 @@ eucjpms_bin 6109 eucjpms_bin 61 eucjpms_bin 6120 drop table t1; +select hex(convert(_eucjpms 0xA5FE41 using ucs2)); +hex(convert(_eucjpms 0xA5FE41 using ucs2)) +003F0041 +select hex(convert(_eucjpms 0x8FABF841 using ucs2)); +hex(convert(_eucjpms 0x8FABF841 using ucs2)) +003F0041 diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result index e8ec1b2cbfb..091d96c56d3 100644 --- a/mysql-test/r/ctype_ujis.result +++ b/mysql-test/r/ctype_ujis.result @@ -2307,6 +2307,12 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; +select hex(convert(_ujis 0xA5FE41 using ucs2)); +hex(convert(_ujis 0xA5FE41 using ucs2)) +003F0041 +select hex(convert(_ujis 0x8FABF841 using ucs2)); +hex(convert(_ujis 0x8FABF841 using ucs2)) +003F0041 DROP TABLE IF EXISTS t1, t2; DROP PROCEDURE IF EXISTS sp1; set names ujis; @@ -2337,9 +2343,3 @@ DROP TABLE t2; set names default; set character_set_database=default; set character_set_server=default; -select hex(convert(_ujis 0xA5FE41 using ucs2)); -hex(convert(_ujis 0xA5FE41 using ucs2)) -003F0041 -select hex(convert(_ujis 0x8FABF841 using ucs2)); -hex(convert(_ujis 0x8FABF841 using ucs2)) -003F0041 diff --git a/mysql-test/t/ctype_eucjpms.test b/mysql-test/t/ctype_eucjpms.test index 3609407fe96..8f813fbd82b 100644 --- a/mysql-test/t/ctype_eucjpms.test +++ b/mysql-test/t/ctype_eucjpms.test @@ -363,3 +363,20 @@ SET collation_connection='eucjpms_japanese_ci'; -- source include/ctype_filesort.inc SET collation_connection='eucjpms_bin'; -- source include/ctype_filesort.inc + + +# +# Bugs#15375: Unassigned multibyte codes are broken +# into parts when converting to Unicode. +# This query should return 0x003F0041. I.e. it should +# scan unassigned double-byte character 0xA5FE, convert +# it as QUESTION MARK 0x003F and then scan the next +# character, which is a single byte character 0x41. +# +select hex(convert(_eucjpms 0xA5FE41 using ucs2)); +# This one should return 0x003F0041: +# scan unassigned three-byte character 0x8FABF8, +# convert it as QUESTION MARK 0x003F and then scan +# the next character, which is a single byte character 0x41. +select hex(convert(_eucjpms 0x8FABF841 using ucs2)); + diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 1e3126865f3..5f8a93b1c2b 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)), int hi=s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi < 0x80) { @@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)), } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; } diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index f9210fcb10e..348eb2f6e87 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -243,7 +243,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)), const uchar *e __attribute__((unused))) { wc[0]=tab_jisx0201_uni[*s]; - return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && s[0]) ? -1 : 1; } @@ -8473,7 +8473,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) int c1,c2,c3; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; c1=s[0]; @@ -8485,7 +8485,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; c2=s[1]; @@ -8500,7 +8500,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) { pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80)); if (!pwc[0]) - return MY_CS_ILSEQ; + return -2; } else { @@ -8520,7 +8520,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ret = my_mb_wc_jisx0201(cs,pwc,s+1,e); if (ret!=1) - return ret; + return -2; return 2; } @@ -8531,7 +8531,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) return MY_CS_ILSEQ; if (s+3>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL3; c3=s[2]; if (c3 < 0xA1 || c3>=0xFF) @@ -8540,8 +8540,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) if (c2<0xF5) { pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80)); - if (!pwc) - return MY_CS_ILSEQ; + if (!pwc[0]) + return -3; } else { @@ -8572,7 +8572,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) if ((jp=my_uni_jisx0208_onechar(wc))) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; jp+=0x8080; s[0]=jp>>8; @@ -8584,7 +8584,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) if (my_wc_mb_jisx0201(c,wc,s,e) == 1) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[1]= s[0]; s[0]= 0x8E; return 2; @@ -8594,7 +8594,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) if ((jp=my_uni_jisx0212_onechar(wc))) { if (s+3>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; jp+=0x8080; s[0]=0x8F; @@ -8608,7 +8608,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) if (wc>=0xE000 && wc<0xE3AC) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; c1=((unsigned)(wc-0xE000)/94)+0xF5; s[0]=c1; @@ -8622,7 +8622,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) if (wc>=0xE3AC && wc<0xE758) { if (s+3>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; s[0]=0x8F; c1=((unsigned)(wc-0xE3AC)/94)+0xF5;