mirror of
https://github.com/MariaDB/server.git
synced 2026-05-15 03:17:20 +02:00
Bug#15377 Valid multibyte sequences are truncated on INSERT
ctype-euc_kr.c: ctype-gb2312.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set specifications but simply have no Unicode mapping. Previously only those which had a Unicode mapping could be stored, while unassigned characters led to data truncation. Many files: new file strings/ctype-gb2312.c: Bug#15377 Valid multibyte sequences are truncated on INSERT. Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which had a Unicode mapping could be stored. strings/ctype-euc_kr.c: Adding specific well_formed_length functions for gb2312 and euckr, to allow storing characters which are correct according to the character set. Previously only those which had a Unicode mapping could be stored.
This commit is contained in:
parent
5aeb69296a
commit
7063bd4d2b
10 changed files with 484 additions and 2 deletions
|
|
@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns well formed length of a EUC-KR string.
|
||||
*/
|
||||
static uint
|
||||
my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
uint pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (uint) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
|
@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
mbcharlen_euc_kr,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_mb,
|
||||
my_well_formed_len_euckr,
|
||||
my_lengthsp_8bit,
|
||||
my_numcells_8bit,
|
||||
my_mb_wc_euc_kr, /* mb_wc */
|
||||
|
|
|
|||
|
|
@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns well formed length of a EUC-KR string.
|
||||
*/
|
||||
static uint
|
||||
my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
uint pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (uint) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
|
@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
mbcharlen_gb2312,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_mb,
|
||||
my_well_formed_len_gb2312,
|
||||
my_lengthsp_8bit,
|
||||
my_numcells_8bit,
|
||||
my_mb_wc_gb2312, /* mb_wc */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue