Bug#15377 Valid multibyte sequences are truncated on INSERT

ctype-euc_kr.c:
ctype-gb2312.c:
  Adding specific well_formed_length functions
  for gb2312 and euckr, to allow storing characters
  which are valid according to the character set
  specifications but have no Unicode mapping.
  Previously only characters with a Unicode mapping
  could be stored, while unassigned characters led
  to data truncation.
Many files:
  new file


strings/ctype-gb2312.c:
  Bug#15377 Valid multibyte sequences are truncated on INSERT
  Adding specific well_formed_length functions
  for gb2312 and euckr, to allow storing characters
  which are correct according to the character set.
  Previously only those which have Unicode mapping
  could be stored.
strings/ctype-euc_kr.c:
  Adding specific well_formed_length functions
  for gb2312 and euckr, to allow storing characters
  which are correct according to the character set.
  Previously only those which have Unicode mapping
  could be stored.
This commit is contained in:
unknown 2005-12-09 16:37:58 +04:00
commit 7063bd4d2b
10 changed files with 484 additions and 2 deletions

View file

@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Returns well formed length of a EUC-KR string.

  cs      Character set descriptor (unused).
  b       Start of the string.
  e       End of the string (one past the last byte).
  pos     Maximum number of characters to scan.
  error   Output flag: set to 0 on entry, and to 1 if the scan
          stopped at a wrong byte sequence.

  The scan stops after pos characters, at e, or at the first wrong
  byte sequence, whichever comes first. The return value is the number
  of bytes between b and the place where the scan stopped.
*/
static uint
my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
                         const char *b, const char *e,
                         uint pos, int *error)
{
  const char *b0= b;

  *error= 0;
  while (pos-- && b < e)
  {
    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;
    }
    else if ((e - b) > 1 && iseuc_kr(*b) && iseuc_kr(b[1]))
    {
      /*
        Double byte character.
        The remaining length is tested rather than comparing b against
        a precomputed "e - 1": for an empty input (b == e) that pointer
        would lie before the start of the buffer.
      */
      b+= 2;
    }
    else
    {
      /* Wrong byte sequence, or a lead byte cut off by the end of input */
      *error= 1;
      break;
    }
  }
  return (uint) (b - b0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
my_well_formed_len_euckr,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_euc_kr, /* mb_wc */

View file

@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Returns well formed length of a GB2312 string.

  cs      Character set descriptor (unused).
  b       Start of the string.
  e       End of the string (one past the last byte).
  pos     Maximum number of characters to scan.
  error   Output flag: set to 0 on entry, and to 1 if the scan
          stopped at a wrong byte sequence.

  The scan stops after pos characters, at e, or at the first wrong
  byte sequence, whichever comes first. The return value is the number
  of bytes between b and the place where the scan stopped.
*/
static uint
my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
                          const char *b, const char *e,
                          uint pos, int *error)
{
  const char *b0= b;

  *error= 0;
  while (pos-- && b < e)
  {
    if ((uchar) b[0] < 128)
    {
      /* Single byte ascii character */
      b++;
    }
    else if ((e - b) > 1 && isgb2312head(*b) && isgb2312tail(b[1]))
    {
      /*
        Double byte character.
        The remaining length is tested rather than comparing b against
        a precomputed "e - 1": for an empty input (b == e) that pointer
        would lie before the start of the buffer.
      */
      b+= 2;
    }
    else
    {
      /* Wrong byte sequence, or a head byte cut off by the end of input */
      *error= 1;
      break;
    }
  }
  return (uint) (b - b0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
{
NULL, /* init */
@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
my_well_formed_len_gb2312,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_gb2312, /* mb_wc */