MDEV-7769 MY_CHARSET_INFO refactoring (on branch 10.2)

Part 3 (final): removing MY_CHARSET_HANDLER::well_formed_len().
This commit is contained in:
Alexander Barkov 2016-10-10 14:36:09 +04:00
commit 5058ced5df
34 changed files with 130 additions and 342 deletions

View file

@ -6775,7 +6775,6 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_big5,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_big5, /* mb_wc */

View file

@ -535,7 +535,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
my_lengthsp_binary,
my_numcells_8bit,
my_mb_wc_bin,

View file

@ -34731,7 +34731,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_cp932,
my_lengthsp_8bit,
my_numcells_cp932,
my_mb_wc_cp932, /* mb_wc */

View file

@ -10021,7 +10021,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_euckr,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_euc_kr, /* mb_wc */

View file

@ -67559,7 +67559,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_eucjpms,
my_lengthsp_8bit,
my_numcells_eucjpms,
my_mb_wc_eucjpms, /* mb_wc */

View file

@ -6426,7 +6426,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gb2312,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_gb2312, /* mb_wc */

View file

@ -10708,7 +10708,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gbk,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_gbk,

View file

@ -398,7 +398,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_latin1,

View file

@ -401,28 +401,6 @@ size_t my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Measure the well-formed prefix of a multi-byte string.

  Decodes up to "pos" characters from [b, e) through the character set's
  mb_wc() handler and stops at the first position that cannot be decoded.

  @param cs     character set whose mb_wc() handler is used
  @param b      start of the string
  @param e      end of the string (one past the last byte)
  @param pos    maximum number of characters to accept
  @param error  OUT: 1 if decoding stopped before "e", otherwise 0

  @return       number of bytes occupied by the accepted characters
*/
size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
                             size_t pos, int *error)
{
  const char *cur= b;
  size_t left;

  *error= 0;
  for (left= pos; left > 0; left--)
  {
    my_wc_t wc;
    int chlen= cs->cset->mb_wc(cs, &wc, (uchar*) cur, (uchar*) e);
    if (chlen <= 0)
    {
      /* Stopping exactly at the end of the buffer is not reported as an error */
      if (cur < e)
        *error= 1;
      break;
    }
    cur+= chlen;
  }
  return (size_t) (cur - b);
}
/*
Append a badly formed piece of string.
Bad bytes are fixed to '?'.

View file

@ -30,7 +30,6 @@
#ifdef DEFINE_ASIAN_ROUTINES
#define DEFINE_WELL_FORMED_LEN
#define DEFINE_WELL_FORMED_CHAR_LENGTH
#define DEFINE_CHARLEN
#define DEFINE_NATIVE_TO_MB_VARLEN
@ -96,73 +95,7 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
/* Wrong byte sequence */
return MY_CS_ILSEQ;
}
#endif /* DEFINE_WELL_FORMED_LEN */
#ifdef DEFINE_WELL_FORMED_LEN
/**
Returns well formed length of a character string with
variable character length for character sets with:
- mbminlen == 1
- mbmaxlen == 2, 3, or 4

Walks the string one character at a time and stops when either
"nchars" characters have been accepted, the end of the string is
reached, or a byte sequence matches none of the enabled character
classes. The IS_MBn_CHAR / IS_8BIT_CHAR probes are macros; presumably
they are supplied by the file that instantiates this template
(TODO confirm against the including sources).

@param cs      character set; only referenced by the debug assertions
@param b       start of the string
@param e       end of the string (one past the last byte)
@param nchars  maximum number of characters to accept
@param error   OUT: set to 1 when a wrong byte sequence stopped the
               scan, otherwise left at 0

@return        length in bytes of the accepted prefix
*/
static size_t
MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e,
size_t nchars, int *error)
{
const char *b0= b;
DBUG_ASSERT(cs->mbminlen == 1);
DBUG_ASSERT(cs->mbmaxlen <= 4);
/* The error flag is cleared once, in the loop initializer */
for (*error= 0 ; b < e && nchars-- ; )
{
if ((uchar) b[0] < 128)
{
b++; /* Single byte ASCII character */
continue;
}
/* Each multi-byte probe first checks that enough bytes remain before "e" */
if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
{
b+= 2; /* Double byte character */
continue;
}
#ifdef IS_MB3_CHAR
if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
{
b+= 3; /* Three-byte character */
continue;
}
#endif
#ifdef IS_MB4_CHAR
if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
{
b+= 4; /* Four-byte character */
continue;
}
#endif
#ifdef IS_8BIT_CHAR
if (IS_8BIT_CHAR(b[0]))
{
b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
continue;
}
#endif
/*
  Wrong byte sequence: none of the probes above matched. This also
  covers a multi-byte character that is cut short by "e".
*/
*error= 1;
break;
}
return b - b0;
}
#endif /* DEFINE_WELL_FORMED_LEN */
#endif /* DEFINE_CHARLEN */
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH

View file

@ -1143,16 +1143,6 @@ size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Well-formed length for 8-bit character sets.

  Clears the error flag unconditionally and returns the smaller of the
  number of bytes available in [start, end) and "nchars".
*/
size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
                               const char *start, const char *end,
                               size_t nchars, int *error)
{
  const size_t avail= (size_t) (end - start);

  *error= 0;
  return MY_MIN(avail, nchars);
}
size_t
my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
const char *start, const char *end,
@ -2064,7 +2054,6 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_cset_init_8bit,
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_8bit,

View file

@ -34110,7 +34110,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_sjis,
my_lengthsp_8bit,
my_numcells_sjis,
my_mb_wc_sjis, /* mb_wc */

View file

@ -880,7 +880,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_tis620, /* mb_wc */

View file

@ -1490,27 +1490,6 @@ my_charpos_utf16(CHARSET_INFO *cs,
}
/*
  Well-formed length of a UTF-16 string.

  Accepts up to "nchars" characters from [b, e), asking my_ismbchar()
  for the byte length of each one. A zero length ends the scan; it is
  flagged in *error only when bytes are still left before "e".

  @return  number of bytes covered by the accepted characters
*/
static size_t
my_well_formed_len_utf16(CHARSET_INFO *cs,
                         const char *b, const char *e,
                         size_t nchars, int *error)
{
  const char *pos= b;

  *error= 0;
  while (nchars)
  {
    uint chlen= my_ismbchar(cs, pos, e);
    if (!chlen)
    {
      *error= pos < e ? 1 : 0;
      break;
    }
    pos+= chlen;
    nchars--;
  }
  return (size_t) (pos - b);
}
static int
my_wildcmp_utf16_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
@ -1629,7 +1608,6 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
NULL, /* init */
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
my_lengthsp_mb2,
my_numcells_mb,
my_utf16_uni, /* mb_wc */
@ -1963,7 +1941,6 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
NULL, /* init */
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
my_lengthsp_utf16le,
my_numcells_mb,
my_utf16le_uni, /* mb_wc */
@ -2636,34 +2613,6 @@ my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Well-formed length of a UTF-32 string.

  Limits the scan to at most "nchars" 4-byte characters of [b, e) and
  checks the leading two bytes of every character with IS_UTF32_MBHEAD4.

  @param cs      unused
  @param b       start of the string
  @param e       end of the string (one past the last byte)
  @param nchars  maximum number of characters to accept
  @param error   OUT: 1 if a character failed the IS_UTF32_MBHEAD4 check,
                 otherwise 0

  @return        length in bytes of the accepted prefix
*/
static size_t
my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)),
                         const char *b, const char *e,
                         size_t nchars, int *error)
{
  const char *b0= b;
  size_t length= e - b;
  /* Ensure string length is divisible by 4 */
  DBUG_ASSERT((length % 4) == 0);
  *error= 0;
  /*
    Clamp the scan to nchars characters. The first comparison guarantees
    that nchars * 4 does not exceed length, so the multiplication cannot
    wrap around for very large nchars values.
  */
  if (nchars <= length / 4 && nchars * 4 < length)
  {
    length= nchars * 4;
    e= b + length;
  }
  for (; b < e; b+= 4)
  {
    if (!IS_UTF32_MBHEAD4(b[0], b[1]))
    {
      *error= 1;
      return b - b0;
    }
  }
  return length;
}
static
void my_fill_utf32(CHARSET_INFO *cs,
char *s, size_t slen, int fill)
@ -2809,7 +2758,6 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
NULL, /* init */
my_numchars_utf32,
my_charpos_utf32,
my_well_formed_len_utf32,
my_lengthsp_utf32,
my_numcells_mb,
my_utf32_uni,
@ -3248,19 +3196,6 @@ size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Well-formed length of a UCS-2 string.

  Every pair of bytes is accepted as a character, so the result is the
  byte length of at most "nchars" characters, never exceeding the even
  part of the [b, e) range. The error flag is always cleared.

  @return  length in bytes of the accepted prefix
*/
static
size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                               const char *b, const char *e,
                               size_t nchars, int *error)
{
  /* Ensure string length is divisible by 2 */
  size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
  *error= 0;
  /*
    Compare in characters rather than bytes: nchars is doubled only when
    the result is known to be below nbytes, so it cannot wrap around.
  */
  return nchars < nbytes / 2 ? nchars * 2 : nbytes;
}
static size_t
my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e,
@ -3403,7 +3338,6 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
NULL, /* init */
my_numchars_ucs2,
my_charpos_ucs2,
my_well_formed_len_ucs2,
my_lengthsp_mb2,
my_numcells_mb,
my_ucs2_uni, /* mb_wc */

View file

@ -67303,7 +67303,6 @@ static MY_CHARSET_HANDLER my_charset_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_ujis,
my_lengthsp_8bit,
my_numcells_eucjp,
my_mb_wc_euc_jp, /* mb_wc */

View file

@ -5421,27 +5421,6 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
return MY_CS_ILSEQ;
}
/*
  Well-formed length of a utf8 string.

  Accepts up to "pos" characters from [b, e), measuring each one with
  my_charlen_utf8(). A non-positive length ends the scan; *error is set
  to 1 only if that happens before "e" is reached.

  @return  number of bytes covered by the accepted characters
*/
static size_t
my_well_formed_len_utf8(CHARSET_INFO *cs, const char *b, const char *e,
                        size_t pos, int *error)
{
  const char *p;

  *error= 0;
  for (p= b; pos != 0; pos--)
  {
    int len= my_charlen_utf8(cs, (uchar*) p, (uchar*) e);
    if (len <= 0)
    {
      *error= (p < e);
      break;
    }
    p+= len;
  }
  return (size_t) (p - b);
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8
#define CHARLEN(cs,str,end) my_charlen_utf8(cs,str,end)
@ -5656,7 +5635,6 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8,
my_lengthsp_8bit,
my_numcells_mb,
my_utf8_uni,
@ -7276,7 +7254,6 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
my_lengthsp_8bit,
my_numcells_mb,
my_mb_wc_filename,
@ -7885,29 +7862,6 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Well-formed length of a utf8mb4 string.

  Accepts up to "pos" characters from [b, e), measuring each one with
  my_charlen_utf8mb4(). Scanning ends at the first non-positive length;
  *error reports whether that happened with bytes still left before "e".

  @return  number of bytes covered by the accepted characters
*/
static
size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
                                  const char *b, const char *e,
                                  size_t pos, int *error)
{
  const char *cur= b;

  *error= 0;
  for ( ; pos; pos--)
  {
    int chlen= my_charlen_utf8mb4(cs, (uchar*) cur, (uchar*) e);
    if (chlen <= 0)
    {
      if (cur < e)
        *error= 1;
      return (size_t) (cur - b);
    }
    cur+= chlen;
  }
  return (size_t) (cur - b);
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4
#define CHARLEN(cs,str,end) my_charlen_utf8mb4(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
@ -8033,7 +7987,6 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
NULL, /* init */
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8mb4,
my_lengthsp_8bit,
my_numcells_mb,
my_mb_wc_utf8mb4,

View file

@ -1138,7 +1138,9 @@ my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
size_t
my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
CHARSET_INFO *from_cs, const char *from, size_t from_length,
size_t nchars, MY_STRCONV_STATUS *status)
size_t nchars,
MY_STRCOPY_STATUS *copy_status,
MY_STRCONV_STATUS *conv_status)
{
int cnvres;
my_wc_t wc;
@ -1151,8 +1153,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
DBUG_ASSERT(to_cs != &my_charset_bin);
DBUG_ASSERT(from_cs != &my_charset_bin);
status->m_native_copy_status.m_well_formed_error_pos= NULL;
status->m_cannot_convert_error_pos= NULL;
copy_status->m_well_formed_error_pos= NULL;
conv_status->m_cannot_convert_error_pos= NULL;
for ( ; nchars; nchars--)
{
@ -1161,8 +1163,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
from+= cnvres;
else if (cnvres == MY_CS_ILSEQ)
{
if (!status->m_native_copy_status.m_well_formed_error_pos)
status->m_native_copy_status.m_well_formed_error_pos= from;
if (!copy_status->m_well_formed_error_pos)
copy_status->m_well_formed_error_pos= from;
from++;
wc= '?';
}
@ -1172,8 +1174,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
A correct multibyte sequence detected
But it doesn't have Unicode mapping.
*/
if (!status->m_cannot_convert_error_pos)
status->m_cannot_convert_error_pos= from;
if (!conv_status->m_cannot_convert_error_pos)
conv_status->m_cannot_convert_error_pos= from;
from+= (-cnvres);
wc= '?';
}
@ -1182,8 +1184,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
if ((uchar *) from >= from_end)
break; // End of line
// Incomplete byte sequence
if (!status->m_native_copy_status.m_well_formed_error_pos)
status->m_native_copy_status.m_well_formed_error_pos= from;
if (!copy_status->m_well_formed_error_pos)
copy_status->m_well_formed_error_pos= from;
from++;
wc= '?';
}
@ -1192,8 +1194,8 @@ outp:
to+= cnvres;
else if (cnvres == MY_CS_ILUNI && wc != '?')
{
if (!status->m_cannot_convert_error_pos)
status->m_cannot_convert_error_pos= from_prev;
if (!conv_status->m_cannot_convert_error_pos)
conv_status->m_cannot_convert_error_pos= from_prev;
wc= '?';
goto outp;
}
@ -1203,6 +1205,6 @@ outp:
break;
}
}
status->m_native_copy_status.m_source_end_pos= from;
copy_status->m_source_end_pos= from;
return to - to_start;
}

View file

@ -205,8 +205,7 @@ static char *process_str_arg(CHARSET_INFO *cs, char *to, const char *end,
plen= strnlen(par, width);
if (left_len <= plen)
plen = left_len - 1;
plen= cs->cset->well_formed_len(cs, par, par + plen,
width, &well_formed_error);
plen= my_well_formed_length(cs, par, par + plen, width, &well_formed_error);
if (print_type & ESCAPED_ARG)
to= backtick_string(cs, to, end, par, plen, '`');
else