MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring

This commit is contained in:
Alexander Barkov 2016-05-17 15:27:10 +04:00
parent 7e66a24dfb
commit e7ff281d2e
28 changed files with 118 additions and 270 deletions

View file

@ -6571,37 +6571,35 @@ int read_line(char *buf, int size)
if (!skip_char)
{
/* Could be a multibyte character */
/* This code is based on the code in "sql_load.cc" */
#ifdef USE_MB
int charlen = my_mbcharlen(charset_info, (unsigned char) c);
/* We give up if multibyte character is started but not */
/* completed before we pass buf_end */
if ((charlen > 1) && (p + charlen) <= buf_end)
*p++= c;
if (use_mb(charset_info))
{
int i;
char* mb_start = p;
*p++ = c;
for (i= 1; i < charlen; i++)
{
c= my_getc(cur_file->file);
if (feof(cur_file->file))
goto found_eof;
*p++ = c;
}
if (! my_ismbchar(charset_info, mb_start, p))
{
/* It was not a multiline char, push back the characters */
/* We leave first 'c', i.e. pretend it was a normal char */
while (p-1 > mb_start)
my_ungetc(*--p);
}
const char *mb_start= p - 1;
/* Could be a multibyte character */
/* See similar code in "sql_load.cc" */
for ( ; p < buf_end; )
{
int charlen= my_charlen(charset_info, mb_start, p);
if (charlen > 0)
break; /* Full character */
if (MY_CS_IS_TOOSMALL(charlen))
{
/* We give up if multibyte character is started but not */
/* completed before we pass buf_end */
c= my_getc(cur_file->file);
if (feof(cur_file->file))
goto found_eof;
*p++ = c;
continue;
}
DBUG_ASSERT(charlen == MY_CS_ILSEQ);
/* It was not a multibyte char, push back the characters */
/* We leave first 'c', i.e. pretend it was a normal char */
while (p - 1 > mb_start)
my_ungetc(*--p);
break;
}
}
else
#endif
*p++= c;
}
}
die("The input buffer is too small for this query.x\n" \

View file

@ -186,6 +186,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_SEQ_INTTAIL 1
#define MY_SEQ_SPACES 2
#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */
/* My charsets_list flags */
#define MY_CS_COMPILED 1 /* compiled-in sets */
@ -403,7 +404,6 @@ struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
/* Multibyte routines */
uint (*mbcharlen)(CHARSET_INFO *, uint c);
size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
size_t pos);
@ -779,7 +779,6 @@ size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
size_t nchars,
MY_STRCOPY_STATUS *status);
int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */
@ -1010,11 +1009,19 @@ int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
return (cs->cset->charlen)(cs, (const uchar *) str,
(const uchar *) end);
}
#ifdef USE_MB
#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
#else
#define my_mbcharlen(s, a) 1
#endif
/**
  Byte length of the character at the start of [str, end), never less
  than one.

  my_charlen() is asked for the length; whenever its answer is not
  positive (the case this helper exists for: broken and incomplete byte
  sequences) the result is forced to 1.

  @param cs   character set handed through to my_charlen()
  @param str  first byte to examine; asserted to be before end
  @param end  end of the buffer
  @return     the my_charlen() result when positive, otherwise 1
*/
static inline
uint my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end)
{
  int len= my_charlen(cs, str, end);
  DBUG_ASSERT(str < end);
  if (len > 0)
    return (uint) len;
  return (uint) 1U;
}
#define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
#define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))

View file

@ -54,6 +54,12 @@ get_collation_number_internal(const char *name)
}
/*
  Report whether my_charlen(), given only the single byte ch, answers
  with a "too small" code (as tested by MY_CS_IS_TOOSMALL).

  Returns TRUE in that case and FALSE for every other result.
*/
static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch)
{
  const char *first= (const char *) &ch;
  /* Keep the result in a local: the macro may evaluate its argument twice. */
  int chlen= my_charlen(cs, first, first + 1);
  if (MY_CS_IS_TOOSMALL(chlen))
    return TRUE;
  return FALSE;
}
static my_bool init_state_maps(struct charset_info_st *cs)
{
uint i;
@ -73,10 +79,8 @@ static my_bool init_state_maps(struct charset_info_st *cs)
state_map[i]=(uchar) MY_LEX_IDENT;
else if (my_isdigit(cs,i))
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
else if (my_mbcharlen(cs, i)>1)
else if (is_multi_byte_ident(cs, i))
state_map[i]=(uchar) MY_LEX_IDENT;
#endif
else if (my_isspace(cs,i))
state_map[i]=(uchar) MY_LEX_SKIP;
else
@ -902,15 +906,13 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
const char *to_start= to;
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
my_bool overflow= FALSE;
#ifdef USE_MB
my_bool use_mb_flag= use_mb(charset_info);
#endif
for (end= from + length; from < end; from++)
{
char escape= 0;
#ifdef USE_MB
int tmp_length;
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
int tmp_length= use_mb(charset_info) ? my_charlen(charset_info, from, end) :
1;
if (tmp_length > 1)
{
if (to + tmp_length > to_end)
{
@ -933,7 +935,7 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
multi-byte character into a valid one. For example, 0xbf27 is not
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
*/
if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
if (tmp_length < 1) /* Bad byte sequence */
escape= *from;
else
#endif

View file

@ -184,7 +184,7 @@ static bool set_one_value(ha_create_table_option *opt,
{
for (end=start;
*end && *end != ',';
end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
end++) /* no-op */;
if (!my_strnncoll(system_charset_info,
(uchar*)start, end-start,
(uchar*)value->str, value->length))

View file

@ -847,16 +847,16 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
to the string terminator ASCII NUL ('\0').
*/
static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
static char *debug_sync_token(char **token_p, uint *token_length_p,
char *ptr, char *ptrend)
{
DBUG_ASSERT(token_p);
DBUG_ASSERT(token_length_p);
DBUG_ASSERT(ptr);
/* Skip leading space */
while (my_isspace(system_charset_info, *ptr))
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
ptr+= system_charset_info->cset->scan(system_charset_info,
ptr, ptrend, MY_SEQ_SPACES);
if (!*ptr)
{
ptr= NULL;
@ -867,8 +867,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
*token_p= ptr;
/* Find token end. */
while (*ptr && !my_isspace(system_charset_info, *ptr))
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
ptr+= system_charset_info->cset->scan(system_charset_info,
ptr, ptrend, MY_SEQ_NONSPACES);
/* Get token length. */
*token_length_p= ptr - *token_p;
@ -876,8 +876,9 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
/* If necessary, terminate token. */
if (*ptr)
{
DBUG_ASSERT(ptr < ptrend);
/* Get terminator character length. */
uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr);
uint mbspacelen= my_charlen_fix(system_charset_info, ptr, ptrend);
/* Terminate token. */
*ptr= '\0';
@ -886,8 +887,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
ptr+= mbspacelen;
/* Skip trailing space */
while (my_isspace(system_charset_info, *ptr))
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
ptr+= system_charset_info->cset->scan(system_charset_info,
ptr, ptrend, MY_SEQ_SPACES);
}
end:
@ -917,7 +918,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
undefined in this case.
*/
static char *debug_sync_number(ulong *number_p, char *actstrptr)
static char *debug_sync_number(ulong *number_p, char *actstrptr,
char *actstrend)
{
char *ptr;
char *ept;
@ -927,7 +929,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
DBUG_ASSERT(actstrptr);
/* Get token from string. */
if (!(ptr= debug_sync_token(&token, &token_length, actstrptr)))
if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend)))
goto end;
*number_p= strtoul(token, &ept, 10);
@ -971,7 +973,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
for the string.
*/
static bool debug_sync_eval_action(THD *thd, char *action_str)
static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end)
{
st_debug_sync_action *action= NULL;
const char *errmsg;
@ -986,7 +988,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
/*
Get debug sync point name. Or a special command.
*/
if (!(ptr= debug_sync_token(&token, &token_length, action_str)))
if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end)))
{
errmsg= "Missing synchronization point name";
goto err;
@ -1009,7 +1011,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
/*
Get kind of action to be taken at sync point.
*/
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
/* No action present. Try special commands. Token unchanged. */
@ -1090,7 +1092,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "SIGNAL"))
{
/* It is SIGNAL. Signal name must follow. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
errmsg= "Missing signal name after action SIGNAL";
goto err;
@ -1108,7 +1110,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
action->execute= 1;
/* Get next token. If none follows, set action. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@ -1118,7 +1120,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR"))
{
/* It is WAIT_FOR. Wait_for signal name must follow. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
{
errmsg= "Missing signal name after action WAIT_FOR";
goto err;
@ -1137,7 +1139,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
action->timeout= opt_debug_sync_timeout;
/* Get next token. If none follows, set action. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
/*
@ -1146,14 +1148,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "TIMEOUT"))
{
/* It is TIMEOUT. Number must follow. */
if (!(ptr= debug_sync_number(&action->timeout, ptr)))
if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end)))
{
errmsg= "Missing valid number after TIMEOUT";
goto err;
}
/* Get next token. If none follows, set action. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
}
@ -1174,14 +1176,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
}
/* Number must follow. */
if (!(ptr= debug_sync_number(&action->execute, ptr)))
if (!(ptr= debug_sync_number(&action->execute, ptr, action_end)))
{
errmsg= "Missing valid number after EXECUTE";
goto err;
}
/* Get next token. If none follows, set action. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@ -1191,14 +1193,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT"))
{
/* Number must follow. */
if (!(ptr= debug_sync_number(&action->hit_limit, ptr)))
if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end)))
{
errmsg= "Missing valid number after HIT_LIMIT";
goto err;
}
/* Get next token. If none follows, set action. */
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
goto set_action;
}
@ -1246,7 +1248,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
terminators in the string. So we need to take a copy here.
*/
bool debug_sync_update(THD *thd, char *val_str)
bool debug_sync_update(THD *thd, char *val_str, size_t len)
{
DBUG_ENTER("debug_sync_update");
DBUG_PRINT("debug_sync", ("set action: '%s'", val_str));
@ -1255,8 +1257,9 @@ bool debug_sync_update(THD *thd, char *val_str)
debug_sync_eval_action() places '\0' in the string, which itself
must be '\0' terminated.
*/
DBUG_ASSERT(val_str[len] == '\0');
DBUG_RETURN(opt_debug_sync_timeout ?
debug_sync_eval_action(thd, val_str) :
debug_sync_eval_action(thd, val_str, val_str + len) :
FALSE);
}
@ -1592,7 +1595,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
DBUG_ASSERT(action_str);
value= strmake_root(thd->mem_root, action_str, len);
rc= debug_sync_eval_action(thd, value);
rc= debug_sync_eval_action(thd, value, value + len);
DBUG_RETURN(rc);
}

View file

@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd);
extern void debug_sync_end_thread(THD *thd);
extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
extern bool debug_sync_update(THD *thd, char *val_str, size_t len);
extern uchar *debug_sync_value_ptr(THD *thd);
#endif /* defined(ENABLED_DEBUG_SYNC) */
#endif /* DEBUG_SYNC_INCLUDED */

View file

@ -3235,7 +3235,7 @@ int select_export::send_data(List<Item> &items)
if ((NEED_ESCAPING(*pos) ||
(check_second_byte &&
my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
((uchar) *pos) > 0x7F /* a potential MB2HEAD */ &&
pos + 1 < end &&
NEED_ESCAPING(pos[1]))) &&
/*

View file

@ -1434,6 +1434,9 @@ public:
};
#if defined(ENABLED_DEBUG_SYNC)
#include "debug_sync.h"
/**
The class for @@debug_sync session-only variable
*/
@ -1462,15 +1465,19 @@ public:
String str(buff, sizeof(buff), system_charset_info), *res;
if (!(res=var->value->val_str(&str)))
var->save_result.string_value.str= const_cast<char*>("");
var->save_result.string_value= empty_lex_str;
else
var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
{
if (!thd->make_lex_string(&var->save_result.string_value,
res->ptr(), res->length()))
return true;
}
return false;
}
bool session_update(THD *thd, set_var *var)
{
extern bool debug_sync_update(THD *thd, char *val_str);
return debug_sync_update(thd, var->save_result.string_value.str);
return debug_sync_update(thd, var->save_result.string_value.str,
var->save_result.string_value.length);
}
bool global_update(THD *thd, set_var *var)
{
@ -1488,7 +1495,6 @@ public:
}
uchar *session_value_ptr(THD *thd, const LEX_STRING *base)
{
extern uchar *debug_sync_value_ptr(THD *thd);
return debug_sync_value_ptr(thd);
}
uchar *global_value_ptr(THD *thd, const LEX_STRING *base)

View file

@ -561,8 +561,7 @@ static bool append_ident(String *string, const char *name, size_t length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
if (!(clen= my_mbcharlen(system_charset_info, c)))
clen= 1;
clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;

View file

@ -500,8 +500,7 @@ bool append_ident(String *string, const char *name, uint length,
for (name_end= name+length; name < name_end; name+= clen)
{
uchar c= *(uchar *) name;
if (!(clen= my_mbcharlen(system_charset_info, c)))
clen= 1;
clen= my_charlen_fix(system_charset_info, name, name_end);
if (clen == 1 && c == (uchar) quote_char &&
(result= string->append(&quote_char, 1, system_charset_info)))
goto err;

View file

@ -1370,7 +1370,7 @@ int spider_db_append_name_with_quote_str(
for (name_end = name + length; name < name_end; name += length)
{
head_code = *name;
if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code)))
if ((length= my_charlen(system_charset_info, name, name_end)) < 1)
{
my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM,
ER_SPIDER_WRONG_CHARACTER_IN_NAME_STR, MYF(0));

View file

@ -848,12 +848,6 @@ static uint16 big5strokexfrm(uint16 i)
}
/*
  Character length for big5 derived from the value c alone:
  2 when isbig5head(c) holds, otherwise 1. The charset argument is unused.
*/
static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
{
  if (isbig5head(c))
    return 2;
  return 1;
}
/* page 0 0xA140-0xC7FC */
static const uint16 tab_big5_uni0[]={
0x3000,0xFF0C,0x3001,0x3002,0xFF0E,0x2022,0xFF1B,0xFF1A,
@ -6731,7 +6725,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
NULL, /* init */
mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_big5,

View file

@ -225,13 +225,6 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
}
/*
  Character length callback that ignores both arguments and always
  reports a length of 1.
*/
uint my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
                       uint c __attribute__((unused)))
{
  enum { SINGLE_BYTE_LENGTH= 1 };
  return SINGLE_BYTE_LENGTH;
}
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *wc,
const uchar *str,
@ -510,7 +503,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,

View file

@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
#include "ctype-mb.ic"
/*
  Character length for cp932 derived from the value c alone:
  2 when iscp932head() accepts c narrowed to uchar, otherwise 1.
  The charset argument is unused.
*/
static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (iscp932head((uchar) c))
    return 2;
  return 1;
}
#define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
@ -34687,7 +34681,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_cp932,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_cp932,

View file

@ -210,12 +210,6 @@ static const uchar sort_order_euc_kr[]=
#include "ctype-mb.ic"
/*
  Character length for euc_kr derived from the value c alone:
  2 when iseuc_kr_head(c) holds, otherwise 1. The charset argument is unused.
*/
static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (iseuc_kr_head(c))
    return 2;
  return 1;
}
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
@ -9979,7 +9973,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_euckr,

View file

@ -221,12 +221,6 @@ static const uchar sort_order_eucjpms[]=
#include "strcoll.ic"
/*
  Character length for eucjpms derived from the value c alone.

  The predicates are tried in the order iseucjpms, iseucjpms_ss2,
  iseucjpms_ss3: the first two give 2, the third gives 3, and a value
  matching none of them gives 1. The charset argument is unused.
*/
static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (iseucjpms(c) || iseucjpms_ss2(c))
    return 2;
  if (iseucjpms_ss3(c))
    return 3;
  return 1;
}
/* Case info pages for JIS-X-0208 range */
static MY_UNICASE_CHARACTER cA2[256]=
@ -67511,7 +67505,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_eucjpms,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_eucjpms,

View file

@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]=
#include "ctype-mb.ic"
/*
  Character length for gb2312 derived from the value c alone:
  2 when isgb2312head(c) holds, otherwise 1. The charset argument is unused.
*/
static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (isgb2312head(c))
    return 2;
  return 1;
}
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
@ -6385,7 +6379,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gb2312,

View file

@ -3451,11 +3451,6 @@ static uint16 gbksortorder(uint16 i)
}
/*
  Character length for gbk derived from the value c alone:
  2 when isgbkhead(c) holds, otherwise 1. The charset argument is unused.
*/
static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (isgbkhead(c))
    return 2;
  return 1;
}
/* page 0 0x8140-0xFE4F */
static const uint16 tab_gbk_uni0[]={
0x4E02,0x4E04,0x4E05,0x4E06,0x4E0F,0x4E12,0x4E17,0x4E1F,
@ -10666,7 +10661,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_gbk,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_gbk,

View file

@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,

View file

@ -230,7 +230,7 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
if (*s++ != *t++)
return 1;
}
else if (my_mbcharlen(cs, *t) > 1)
else if (my_charlen(cs, t, t + cs->mbmaxlen) > 1)
return 1;
else if (map[(uchar) *s++] != map[(uchar) *t++])
return 1;

View file

@ -1059,6 +1059,13 @@ size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
break;
}
return (size_t) (str - str0);
case MY_SEQ_NONSPACES:
for ( ; str < end ; str++)
{
if (my_isspace(cs, *str))
break;
}
return (size_t) (str - str0);
default:
return 0;
}
@ -1916,7 +1923,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
my_cset_init_8bit,
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,

View file

@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]=
#include "ctype-mb.ic"
/*
  Character length for sjis derived from the value c alone:
  2 when issjishead() accepts c narrowed to uchar, otherwise 1.
  The charset argument is unused.
*/
static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (issjishead((uchar) c))
    return 2;
  return 1;
}
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
@ -34066,7 +34060,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_sjis,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_sjis,

View file

@ -834,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_well_formed_len_8bit,

View file

@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
{
}
return (size_t) (str - str0);
case MY_SEQ_NONSPACES:
DBUG_ASSERT(0); /* Not implemented */
/* pass through */
default:
return 0;
}
@ -1431,15 +1434,6 @@ my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
/* Defines my_well_formed_char_length_utf16 */
/*
  Character length for utf16 derived from the value c alone:
  4 when MY_UTF16_HIGH_HEAD(c) holds, otherwise 2.

  DBUG_ASSERT(0) is evaluated on every call, so this entry point is
  presumably not expected to be reached at all.
  The charset argument is unused.
*/
static uint
my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
                   uint c __attribute__((unused)))
{
  DBUG_ASSERT(0);
  if (MY_UTF16_HIGH_HEAD(c))
    return 4;
  return 2;
}
static size_t
my_numchars_utf16(CHARSET_INFO *cs,
const char *b, const char *e)
@ -1567,7 +1561,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
MY_CHARSET_HANDLER my_charset_utf16_handler=
{
NULL, /* init */
my_mbcharlen_utf16, /* mbcharlen */
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
@ -1789,7 +1782,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
NULL, /* init */
my_mbcharlen_utf16,
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
@ -2083,14 +2075,6 @@ my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
/* Defines my_well_formed_char_length_utf32 */
/*
  Character length callback for utf32: both arguments are ignored and the
  answer is always 4.
*/
static uint
my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
                   uint c __attribute__((unused)))
{
  enum { UTF32_CHAR_LENGTH= 4 };
  return UTF32_CHAR_LENGTH;
}
static int
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
{
@ -2484,6 +2468,9 @@ my_scan_utf32(CHARSET_INFO *cs,
str+= res;
}
return (size_t) (str - str0);
case MY_SEQ_NONSPACES:
DBUG_ASSERT(0); /* Not implemented */
/* pass through */
default:
return 0;
}
@ -2525,7 +2512,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
NULL, /* init */
my_mbcharlen_utf32,
my_numchars_utf32,
my_charpos_utf32,
my_well_formed_len_utf32,
@ -2862,13 +2848,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Character length callback for ucs2: both arguments are ignored and the
  answer is always 2.
*/
static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
                              uint c __attribute__((unused)))
{
  enum { UCS2_CHAR_LENGTH= 2 };
  return UCS2_CHAR_LENGTH;
}
static
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e)
@ -3003,7 +2982,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
NULL, /* init */
my_mbcharlen_ucs2, /* mbcharlen */
my_numchars_ucs2,
my_charpos_ucs2,
my_well_formed_len_ucs2,

View file

@ -220,12 +220,6 @@ static const uchar sort_order_ujis[]=
#include "strcoll.ic"
/*
  Character length for ujis derived from the value c alone.

  The predicates are tried in the order isujis, isujis_ss2, isujis_ss3:
  the first two give 2, the third gives 3, and a value matching none of
  them gives 1. The charset argument is unused.
*/
static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
  if (isujis(c) || isujis_ss2(c))
    return 2;
  if (isujis_ss3(c))
    return 3;
  return 1;
}
static
size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
const char *str, const char *str_end)
@ -67255,7 +67249,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
mbcharlen_ujis,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_ujis,

View file

@ -5426,21 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
#include "strcoll.ic"
/*
  Character length for utf8 derived from the value c alone.

    c <  0x80           -> 1
    0xc2 <= c < 0xe0    -> 2
    0xe0 <= c < 0xf0    -> 3
    anything else       -> 0 (illegal multi-byte head)

  The charset argument is unused.
*/
static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
                              uint c)
{
  if (c < 0x80)
    return 1;
  if (c >= 0xc2 && c < 0xe0)
    return 2;
  if (c >= 0xe0 && c < 0xf0)
    return 3;
  /* 0x80..0xc1 and 0xf0 upwards */
  return 0; /* Illegal mb head */
}
static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
{
NULL, /* init */
@ -5491,7 +5476,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8_handler=
{
NULL, /* init */
my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8,
@ -7045,7 +7029,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
static MY_CHARSET_HANDLER my_charset_filename_handler=
{
NULL, /* init */
my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_mb,
@ -7111,57 +7094,6 @@ struct charset_info_st my_charset_filename=
};
#ifdef MY_TEST_UTF8
#include <stdio.h>
/*
  Debug helper: print the NUL-terminated string s one character per line.

  For a byte accepted by my_ismbhead_utf8() the length reported by
  my_mbcharlen_utf8() decides how many bytes are printed before the
  newline; any other byte is printed alone.

  Two guards keep the walk inside the string:
    - a reported length of 0 is treated as 1, so the outer loop always
      advances instead of printing newlines forever;
    - copying stops at the terminating NUL, so a sequence that is cut
      short at the end of the string is not read past.

  cs  charset handed to my_ismbhead_utf8() and my_mbcharlen_utf8()
  s   NUL-terminated input; it is only read
*/
static void test_mb(CHARSET_INFO *cs, uchar *s)
{
  while (*s)
  {
    uint len= my_ismbhead_utf8(cs, *s) ? my_mbcharlen_utf8(cs, *s) : 1;
    if (len == 0)
      len= 1; /* illegal head: still consume one byte */
    for ( ; len > 0 && *s; len--, s++)
      printf("%c", *s);
    printf("\n");
  }
}
/*
  Stand-alone driver: prints a sample string, then the same buffer after
  each of four case-conversion calls, labelling every line.

  NOTE(review): cs is declared but never assigned before it is passed to
  test_mb() and to every case-conversion call below, so each of those
  calls receives an indeterminate pointer. Which charset object should be
  used here cannot be determined from this code; confirm before reusing.
*/
int main()
{
char str[1024]=" utf8 test проба ПЕРА по-РУССКИ";
/* NOTE(review): never initialized; see the header comment. */
CHARSET_INFO *cs;
test_mb(cs,(uchar*)str);
printf("orig :'%s'\n",str);
/* The same buffer is handed to each call in turn; presumably each
   converts str in place, which would make the order significant. */
/* NOTE(review): 15 is presumably a byte length; confirm. */
my_caseup_utf8(cs,str,15);
printf("caseup :'%s'\n",str);
my_caseup_str_utf8(cs,str);
printf("caseup_str:'%s'\n",str);
my_casedn_utf8(cs,str,15);
printf("casedn :'%s'\n",str);
my_casedn_str_utf8(cs,str);
printf("casedn_str:'%s'\n",str);
return 0;
}
#endif
#endif /* HAVE_CHARSET_UTF8 */
@ -7755,23 +7687,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
#include "strcoll.ic"
/*
  Character length for utf8mb4 derived from the value c alone.

    c <  0x80           -> 1
    0xc2 <= c < 0xe0    -> 2
    0xe0 <= c < 0xf0    -> 3
    0xf0 <= c < 0xf8    -> 4
    anything else       -> 0 (illegal multi-byte head)

  The charset argument is unused.
*/
static uint
my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c)
{
  if (c < 0x80)
    return 1;
  if (c < 0xc2 || c >= 0xf8)
    return 0; /* Illegal mb head */
  if (c < 0xe0)
    return 2;
  return c < 0xf0 ? 3 : 4;
}
static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
{
NULL, /* init */
@ -7807,7 +7722,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
{
NULL, /* init */
my_mbcharlen_utf8mb4,
my_numchars_mb,
my_charpos_mb,
my_well_formed_len_utf8mb4,

View file

@ -38,7 +38,7 @@
const char *acc_end= (ACC) + (LEN); \
for (ptr_str= (STR) ; ptr_str < (END) ; ptr_str+= mbl) \
{ \
mbl= my_mbcharlen((CS), *(uchar*)ptr_str); \
mbl= my_charlen_fix((CS), ptr_str, (END)); \
if (mbl < 2) \
{ \
DBUG_ASSERT(mbl == 1); \
@ -63,10 +63,9 @@ end: \
char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,
pchar c)
{
uint mbl;
while (str < end)
{
mbl= my_mbcharlen(cs, *(uchar *)str);
uint mbl= my_ismbchar(cs, str, end);
if (mbl < 2)
{
if (*str == c)

View file

@ -168,8 +168,7 @@ static char *backtick_string(CHARSET_INFO *cs, char *to, const char *end,
for ( ; par < par_end; par+= char_len)
{
uchar c= *(uchar *) par;
if (!(char_len= my_mbcharlen(cs, c)))
char_len= 1;
char_len= my_charlen_fix(cs, par, par_end);
if (char_len == 1 && c == (uchar) quote_char )
{
if (start + 1 >= end)