mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-6353 my_ismbchar() and my_mbcharlen() refactoring
This commit is contained in:
parent
7e66a24dfb
commit
e7ff281d2e
28 changed files with 118 additions and 270 deletions
|
@ -6571,37 +6571,35 @@ int read_line(char *buf, int size)
|
|||
|
||||
if (!skip_char)
|
||||
{
|
||||
/* Could be a multibyte character */
|
||||
/* This code is based on the code in "sql_load.cc" */
|
||||
#ifdef USE_MB
|
||||
int charlen = my_mbcharlen(charset_info, (unsigned char) c);
|
||||
/* We give up if multibyte character is started but not */
|
||||
/* completed before we pass buf_end */
|
||||
if ((charlen > 1) && (p + charlen) <= buf_end)
|
||||
*p++= c;
|
||||
if (use_mb(charset_info))
|
||||
{
|
||||
int i;
|
||||
char* mb_start = p;
|
||||
|
||||
*p++ = c;
|
||||
|
||||
for (i= 1; i < charlen; i++)
|
||||
{
|
||||
c= my_getc(cur_file->file);
|
||||
if (feof(cur_file->file))
|
||||
goto found_eof;
|
||||
*p++ = c;
|
||||
}
|
||||
if (! my_ismbchar(charset_info, mb_start, p))
|
||||
{
|
||||
/* It was not a multiline char, push back the characters */
|
||||
/* We leave first 'c', i.e. pretend it was a normal char */
|
||||
while (p-1 > mb_start)
|
||||
my_ungetc(*--p);
|
||||
}
|
||||
const char *mb_start= p - 1;
|
||||
/* Could be a multibyte character */
|
||||
/* See a similar code in "sql_load.cc" */
|
||||
for ( ; p < buf_end; )
|
||||
{
|
||||
int charlen= my_charlen(charset_info, mb_start, p);
|
||||
if (charlen > 0)
|
||||
break; /* Full character */
|
||||
if (MY_CS_IS_TOOSMALL(charlen))
|
||||
{
|
||||
/* We give up if multibyte character is started but not */
|
||||
/* completed before we pass buf_end */
|
||||
c= my_getc(cur_file->file);
|
||||
if (feof(cur_file->file))
|
||||
goto found_eof;
|
||||
*p++ = c;
|
||||
continue;
|
||||
}
|
||||
DBUG_ASSERT(charlen == MY_CS_ILSEQ);
|
||||
/* It was not a multiline char, push back the characters */
|
||||
/* We leave first 'c', i.e. pretend it was a normal char */
|
||||
while (p - 1 > mb_start)
|
||||
my_ungetc(*--p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
*p++= c;
|
||||
}
|
||||
}
|
||||
die("The input buffer is too small for this query.x\n" \
|
||||
|
|
|
@ -186,6 +186,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
|
|||
|
||||
#define MY_SEQ_INTTAIL 1
|
||||
#define MY_SEQ_SPACES 2
|
||||
#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */
|
||||
|
||||
/* My charsets_list flags */
|
||||
#define MY_CS_COMPILED 1 /* compiled-in sets */
|
||||
|
@ -403,7 +404,6 @@ struct my_charset_handler_st
|
|||
{
|
||||
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
|
||||
/* Multibyte routines */
|
||||
uint (*mbcharlen)(CHARSET_INFO *, uint c);
|
||||
size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
|
||||
size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
|
||||
size_t pos);
|
||||
|
@ -779,7 +779,6 @@ size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
|
|||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status);
|
||||
int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
|
||||
uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);
|
||||
|
||||
|
||||
/* Functions for multibyte charsets */
|
||||
|
@ -1010,11 +1009,19 @@ int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
|
|||
return (cs->cset->charlen)(cs, (const uchar *) str,
|
||||
(const uchar *) end);
|
||||
}
|
||||
#ifdef USE_MB
|
||||
#define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
|
||||
#else
|
||||
#define my_mbcharlen(s, a) 1
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Convert broken and incomplete byte sequences to 1 byte.
|
||||
*/
|
||||
static inline
|
||||
uint my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end)
|
||||
{
|
||||
int char_length= my_charlen(cs, str, end);
|
||||
DBUG_ASSERT(str < end);
|
||||
return char_length > 0 ? (uint) char_length : (uint) 1U;
|
||||
}
|
||||
|
||||
|
||||
#define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
|
||||
#define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))
|
||||
|
|
|
@ -54,6 +54,12 @@ get_collation_number_internal(const char *name)
|
|||
}
|
||||
|
||||
|
||||
static my_bool is_multi_byte_ident(CHARSET_INFO *cs, uchar ch)
|
||||
{
|
||||
int chlen= my_charlen(cs, (const char *) &ch, (const char *) &ch + 1);
|
||||
return MY_CS_IS_TOOSMALL(chlen) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
static my_bool init_state_maps(struct charset_info_st *cs)
|
||||
{
|
||||
uint i;
|
||||
|
@ -73,10 +79,8 @@ static my_bool init_state_maps(struct charset_info_st *cs)
|
|||
state_map[i]=(uchar) MY_LEX_IDENT;
|
||||
else if (my_isdigit(cs,i))
|
||||
state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
|
||||
#if defined(USE_MB) && defined(USE_MB_IDENT)
|
||||
else if (my_mbcharlen(cs, i)>1)
|
||||
else if (is_multi_byte_ident(cs, i))
|
||||
state_map[i]=(uchar) MY_LEX_IDENT;
|
||||
#endif
|
||||
else if (my_isspace(cs,i))
|
||||
state_map[i]=(uchar) MY_LEX_SKIP;
|
||||
else
|
||||
|
@ -902,15 +906,13 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
|
|||
const char *to_start= to;
|
||||
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
|
||||
my_bool overflow= FALSE;
|
||||
#ifdef USE_MB
|
||||
my_bool use_mb_flag= use_mb(charset_info);
|
||||
#endif
|
||||
for (end= from + length; from < end; from++)
|
||||
{
|
||||
char escape= 0;
|
||||
#ifdef USE_MB
|
||||
int tmp_length;
|
||||
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
|
||||
int tmp_length= use_mb(charset_info) ? my_charlen(charset_info, from, end) :
|
||||
1;
|
||||
if (tmp_length > 1)
|
||||
{
|
||||
if (to + tmp_length > to_end)
|
||||
{
|
||||
|
@ -933,7 +935,7 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
|
|||
multi-byte character into a valid one. For example, 0xbf27 is not
|
||||
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
|
||||
*/
|
||||
if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
|
||||
if (tmp_length < 1) /* Bad byte sequence */
|
||||
escape= *from;
|
||||
else
|
||||
#endif
|
||||
|
|
|
@ -184,7 +184,7 @@ static bool set_one_value(ha_create_table_option *opt,
|
|||
{
|
||||
for (end=start;
|
||||
*end && *end != ',';
|
||||
end+= my_mbcharlen(system_charset_info, *end)) /* no-op */;
|
||||
end++) /* no-op */;
|
||||
if (!my_strnncoll(system_charset_info,
|
||||
(uchar*)start, end-start,
|
||||
(uchar*)value->str, value->length))
|
||||
|
|
|
@ -847,16 +847,16 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
|
|||
to the string terminator ASCII NUL ('\0').
|
||||
*/
|
||||
|
||||
static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
|
||||
static char *debug_sync_token(char **token_p, uint *token_length_p,
|
||||
char *ptr, char *ptrend)
|
||||
{
|
||||
DBUG_ASSERT(token_p);
|
||||
DBUG_ASSERT(token_length_p);
|
||||
DBUG_ASSERT(ptr);
|
||||
|
||||
/* Skip leading space */
|
||||
while (my_isspace(system_charset_info, *ptr))
|
||||
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
|
||||
|
||||
ptr+= system_charset_info->cset->scan(system_charset_info,
|
||||
ptr, ptrend, MY_SEQ_SPACES);
|
||||
if (!*ptr)
|
||||
{
|
||||
ptr= NULL;
|
||||
|
@ -867,8 +867,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
|
|||
*token_p= ptr;
|
||||
|
||||
/* Find token end. */
|
||||
while (*ptr && !my_isspace(system_charset_info, *ptr))
|
||||
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
|
||||
ptr+= system_charset_info->cset->scan(system_charset_info,
|
||||
ptr, ptrend, MY_SEQ_NONSPACES);
|
||||
|
||||
/* Get token length. */
|
||||
*token_length_p= ptr - *token_p;
|
||||
|
@ -876,8 +876,9 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
|
|||
/* If necessary, terminate token. */
|
||||
if (*ptr)
|
||||
{
|
||||
DBUG_ASSERT(ptr < ptrend);
|
||||
/* Get terminator character length. */
|
||||
uint mbspacelen= my_mbcharlen(system_charset_info, (uchar) *ptr);
|
||||
uint mbspacelen= my_charlen_fix(system_charset_info, ptr, ptrend);
|
||||
|
||||
/* Terminate token. */
|
||||
*ptr= '\0';
|
||||
|
@ -886,8 +887,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
|
|||
ptr+= mbspacelen;
|
||||
|
||||
/* Skip trailing space */
|
||||
while (my_isspace(system_charset_info, *ptr))
|
||||
ptr+= my_mbcharlen(system_charset_info, (uchar) *ptr);
|
||||
ptr+= system_charset_info->cset->scan(system_charset_info,
|
||||
ptr, ptrend, MY_SEQ_SPACES);
|
||||
}
|
||||
|
||||
end:
|
||||
|
@ -917,7 +918,8 @@ static char *debug_sync_token(char **token_p, uint *token_length_p, char *ptr)
|
|||
undefined in this case.
|
||||
*/
|
||||
|
||||
static char *debug_sync_number(ulong *number_p, char *actstrptr)
|
||||
static char *debug_sync_number(ulong *number_p, char *actstrptr,
|
||||
char *actstrend)
|
||||
{
|
||||
char *ptr;
|
||||
char *ept;
|
||||
|
@ -927,7 +929,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
|
|||
DBUG_ASSERT(actstrptr);
|
||||
|
||||
/* Get token from string. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, actstrptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, actstrptr, actstrend)))
|
||||
goto end;
|
||||
|
||||
*number_p= strtoul(token, &ept, 10);
|
||||
|
@ -971,7 +973,7 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr)
|
|||
for the string.
|
||||
*/
|
||||
|
||||
static bool debug_sync_eval_action(THD *thd, char *action_str)
|
||||
static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end)
|
||||
{
|
||||
st_debug_sync_action *action= NULL;
|
||||
const char *errmsg;
|
||||
|
@ -986,7 +988,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
/*
|
||||
Get debug sync point name. Or a special command.
|
||||
*/
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, action_str)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, action_str, action_end)))
|
||||
{
|
||||
errmsg= "Missing synchronization point name";
|
||||
goto err;
|
||||
|
@ -1009,7 +1011,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
/*
|
||||
Get kind of action to be taken at sync point.
|
||||
*/
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
{
|
||||
/* No action present. Try special commands. Token unchanged. */
|
||||
|
||||
|
@ -1090,7 +1092,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
if (!my_strcasecmp(system_charset_info, token, "SIGNAL"))
|
||||
{
|
||||
/* It is SIGNAL. Signal name must follow. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
{
|
||||
errmsg= "Missing signal name after action SIGNAL";
|
||||
goto err;
|
||||
|
@ -1108,7 +1110,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
action->execute= 1;
|
||||
|
||||
/* Get next token. If none follows, set action. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
goto set_action;
|
||||
}
|
||||
|
||||
|
@ -1118,7 +1120,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
if (!my_strcasecmp(system_charset_info, token, "WAIT_FOR"))
|
||||
{
|
||||
/* It is WAIT_FOR. Wait_for signal name must follow. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
{
|
||||
errmsg= "Missing signal name after action WAIT_FOR";
|
||||
goto err;
|
||||
|
@ -1137,7 +1139,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
action->timeout= opt_debug_sync_timeout;
|
||||
|
||||
/* Get next token. If none follows, set action. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
goto set_action;
|
||||
|
||||
/*
|
||||
|
@ -1146,14 +1148,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
if (!my_strcasecmp(system_charset_info, token, "TIMEOUT"))
|
||||
{
|
||||
/* It is TIMEOUT. Number must follow. */
|
||||
if (!(ptr= debug_sync_number(&action->timeout, ptr)))
|
||||
if (!(ptr= debug_sync_number(&action->timeout, ptr, action_end)))
|
||||
{
|
||||
errmsg= "Missing valid number after TIMEOUT";
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Get next token. If none follows, set action. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
goto set_action;
|
||||
}
|
||||
}
|
||||
|
@ -1174,14 +1176,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
}
|
||||
|
||||
/* Number must follow. */
|
||||
if (!(ptr= debug_sync_number(&action->execute, ptr)))
|
||||
if (!(ptr= debug_sync_number(&action->execute, ptr, action_end)))
|
||||
{
|
||||
errmsg= "Missing valid number after EXECUTE";
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Get next token. If none follows, set action. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
goto set_action;
|
||||
}
|
||||
|
||||
|
@ -1191,14 +1193,14 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
if (!my_strcasecmp(system_charset_info, token, "HIT_LIMIT"))
|
||||
{
|
||||
/* Number must follow. */
|
||||
if (!(ptr= debug_sync_number(&action->hit_limit, ptr)))
|
||||
if (!(ptr= debug_sync_number(&action->hit_limit, ptr, action_end)))
|
||||
{
|
||||
errmsg= "Missing valid number after HIT_LIMIT";
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Get next token. If none follows, set action. */
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr)))
|
||||
if (!(ptr= debug_sync_token(&token, &token_length, ptr, action_end)))
|
||||
goto set_action;
|
||||
}
|
||||
|
||||
|
@ -1246,7 +1248,7 @@ static bool debug_sync_eval_action(THD *thd, char *action_str)
|
|||
terminators in the string. So we need to take a copy here.
|
||||
*/
|
||||
|
||||
bool debug_sync_update(THD *thd, char *val_str)
|
||||
bool debug_sync_update(THD *thd, char *val_str, size_t len)
|
||||
{
|
||||
DBUG_ENTER("debug_sync_update");
|
||||
DBUG_PRINT("debug_sync", ("set action: '%s'", val_str));
|
||||
|
@ -1255,8 +1257,9 @@ bool debug_sync_update(THD *thd, char *val_str)
|
|||
debug_sync_eval_action() places '\0' in the string, which itself
|
||||
must be '\0' terminated.
|
||||
*/
|
||||
DBUG_ASSERT(val_str[len] == '\0');
|
||||
DBUG_RETURN(opt_debug_sync_timeout ?
|
||||
debug_sync_eval_action(thd, val_str) :
|
||||
debug_sync_eval_action(thd, val_str, val_str + len) :
|
||||
FALSE);
|
||||
}
|
||||
|
||||
|
@ -1592,7 +1595,7 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
|
|||
DBUG_ASSERT(action_str);
|
||||
|
||||
value= strmake_root(thd->mem_root, action_str, len);
|
||||
rc= debug_sync_eval_action(thd, value);
|
||||
rc= debug_sync_eval_action(thd, value, value + len);
|
||||
DBUG_RETURN(rc);
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,9 @@ extern void debug_sync_init_thread(THD *thd);
|
|||
extern void debug_sync_end_thread(THD *thd);
|
||||
extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
|
||||
|
||||
extern bool debug_sync_update(THD *thd, char *val_str, size_t len);
|
||||
extern uchar *debug_sync_value_ptr(THD *thd);
|
||||
|
||||
#endif /* defined(ENABLED_DEBUG_SYNC) */
|
||||
|
||||
#endif /* DEBUG_SYNC_INCLUDED */
|
||||
|
|
|
@ -3235,7 +3235,7 @@ int select_export::send_data(List<Item> &items)
|
|||
|
||||
if ((NEED_ESCAPING(*pos) ||
|
||||
(check_second_byte &&
|
||||
my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
|
||||
((uchar) *pos) > 0x7F /* a potential MB2HEAD */ &&
|
||||
pos + 1 < end &&
|
||||
NEED_ESCAPING(pos[1]))) &&
|
||||
/*
|
||||
|
|
|
@ -1434,6 +1434,9 @@ public:
|
|||
};
|
||||
|
||||
#if defined(ENABLED_DEBUG_SYNC)
|
||||
|
||||
#include "debug_sync.h"
|
||||
|
||||
/**
|
||||
The class for @@debug_sync session-only variable
|
||||
*/
|
||||
|
@ -1462,15 +1465,19 @@ public:
|
|||
String str(buff, sizeof(buff), system_charset_info), *res;
|
||||
|
||||
if (!(res=var->value->val_str(&str)))
|
||||
var->save_result.string_value.str= const_cast<char*>("");
|
||||
var->save_result.string_value= empty_lex_str;
|
||||
else
|
||||
var->save_result.string_value.str= thd->strmake(res->ptr(), res->length());
|
||||
{
|
||||
if (!thd->make_lex_string(&var->save_result.string_value,
|
||||
res->ptr(), res->length()))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool session_update(THD *thd, set_var *var)
|
||||
{
|
||||
extern bool debug_sync_update(THD *thd, char *val_str);
|
||||
return debug_sync_update(thd, var->save_result.string_value.str);
|
||||
return debug_sync_update(thd, var->save_result.string_value.str,
|
||||
var->save_result.string_value.length);
|
||||
}
|
||||
bool global_update(THD *thd, set_var *var)
|
||||
{
|
||||
|
@ -1488,7 +1495,6 @@ public:
|
|||
}
|
||||
uchar *session_value_ptr(THD *thd, const LEX_STRING *base)
|
||||
{
|
||||
extern uchar *debug_sync_value_ptr(THD *thd);
|
||||
return debug_sync_value_ptr(thd);
|
||||
}
|
||||
uchar *global_value_ptr(THD *thd, const LEX_STRING *base)
|
||||
|
|
|
@ -561,8 +561,7 @@ static bool append_ident(String *string, const char *name, size_t length,
|
|||
for (name_end= name+length; name < name_end; name+= clen)
|
||||
{
|
||||
uchar c= *(uchar *) name;
|
||||
if (!(clen= my_mbcharlen(system_charset_info, c)))
|
||||
clen= 1;
|
||||
clen= my_charlen_fix(system_charset_info, name, name_end);
|
||||
if (clen == 1 && c == (uchar) quote_char &&
|
||||
(result= string->append(&quote_char, 1, system_charset_info)))
|
||||
goto err;
|
||||
|
|
|
@ -500,8 +500,7 @@ bool append_ident(String *string, const char *name, uint length,
|
|||
for (name_end= name+length; name < name_end; name+= clen)
|
||||
{
|
||||
uchar c= *(uchar *) name;
|
||||
if (!(clen= my_mbcharlen(system_charset_info, c)))
|
||||
clen= 1;
|
||||
clen= my_charlen_fix(system_charset_info, name, name_end);
|
||||
if (clen == 1 && c == (uchar) quote_char &&
|
||||
(result= string->append(&quote_char, 1, system_charset_info)))
|
||||
goto err;
|
||||
|
|
|
@ -1370,7 +1370,7 @@ int spider_db_append_name_with_quote_str(
|
|||
for (name_end = name + length; name < name_end; name += length)
|
||||
{
|
||||
head_code = *name;
|
||||
if (!(length = my_mbcharlen(system_charset_info, (uchar) head_code)))
|
||||
if ((length= my_charlen(system_charset_info, name, name_end)) < 1)
|
||||
{
|
||||
my_message(ER_SPIDER_WRONG_CHARACTER_IN_NAME_NUM,
|
||||
ER_SPIDER_WRONG_CHARACTER_IN_NAME_STR, MYF(0));
|
||||
|
|
|
@ -848,12 +848,6 @@ static uint16 big5strokexfrm(uint16 i)
|
|||
}
|
||||
|
||||
|
||||
static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
|
||||
{
|
||||
return (isbig5head(c)? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
/* page 0 0xA140-0xC7FC */
|
||||
static const uint16 tab_big5_uni0[]={
|
||||
0x3000,0xFF0C,0x3001,0x3002,0xFF0E,0x2022,0xFF1B,0xFF1A,
|
||||
|
@ -6731,7 +6725,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_big5_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_big5,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_big5,
|
||||
|
|
|
@ -225,13 +225,6 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
uint my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
uint c __attribute__((unused)))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t *wc,
|
||||
const uchar *str,
|
||||
|
@ -510,7 +503,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_well_formed_len_8bit,
|
||||
|
|
|
@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
|
|||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (iscp932head((uchar) c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
#define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
|
||||
|
||||
|
||||
|
@ -34687,7 +34681,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_cp932,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_cp932,
|
||||
|
|
|
@ -210,12 +210,6 @@ static const uchar sort_order_euc_kr[]=
|
|||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (iseuc_kr_head(c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
static MY_UNICASE_CHARACTER cA3[256]=
|
||||
{
|
||||
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
|
||||
|
@ -9979,7 +9973,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_euc_kr,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_euckr,
|
||||
|
|
|
@ -221,12 +221,6 @@ static const uchar sort_order_eucjpms[]=
|
|||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1);
|
||||
}
|
||||
|
||||
|
||||
/* Case info pages for JIS-X-0208 range */
|
||||
|
||||
static MY_UNICASE_CHARACTER cA2[256]=
|
||||
|
@ -67511,7 +67505,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_eucjpms,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_eucjpms,
|
||||
|
|
|
@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]=
|
|||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (isgb2312head(c)? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
static MY_UNICASE_CHARACTER cA2[256]=
|
||||
{
|
||||
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
|
||||
|
@ -6385,7 +6379,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_gb2312,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_gb2312,
|
||||
|
|
|
@ -3451,11 +3451,6 @@ static uint16 gbksortorder(uint16 i)
|
|||
}
|
||||
|
||||
|
||||
static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (isgbkhead(c)? 2 : 1);
|
||||
}
|
||||
|
||||
/* page 0 0x8140-0xFE4F */
|
||||
static const uint16 tab_gbk_uni0[]={
|
||||
0x4E02,0x4E04,0x4E05,0x4E06,0x4E0F,0x4E12,0x4E17,0x4E1F,
|
||||
|
@ -10666,7 +10661,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_gbk,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_gbk,
|
||||
|
|
|
@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_8bit,
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_well_formed_len_8bit,
|
||||
|
|
|
@ -230,7 +230,7 @@ int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
|
|||
if (*s++ != *t++)
|
||||
return 1;
|
||||
}
|
||||
else if (my_mbcharlen(cs, *t) > 1)
|
||||
else if (my_charlen(cs, t, t + cs->mbmaxlen) > 1)
|
||||
return 1;
|
||||
else if (map[(uchar) *s++] != map[(uchar) *t++])
|
||||
return 1;
|
||||
|
|
|
@ -1059,6 +1059,13 @@ size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
|
|||
break;
|
||||
}
|
||||
return (size_t) (str - str0);
|
||||
case MY_SEQ_NONSPACES:
|
||||
for ( ; str < end ; str++)
|
||||
{
|
||||
if (my_isspace(cs, *str))
|
||||
break;
|
||||
}
|
||||
return (size_t) (str - str0);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -1916,7 +1923,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
|
|||
MY_CHARSET_HANDLER my_charset_8bit_handler=
|
||||
{
|
||||
my_cset_init_8bit,
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_well_formed_len_8bit,
|
||||
|
|
|
@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]=
|
|||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (issjishead((uchar) c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
|
||||
|
||||
|
||||
|
@ -34066,7 +34060,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_sjis,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_sjis,
|
||||
|
|
|
@ -834,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_well_formed_len_8bit,
|
||||
|
|
|
@ -1049,6 +1049,9 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
|||
{
|
||||
}
|
||||
return (size_t) (str - str0);
|
||||
case MY_SEQ_NONSPACES:
|
||||
DBUG_ASSERT(0); /* Not implemented */
|
||||
/* pass through */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -1431,15 +1434,6 @@ my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
|
|||
/* Defines my_well_formed_char_length_utf16 */
|
||||
|
||||
|
||||
static uint
|
||||
my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
|
||||
uint c __attribute__((unused)))
|
||||
{
|
||||
DBUG_ASSERT(0);
|
||||
return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_numchars_utf16(CHARSET_INFO *cs,
|
||||
const char *b, const char *e)
|
||||
|
@ -1567,7 +1561,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
|
|||
MY_CHARSET_HANDLER my_charset_utf16_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf16, /* mbcharlen */
|
||||
my_numchars_utf16,
|
||||
my_charpos_utf16,
|
||||
my_well_formed_len_utf16,
|
||||
|
@ -1789,7 +1782,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf16,
|
||||
my_numchars_utf16,
|
||||
my_charpos_utf16,
|
||||
my_well_formed_len_utf16,
|
||||
|
@ -2083,14 +2075,6 @@ my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|||
/* Defines my_well_formed_char_length_utf32 */
|
||||
|
||||
|
||||
static uint
|
||||
my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
uint c __attribute__((unused)))
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
|
||||
{
|
||||
|
@ -2484,6 +2468,9 @@ my_scan_utf32(CHARSET_INFO *cs,
|
|||
str+= res;
|
||||
}
|
||||
return (size_t) (str - str0);
|
||||
case MY_SEQ_NONSPACES:
|
||||
DBUG_ASSERT(0); /* Not implemented */
|
||||
/* pass through */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -2525,7 +2512,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
|
|||
MY_CHARSET_HANDLER my_charset_utf32_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf32,
|
||||
my_numchars_utf32,
|
||||
my_charpos_utf32,
|
||||
my_well_formed_len_utf32,
|
||||
|
@ -2862,13 +2848,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
uint c __attribute__((unused)))
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e)
|
||||
|
@ -3003,7 +2982,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
|||
MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_ucs2, /* mbcharlen */
|
||||
my_numchars_ucs2,
|
||||
my_charpos_ucs2,
|
||||
my_well_formed_len_ucs2,
|
||||
|
|
|
@ -220,12 +220,6 @@ static const uchar sort_order_ujis[]=
|
|||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (isujis(c)? 2: isujis_ss2(c)? 2: isujis_ss3(c)? 3: 1);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *str, const char *str_end)
|
||||
|
@ -67255,7 +67249,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
mbcharlen_ujis,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_ujis,
|
||||
|
|
|
@ -5426,21 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
|
|||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
|
||||
uint c)
|
||||
{
|
||||
if (c < 0x80)
|
||||
return 1;
|
||||
else if (c < 0xc2)
|
||||
return 0; /* Illegal mb head */
|
||||
else if (c < 0xe0)
|
||||
return 2;
|
||||
else if (c < 0xf0)
|
||||
return 3;
|
||||
return 0; /* Illegal mb head */;
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -5491,7 +5476,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
|
|||
MY_CHARSET_HANDLER my_charset_utf8_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf8,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_utf8,
|
||||
|
@ -7045,7 +7029,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
|
|||
static MY_CHARSET_HANDLER my_charset_filename_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf8,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_mb,
|
||||
|
@ -7111,57 +7094,6 @@ struct charset_info_st my_charset_filename=
|
|||
};
|
||||
|
||||
|
||||
#ifdef MY_TEST_UTF8
|
||||
#include <stdio.h>
|
||||
|
||||
static void test_mb(CHARSET_INFO *cs, uchar *s)
|
||||
{
|
||||
while(*s)
|
||||
{
|
||||
if (my_ismbhead_utf8(cs,*s))
|
||||
{
|
||||
uint len=my_mbcharlen_utf8(cs,*s);
|
||||
while(len--)
|
||||
{
|
||||
printf("%c",*s);
|
||||
s++;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%c\n",*s);
|
||||
s++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
char str[1024]=" utf8 test проба ПЕРА по-РУССКИ";
|
||||
CHARSET_INFO *cs;
|
||||
|
||||
test_mb(cs,(uchar*)str);
|
||||
|
||||
printf("orig :'%s'\n",str);
|
||||
|
||||
my_caseup_utf8(cs,str,15);
|
||||
printf("caseup :'%s'\n",str);
|
||||
|
||||
my_caseup_str_utf8(cs,str);
|
||||
printf("caseup_str:'%s'\n",str);
|
||||
|
||||
my_casedn_utf8(cs,str,15);
|
||||
printf("casedn :'%s'\n",str);
|
||||
|
||||
my_casedn_str_utf8(cs,str);
|
||||
printf("casedn_str:'%s'\n",str);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_CHARSET_UTF8 */
|
||||
|
||||
|
||||
|
@ -7755,23 +7687,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
|
|||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static uint
|
||||
my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c)
|
||||
{
|
||||
if (c < 0x80)
|
||||
return 1;
|
||||
if (c < 0xc2)
|
||||
return 0; /* Illegal mb head */
|
||||
if (c < 0xe0)
|
||||
return 2;
|
||||
if (c < 0xf0)
|
||||
return 3;
|
||||
if (c < 0xf8)
|
||||
return 4;
|
||||
return 0; /* Illegal mb head */;
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -7807,7 +7722,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
|
|||
MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_mbcharlen_utf8mb4,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_well_formed_len_utf8mb4,
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
const char *acc_end= (ACC) + (LEN); \
|
||||
for (ptr_str= (STR) ; ptr_str < (END) ; ptr_str+= mbl) \
|
||||
{ \
|
||||
mbl= my_mbcharlen((CS), *(uchar*)ptr_str); \
|
||||
mbl= my_charlen_fix((CS), ptr_str, (END)); \
|
||||
if (mbl < 2) \
|
||||
{ \
|
||||
DBUG_ASSERT(mbl == 1); \
|
||||
|
@ -63,10 +63,9 @@ end: \
|
|||
char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,
|
||||
pchar c)
|
||||
{
|
||||
uint mbl;
|
||||
while (str < end)
|
||||
{
|
||||
mbl= my_mbcharlen(cs, *(uchar *)str);
|
||||
uint mbl= my_ismbchar(cs, str, end);
|
||||
if (mbl < 2)
|
||||
{
|
||||
if (*str == c)
|
||||
|
|
|
@ -168,8 +168,7 @@ static char *backtick_string(CHARSET_INFO *cs, char *to, const char *end,
|
|||
for ( ; par < par_end; par+= char_len)
|
||||
{
|
||||
uchar c= *(uchar *) par;
|
||||
if (!(char_len= my_mbcharlen(cs, c)))
|
||||
char_len= 1;
|
||||
char_len= my_charlen_fix(cs, par, par_end);
|
||||
if (char_len == 1 && c == (uchar) quote_char )
|
||||
{
|
||||
if (start + 1 >= end)
|
||||
|
|
Loading…
Reference in a new issue