mirror of
https://github.com/MariaDB/server.git
synced 2025-01-21 06:22:28 +01:00
Merge abarkov@bk-internal.mysql.com:/home/bk/mysql-5.1-new
into mysql.com:/usr/home/bar/mysql-5.1.new.ctype
This commit is contained in:
commit
5312aafc7b
20 changed files with 1757 additions and 1 deletions
|
@ -24,7 +24,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \
|
|||
sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
|
||||
mysql_time.h plugin.h $(BUILT_SOURCES)
|
||||
noinst_HEADERS = config-win.h config-os2.h config-netware.h \
|
||||
heap.h my_bitmap.h\
|
||||
heap.h my_bitmap.h my_uctype.h \
|
||||
myisam.h myisampack.h myisammrg.h ft_global.h\
|
||||
mysys_err.h my_base.h help_start.h help_end.h \
|
||||
my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \
|
||||
|
|
|
@ -47,6 +47,15 @@ typedef struct unicase_info_st
|
|||
extern MY_UNICASE_INFO *my_unicase_default[256];
|
||||
extern MY_UNICASE_INFO *my_unicase_turkish[256];
|
||||
|
||||
/*
  Ctype descriptor for one 256-character page of the Unicode ctype table.

  Lookups use ctype[low byte of the code point] when ctype is not NULL,
  and fall back to pctype otherwise.
*/
typedef struct uni_ctype_st
{
  unsigned char  pctype;  /* Ctype used for the page when ctype is NULL     */
  unsigned char *ctype;   /* Per-character ctype array of the page, or NULL */
} MY_UNI_CTYPE;
|
||||
|
||||
extern MY_UNI_CTYPE my_uni_ctype[256];
|
||||
|
||||
|
||||
#define MY_CS_ILSEQ 0
|
||||
#define MY_CS_ILUNI 0
|
||||
#define MY_CS_TOOSMALL -1
|
||||
|
@ -165,6 +174,10 @@ typedef struct my_charset_handler_st
|
|||
int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
|
||||
unsigned char *s,unsigned char *e);
|
||||
|
||||
/* CTYPE scanner */
|
||||
int (*ctype)(struct charset_info_st *cs, int *ctype,
|
||||
const unsigned char *s, const unsigned char *e);
|
||||
|
||||
/* Functions for case and sort conversion */
|
||||
void (*caseup_str)(struct charset_info_st *, char *);
|
||||
void (*casedn_str)(struct charset_info_st *, char *);
|
||||
|
@ -308,6 +321,9 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
|
|||
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
|
||||
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
|
||||
|
||||
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
|
||||
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);
|
||||
|
||||
ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);
|
||||
|
||||
int my_snprintf_8bit(struct charset_info_st *, char *to, uint n,
|
||||
|
|
1464
include/my_uctype.h
Normal file
1464
include/my_uctype.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -40,6 +40,7 @@ endif
|
|||
|
||||
libmystrings_a_SOURCES = $(ASRCS) $(CSRCS)
|
||||
noinst_PROGRAMS = conf_to_src
|
||||
CLEANFILES = str_test uctypedump test_decimal
|
||||
# Default charset definitions
|
||||
EXTRA_DIST = ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-win1250ch.c \
|
||||
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-utf8.c \
|
||||
|
@ -77,6 +78,9 @@ FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@
|
|||
str_test: str_test.c $(pkglib_LIBRARIES)
|
||||
$(LINK) $(FLAGS) -DMAIN $(INCLUDES) $(srcdir)/str_test.c $(LDADD) $(pkglib_LIBRARIES)
|
||||
|
||||
uctypedump: uctypedump.c
|
||||
$(LINK) $(INCLUDES) $(srcdir)/uctypedump.c
|
||||
|
||||
test_decimal$(EXEEXT): decimal.c $(pkglib_LIBRARIES)
|
||||
$(CP) $(srcdir)/decimal.c ./test_decimal.c
|
||||
$(LINK) $(FLAGS) -DMAIN ./test_decimal.c $(LDADD) $(pkglib_LIBRARIES)
|
||||
|
|
|
@ -6356,6 +6356,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_big5, /* mb_wc */
|
||||
my_wc_mb_big5, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -503,6 +503,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_bin,
|
||||
my_wc_mb_bin,
|
||||
my_mb_ctype_8bit,
|
||||
my_case_str_bin,
|
||||
my_case_str_bin,
|
||||
my_case_bin,
|
||||
|
|
|
@ -5478,6 +5478,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_cp932,
|
||||
my_mb_wc_cp932, /* mb_wc */
|
||||
my_wc_mb_cp932, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_8bit,
|
||||
my_casedn_str_8bit,
|
||||
my_caseup_8bit,
|
||||
|
|
|
@ -8697,6 +8697,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_euc_kr, /* mb_wc */
|
||||
my_wc_mb_euc_kr, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -8663,6 +8663,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_eucjp,
|
||||
my_mb_wc_euc_jp, /* mb_wc */
|
||||
my_wc_mb_euc_jp, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -5748,6 +5748,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_gb2312, /* mb_wc */
|
||||
my_wc_mb_gb2312, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -10001,6 +10001,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_gbk,
|
||||
my_wc_mb_gbk,
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -397,6 +397,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_latin1,
|
||||
my_wc_mb_latin1,
|
||||
my_mb_ctype_8bit,
|
||||
my_caseup_str_8bit,
|
||||
my_casedn_str_8bit,
|
||||
my_caseup_8bit,
|
||||
|
|
|
@ -914,6 +914,22 @@ uint my_numcells_mb(CHARSET_INFO *cs, const char *b, const char *e)
|
|||
}
|
||||
|
||||
|
||||
/*
  Get the ctype of the character starting at s, for multi-byte charsets.

  SYNOPSIS
    my_mb_ctype_mb()
    cs      Character set; its mb_wc() handler is used to decode the character
    ctype   OUT: ctype of the decoded character, or 0 if it cannot be
            determined
    s       Start of the input
    e       End of the input

  RETURN
    The value returned by cs->cset->mb_wc(). When that value is <= 0,
    or when the decoded code point does not fit into the 256-page
    my_uni_ctype table (i.e. it is above 0xFFFF), *ctype is set to 0.
*/
int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
                   const unsigned char *s, const unsigned char *e)
{
  my_wc_t wc;
  int res= cs->cset->mb_wc(cs, &wc, s, e);

  /*
    my_uni_ctype has only 256 pages, so wc >> 8 must stay below 256;
    anything larger would index past the end of the table.
  */
  if (res <= 0 || wc > 0xFFFF)
    *ctype= 0;
  else
  {
    const MY_UNI_CTYPE *page= &my_uni_ctype[wc >> 8];
    /* A page without its own array shares one ctype for all characters */
    *ctype= page->ctype ? page->ctype[wc & 0xFF] : page->pctype;
  }
  return res;
}
|
||||
|
||||
|
||||
|
||||
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
|
|
@ -1354,6 +1354,19 @@ longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/*
  Get the ctype of the byte at s, for 8-bit charsets.

  SYNOPSIS
    my_mb_ctype_8bit()
    cs      Character set whose ctype[] array is consulted
    ctype   OUT: cs->ctype[*s], or 0 when the input is empty
    s       Start of the input
    e       End of the input

  RETURN
    1                 a byte was available at s
    MY_CS_TOOFEW(0)   s >= e, nothing to examine
*/
int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
                     const unsigned char *s, const unsigned char *e)
{
  if (s < e)
  {
    *ctype= cs->ctype[*s];
    return 1;
  }

  /* Empty input: report "no type" and signal the shortage to the caller */
  *ctype= 0;
  return MY_CS_TOOFEW(0);
}
|
||||
|
||||
|
||||
/*
|
||||
Check if a constant can be propagated
|
||||
|
||||
|
@ -1420,6 +1433,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_8bit,
|
||||
my_wc_mb_8bit,
|
||||
my_mb_ctype_8bit,
|
||||
my_caseup_str_8bit,
|
||||
my_casedn_str_8bit,
|
||||
my_caseup_8bit,
|
||||
|
|
|
@ -4649,6 +4649,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_sjis,
|
||||
my_mb_wc_sjis, /* mb_wc */
|
||||
my_wc_mb_sjis, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_8bit,
|
||||
my_casedn_str_8bit,
|
||||
my_caseup_8bit,
|
||||
|
|
|
@ -877,6 +877,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_8bit,
|
||||
my_mb_wc_tis620, /* mb_wc */
|
||||
my_wc_mb_tis620, /* wc_mb */
|
||||
my_mb_ctype_8bit,
|
||||
my_caseup_str_8bit,
|
||||
my_casedn_str_8bit,
|
||||
my_caseup_8bit,
|
||||
|
|
|
@ -1615,6 +1615,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
|||
my_numcells_mb,
|
||||
my_ucs2_uni, /* mb_wc */
|
||||
my_uni_ucs2, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_ucs2,
|
||||
my_casedn_str_ucs2,
|
||||
my_caseup_ucs2,
|
||||
|
|
|
@ -8531,6 +8531,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_numcells_eucjp,
|
||||
my_mb_wc_euc_jp, /* mb_wc */
|
||||
my_wc_mb_euc_jp, /* wc_mb */
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_mb,
|
||||
my_casedn_str_mb,
|
||||
my_caseup_mb,
|
||||
|
|
|
@ -41,6 +41,8 @@
|
|||
|
||||
#ifdef HAVE_UNIDATA
|
||||
|
||||
#include "my_uctype.h"
|
||||
|
||||
static MY_UNICASE_INFO plane00[]={
|
||||
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
|
||||
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
|
||||
|
@ -2534,6 +2536,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
|
|||
my_numcells_mb,
|
||||
my_utf8_uni,
|
||||
my_uni_utf8,
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_utf8,
|
||||
my_casedn_str_utf8,
|
||||
my_caseup_utf8,
|
||||
|
@ -4027,6 +4030,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
|
|||
my_numcells_mb,
|
||||
my_mb_wc_filename,
|
||||
my_wc_mb_filename,
|
||||
my_mb_ctype_mb,
|
||||
my_caseup_str_utf8,
|
||||
my_casedn_str_utf8,
|
||||
my_caseup_utf8,
|
||||
|
|
226
strings/uctypedump.c
Normal file
226
strings/uctypedump.c
Normal file
|
@ -0,0 +1,226 @@
|
|||
/*
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
*/
|
||||
#include <my_global.h>
|
||||
#include <m_string.h>
|
||||
#include <m_ctype.h>
|
||||
#include "m_ctype.h"
|
||||
|
||||
|
||||
/*
  One entry of the name-to-ctype mapping: a two-letter category
  name and the ctype bit mask assigned to it.
*/
typedef struct my_ctype_name_st
{
  const char *name;   /* Category name, e.g. "Lu"; NULL ends the table */
  int         val;    /* Ctype bit mask for this category              */
} MY_CTYPE_NAME_ST;
|
||||
|
||||
|
||||
static MY_CTYPE_NAME_ST my_ctype_name[]=
|
||||
{
|
||||
{"Lu", _MY_U}, /* Letter, Uppercase */
|
||||
{"Ll", _MY_L}, /* Letter, Lowercase */
|
||||
{"Lt", _MY_U}, /* Letter, Titlecase */
|
||||
{"Lm", _MY_L}, /* Letter, Modifier */
|
||||
{"Lo", _MY_L}, /* Letter, other */
|
||||
|
||||
{"Nd", _MY_NMR}, /* Number, Decimal Digit */
|
||||
{"Nl", _MY_NMR|_MY_U|_MY_L}, /* Number, Letter */
|
||||
{"No", _MY_NMR|_MY_PNT}, /* Number, Other */
|
||||
|
||||
{"Mn", _MY_L|_MY_PNT}, /* Mark, Nonspacing */
|
||||
{"Mc", _MY_L|_MY_PNT}, /* Mark, Spacing Combining */
|
||||
{"Me", _MY_L|_MY_PNT}, /* Mark, Enclosing */
|
||||
|
||||
{"Pc", _MY_PNT}, /* Punctuation, Connector */
|
||||
{"Pd", _MY_PNT}, /* Punctuation, Dash */
|
||||
{"Ps", _MY_PNT}, /* Punctuation, Open */
|
||||
{"Pe", _MY_PNT}, /* Punctuation, Close */
|
||||
{"Pi", _MY_PNT}, /* Punctuation, Initial quote */
|
||||
{"Pf", _MY_PNT}, /* Punctuation, Final quote */
|
||||
{"Po", _MY_PNT}, /* Punctuation, Other */
|
||||
|
||||
{"Sm", _MY_PNT}, /* Symbol, Math */
|
||||
{"Sc", _MY_PNT}, /* Symbol, Currency */
|
||||
{"Sk", _MY_PNT}, /* Symbol, Modifier */
|
||||
{"So", _MY_PNT}, /* Symbol, Other */
|
||||
|
||||
{"Zs", _MY_SPC}, /* Separator, Space */
|
||||
{"Zl", _MY_SPC}, /* Separator, Line */
|
||||
{"Zp", _MY_SPC}, /* Separator, Paragraph */
|
||||
|
||||
{"Cc", _MY_CTR}, /* Other, Control */
|
||||
{"Cf", _MY_CTR}, /* Other, Format */
|
||||
{"Cs", _MY_CTR}, /* Other, Surrogate */
|
||||
{"Co", _MY_CTR}, /* Other, Private Use */
|
||||
{"Cn", _MY_CTR}, /* Other, Not Assigned */
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
|
||||
static int
|
||||
ctypestr2num(const char *tok)
|
||||
{
|
||||
MY_CTYPE_NAME_ST *p;
|
||||
for (p= my_ctype_name; p->name; p++)
|
||||
{
|
||||
if (!strncasecmp(p->name, tok, 2))
|
||||
return p->val;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int ac, char ** av)
|
||||
{
|
||||
char str[1024];
|
||||
unsigned char ctypea[64*1024];
|
||||
size_t i;
|
||||
size_t plane;
|
||||
MY_UNI_CTYPE uctype[256];
|
||||
FILE *f= stdin;
|
||||
|
||||
if (ac > 1 && av[1] && !(f= fopen(av[1],"r")))
|
||||
{
|
||||
fprintf(stderr, "Can't open file %s\n", av[1]);
|
||||
exit(1);
|
||||
}
|
||||
bzero(&ctypea,sizeof(ctypea));
|
||||
bzero(&uctype, sizeof(uctype));
|
||||
|
||||
printf("/*\n");
|
||||
printf(" Unicode ctype data\n");
|
||||
printf(" Generated from %s\n", av[1] ? av[1] : "stdin");
|
||||
printf("*/\n");
|
||||
|
||||
while(fgets(str, sizeof(str), f))
|
||||
{
|
||||
size_t n= 0, code= 0;
|
||||
char *s,*e;
|
||||
int ctype= 0;
|
||||
|
||||
for(s= str; s; )
|
||||
{
|
||||
char *end;
|
||||
char tok[1024]="";
|
||||
e=strchr(s,';');
|
||||
if(e)
|
||||
{
|
||||
strncpy(tok,s,(unsigned int)(e-s));
|
||||
tok[e-s]=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(tok,s);
|
||||
}
|
||||
|
||||
end=tok+strlen(tok);
|
||||
|
||||
switch(n)
|
||||
{
|
||||
case 0: code= strtol(tok,&end,16);break;
|
||||
case 2: ctype= ctypestr2num(tok);break;
|
||||
}
|
||||
|
||||
n++;
|
||||
if(e) s=e+1;
|
||||
else s=e;
|
||||
}
|
||||
if(code<=0xFFFF)
|
||||
{
|
||||
ctypea[code]= ctype;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill digits */
|
||||
for (i= '0'; i <= '9'; i++)
|
||||
ctypea[i]= _MY_NMR;
|
||||
|
||||
for (i= 'a'; i <= 'z'; i++)
|
||||
ctypea[i]|= _MY_X;
|
||||
for (i= 'A'; i <= 'Z'; i++)
|
||||
ctypea[i]|= _MY_X;
|
||||
|
||||
|
||||
/* Fill ideographs */
|
||||
|
||||
/* CJK Ideographs Extension A (U+3400 - U+4DB5) */
|
||||
for(i=0x3400;i<=0x4DB5;i++)
|
||||
{
|
||||
ctypea[i]= _MY_L | _MY_U;
|
||||
}
|
||||
|
||||
/* CJK Ideographs (U+4E00 - U+9FA5) */
|
||||
for(i=0x4E00;i<=0x9FA5;i++){
|
||||
ctypea[i]= _MY_L | _MY_U;
|
||||
}
|
||||
|
||||
/* Hangul Syllables (U+AC00 - U+D7A3) */
|
||||
for(i=0xAC00;i<=0xD7A3;i++)
|
||||
{
|
||||
ctypea[i]= _MY_L | _MY_U;
|
||||
}
|
||||
|
||||
|
||||
/* Calc plane parameters */
|
||||
for(plane=0;plane<256;plane++)
|
||||
{
|
||||
size_t character;
|
||||
uctype[plane].ctype= ctypea+plane*256;
|
||||
|
||||
uctype[plane].pctype= uctype[plane].ctype[0];
|
||||
for(character=1;character<256;character++)
|
||||
{
|
||||
if (uctype[plane].ctype[character] != uctype[plane].pctype)
|
||||
{
|
||||
uctype[plane].pctype= 0; /* Mixed plane */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (character==256) /* All the same, no needs to dump whole plane */
|
||||
uctype[plane].ctype= NULL;
|
||||
}
|
||||
|
||||
/* Dump mixed planes */
|
||||
for(plane=0;plane<256;plane++)
|
||||
{
|
||||
if(uctype[plane].ctype)
|
||||
{
|
||||
int charnum=0;
|
||||
int num=0;
|
||||
|
||||
printf("static unsigned char uctype_page%02X[256]=\n{\n",plane);
|
||||
|
||||
for(charnum=0;charnum<256;charnum++)
|
||||
{
|
||||
int cod;
|
||||
|
||||
cod=(plane<<8)+charnum;
|
||||
printf(" %2d%s",uctype[plane].ctype[charnum],charnum<255?",":"");
|
||||
|
||||
num++;
|
||||
if(num==16)
|
||||
{
|
||||
printf("\n");
|
||||
num=0;
|
||||
}
|
||||
}
|
||||
printf("};\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Dump plane index */
|
||||
printf("MY_UNI_CTYPE my_uni_ctype[256]={\n");
|
||||
for(plane=0;plane<256;plane++)
|
||||
{
|
||||
char plane_name[128]="NULL";
|
||||
if(uctype[plane].ctype){
|
||||
sprintf(plane_name,"uctype_page%02X",plane);
|
||||
}
|
||||
printf("\t{%d,%s}%s\n",uctype[plane].pctype,plane_name,plane<255?",":"");
|
||||
}
|
||||
printf("};\n");
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in a new issue