Merge abarkov@bk-internal.mysql.com:/home/bk/mysql-5.1-new

into  mysql.com:/usr/home/bar/mysql-5.1.new.ctype
This commit is contained in:
unknown 2006-02-02 10:09:35 +04:00
commit 5312aafc7b
20 changed files with 1757 additions and 1 deletions

View file

@ -24,7 +24,7 @@ pkginclude_HEADERS = my_dbug.h m_string.h my_sys.h my_list.h my_xml.h \
sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
mysql_time.h plugin.h $(BUILT_SOURCES)
noinst_HEADERS = config-win.h config-os2.h config-netware.h \
heap.h my_bitmap.h\
heap.h my_bitmap.h my_uctype.h \
myisam.h myisampack.h myisammrg.h ft_global.h\
mysys_err.h my_base.h help_start.h help_end.h \
my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \

View file

@ -47,6 +47,15 @@ typedef struct unicase_info_st
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];
/*
  Ctype descriptor for one 256-character page of code points.
  my_uni_ctype[] holds 256 of these and is indexed by (wc >> 8).
*/
typedef struct uni_ctype_st
{
/* Ctype shared by the whole page; consulted only when 'ctype' is NULL */
unsigned char pctype;
/* Per-character ctype table indexed by (wc & 0xFF), or NULL */
unsigned char *ctype;
} MY_UNI_CTYPE;
extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_ILSEQ 0
#define MY_CS_ILUNI 0
#define MY_CS_TOOSMALL -1
@ -165,6 +174,10 @@ typedef struct my_charset_handler_st
int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
unsigned char *s,unsigned char *e);
/* CTYPE scanner */
int (*ctype)(struct charset_info_st *cs, int *ctype,
const unsigned char *s, const unsigned char *e);
/* Functions for case and sort conversion */
void (*caseup_str)(struct charset_info_st *, char *);
void (*casedn_str)(struct charset_info_st *, char *);
@ -308,6 +321,9 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);
ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);
int my_snprintf_8bit(struct charset_info_st *, char *to, uint n,

1464
include/my_uctype.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -40,6 +40,7 @@ endif
libmystrings_a_SOURCES = $(ASRCS) $(CSRCS)
noinst_PROGRAMS = conf_to_src
CLEANFILES = str_test uctypedump test_decimal
# Default charset definitions
EXTRA_DIST = ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-win1250ch.c \
ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-utf8.c \
@ -77,6 +78,9 @@ FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@
str_test: str_test.c $(pkglib_LIBRARIES)
$(LINK) $(FLAGS) -DMAIN $(INCLUDES) $(srcdir)/str_test.c $(LDADD) $(pkglib_LIBRARIES)
uctypedump: uctypedump.c
$(LINK) $(INCLUDES) $(srcdir)/uctypedump.c
test_decimal$(EXEEXT): decimal.c $(pkglib_LIBRARIES)
$(CP) $(srcdir)/decimal.c ./test_decimal.c
$(LINK) $(FLAGS) -DMAIN ./test_decimal.c $(LDADD) $(pkglib_LIBRARIES)

View file

@ -6356,6 +6356,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_numcells_8bit,
my_mb_wc_big5, /* mb_wc */
my_wc_mb_big5, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -503,6 +503,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_bin,
my_wc_mb_bin,
my_mb_ctype_8bit,
my_case_str_bin,
my_case_str_bin,
my_case_bin,

View file

@ -5478,6 +5478,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_cp932,
my_mb_wc_cp932, /* mb_wc */
my_wc_mb_cp932, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_8bit,
my_casedn_str_8bit,
my_caseup_8bit,

View file

@ -8697,6 +8697,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_euc_kr, /* mb_wc */
my_wc_mb_euc_kr, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -8663,6 +8663,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_eucjp,
my_mb_wc_euc_jp, /* mb_wc */
my_wc_mb_euc_jp, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -5748,6 +5748,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_gb2312, /* mb_wc */
my_wc_mb_gb2312, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -10001,6 +10001,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_gbk,
my_wc_mb_gbk,
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -397,6 +397,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_latin1,
my_wc_mb_latin1,
my_mb_ctype_8bit,
my_caseup_str_8bit,
my_casedn_str_8bit,
my_caseup_8bit,

View file

@ -914,6 +914,22 @@ uint my_numcells_mb(CHARSET_INFO *cs, const char *b, const char *e)
}
/*
  Get the ctype of the character starting at 's' in a multi-byte charset.

  The character is first converted to a wide character code with the
  charset's mb_wc() handler, then looked up in my_uni_ctype[]: the page
  is selected by (wc >> 8); a page with a non-NULL 'ctype' table is
  indexed by (wc & 0xFF), otherwise the page-wide 'pctype' is used.

  cs     charset whose mb_wc() handler is used for the conversion
  ctype  out: ctype of the character, or 0 if it cannot be determined
  s      start of the character
  e      end of the input buffer

  Returns whatever mb_wc() returned. *ctype is set to 0 when that value
  is <= 0, and also when the code is above 0xFFFF: my_uni_ctype[] has
  only 256 pages, so a larger code would index past the end of the array.
*/
int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
const unsigned char *s, const unsigned char *e)
{
my_wc_t wc;
int res= cs->cset->mb_wc(cs, &wc, s, e);
if (res <= 0 || wc > 0xFFFF)
*ctype= 0;
else
*ctype= my_uni_ctype[wc>>8].ctype ?
my_uni_ctype[wc>>8].ctype[wc&0xFF] :
my_uni_ctype[wc>>8].pctype;
return res;
}
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
NULL, /* init */

View file

@ -1354,6 +1354,19 @@ longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Get the ctype of the single byte at 's' in an 8-bit charset.

  cs     charset whose ctype[] table is consulted
  ctype  out: cs->ctype[*s], or 0 when there is no input left
  s      current position in the input
  e      end of the input buffer

  Returns 1 when a byte was examined, MY_CS_TOOFEW(0) when s >= e.
*/
int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
const unsigned char *s, const unsigned char *e)
{
if (s < e)
{
/* One byte available: its ctype comes from the charset table */
*ctype= cs->ctype[*s];
return 1;
}
/* Nothing left to read */
*ctype= 0;
return MY_CS_TOOFEW(0);
}
/*
Check if a constant can be propagated
@ -1420,6 +1433,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_numcells_8bit,
my_mb_wc_8bit,
my_wc_mb_8bit,
my_mb_ctype_8bit,
my_caseup_str_8bit,
my_casedn_str_8bit,
my_caseup_8bit,

View file

@ -4649,6 +4649,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_sjis,
my_mb_wc_sjis, /* mb_wc */
my_wc_mb_sjis, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_8bit,
my_casedn_str_8bit,
my_caseup_8bit,

View file

@ -877,6 +877,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_8bit,
my_mb_wc_tis620, /* mb_wc */
my_wc_mb_tis620, /* wc_mb */
my_mb_ctype_8bit,
my_caseup_str_8bit,
my_casedn_str_8bit,
my_caseup_8bit,

View file

@ -1615,6 +1615,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_numcells_mb,
my_ucs2_uni, /* mb_wc */
my_uni_ucs2, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_ucs2,
my_casedn_str_ucs2,
my_caseup_ucs2,

View file

@ -8531,6 +8531,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_numcells_eucjp,
my_mb_wc_euc_jp, /* mb_wc */
my_wc_mb_euc_jp, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb,
my_casedn_str_mb,
my_caseup_mb,

View file

@ -41,6 +41,8 @@
#ifdef HAVE_UNIDATA
#include "my_uctype.h"
static MY_UNICASE_INFO plane00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
@ -2534,6 +2536,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_numcells_mb,
my_utf8_uni,
my_uni_utf8,
my_mb_ctype_mb,
my_caseup_str_utf8,
my_casedn_str_utf8,
my_caseup_utf8,
@ -4027,6 +4030,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
my_numcells_mb,
my_mb_wc_filename,
my_wc_mb_filename,
my_mb_ctype_mb,
my_caseup_str_utf8,
my_casedn_str_utf8,
my_caseup_utf8,

226
strings/uctypedump.c Normal file
View file

@ -0,0 +1,226 @@
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
*/
#include <my_global.h>
#include <m_string.h>
#include <m_ctype.h>
#include "m_ctype.h"
/*
  One entry of the name-to-ctype lookup table: a category abbreviation
  and the ctype value that ctypestr2num() returns for it.
*/
typedef struct my_ctype_name_st
{
/* Category abbreviation; NULL marks the end of the table */
const char *name;
/* Ctype value associated with 'name' */
int val;
} MY_CTYPE_NAME_ST;
/*
  Lookup table from two-letter category abbreviations to ctype values
  built by OR-ing _MY_* macros (defined outside this file; presumably
  in m_ctype.h). Scanned linearly by ctypestr2num(); the final
  {NULL, 0} entry terminates the scan.
*/
static MY_CTYPE_NAME_ST my_ctype_name[]=
{
{"Lu", _MY_U}, /* Letter, Uppercase */
{"Ll", _MY_L}, /* Letter, Lowercase */
{"Lt", _MY_U}, /* Letter, Titlecase */
{"Lm", _MY_L}, /* Letter, Modifier */
{"Lo", _MY_L}, /* Letter, other */
{"Nd", _MY_NMR}, /* Number, Decimal Digit */
{"Nl", _MY_NMR|_MY_U|_MY_L}, /* Number, Letter */
{"No", _MY_NMR|_MY_PNT}, /* Number, Other */
{"Mn", _MY_L|_MY_PNT}, /* Mark, Nonspacing */
{"Mc", _MY_L|_MY_PNT}, /* Mark, Spacing Combining */
{"Me", _MY_L|_MY_PNT}, /* Mark, Enclosing */
{"Pc", _MY_PNT}, /* Punctuation, Connector */
{"Pd", _MY_PNT}, /* Punctuation, Dash */
{"Ps", _MY_PNT}, /* Punctuation, Open */
{"Pe", _MY_PNT}, /* Punctuation, Close */
{"Pi", _MY_PNT}, /* Punctuation, Initial quote */
{"Pf", _MY_PNT}, /* Punctuation, Final quote */
{"Po", _MY_PNT}, /* Punctuation, Other */
{"Sm", _MY_PNT}, /* Symbol, Math */
{"Sc", _MY_PNT}, /* Symbol, Currency */
{"Sk", _MY_PNT}, /* Symbol, Modifier */
{"So", _MY_PNT}, /* Symbol, Other */
{"Zs", _MY_SPC}, /* Separator, Space */
{"Zl", _MY_SPC}, /* Separator, Line */
{"Zp", _MY_SPC}, /* Separator, Paragraph */
{"Cc", _MY_CTR}, /* Other, Control */
{"Cf", _MY_CTR}, /* Other, Format */
{"Cs", _MY_CTR}, /* Other, Surrogate */
{"Co", _MY_CTR}, /* Other, Private Use */
{"Cn", _MY_CTR}, /* Other, Not Assigned */
{NULL, 0}
};
/*
  Translate a category abbreviation into its ctype value.

  The first two characters of 'tok' are compared case-insensitively
  against each name in my_ctype_name[], in table order.

  Returns the 'val' of the first matching entry, or 0 if none matches.
*/
static int
ctypestr2num(const char *tok)
{
  MY_CTYPE_NAME_ST *entry= my_ctype_name;

  while (entry->name)
  {
    if (strncasecmp(entry->name, tok, 2) == 0)
      return entry->val;
    entry++;
  }
  return 0;
}
/*
  Generate the Unicode ctype tables as C source on stdout.

  Input is read from the file named by av[1], or from stdin when no
  argument is given. Every input line is split on ';': field 0 is
  parsed as a hexadecimal character code and field 2 is translated to a
  ctype value with ctypestr2num(). Only codes in 0..0xFFFF are kept.

  After loading, ASCII digits and letters and three ideograph/syllable
  ranges are patched by hand, then the 64K table is split into 256
  pages of 256 characters. A page whose entries are all equal is
  emitted only as a single 'pctype' value with a NULL table pointer;
  every other page is dumped as a "uctype_pageXX" array. Finally the
  my_uni_ctype[256] index is printed.

  Returns 0 on success; exits with status 1 if the input file cannot
  be opened.
*/
int main(int ac, char ** av)
{
  char str[1024];
  unsigned char ctypea[64*1024];
  size_t i;
  size_t plane;
  MY_UNI_CTYPE uctype[256];
  FILE *f= stdin;
  /* Do not touch av[1] unless the caller really passed an argument */
  const char *fname= (ac > 1) ? av[1] : NULL;

  if (fname && !(f= fopen(fname,"r")))
  {
    fprintf(stderr, "Can't open file %s\n", fname);
    exit(1);
  }

  bzero(&ctypea,sizeof(ctypea));
  bzero(&uctype, sizeof(uctype));

  printf("/*\n");
  printf(" Unicode ctype data\n");
  printf(" Generated from %s\n", fname ? fname : "stdin");
  printf("*/\n");

  while (fgets(str, sizeof(str), f))
  {
    size_t n;
    long code= 0;
    int have_code= 0;
    int ctype= 0;
    char *s, *e;

    /* Walk the ';'-separated fields; n is the current field number */
    for (n= 0, s= str; s; n++)
    {
      /* A field can never be longer than the line buffer itself */
      char tok[sizeof(str)];
      char *end;
      size_t len;

      e= strchr(s, ';');
      len= e ? (size_t) (e - s) : strlen(s);
      memcpy(tok, s, len);
      tok[len]= 0;

      switch (n)
      {
      case 0:
        code= strtol(tok, &end, 16);
        /*
          Remember whether any hex digits were consumed, so that a blank
          or malformed line is not mistaken for character code 0.
        */
        have_code= (end != tok);
        break;
      case 2:
        ctype= ctypestr2num(tok);
        break;
      default:
        break;
      }
      s= e ? e + 1 : NULL;
    }

    if (have_code && code >= 0 && code <= 0xFFFF)
    {
      ctypea[code]= (unsigned char) ctype;
    }
  }

  if (f != stdin)
    fclose(f);

  /* Fill digits */
  for (i= '0'; i <= '9'; i++)
    ctypea[i]= _MY_NMR;

  /*
    NOTE(review): every ASCII letter gets _MY_X here, not only a-f/A-F.
    Kept unchanged so that the generated tables stay the same; confirm
    whether this is intended.
  */
  for (i= 'a'; i <= 'z'; i++)
    ctypea[i]|= _MY_X;
  for (i= 'A'; i <= 'Z'; i++)
    ctypea[i]|= _MY_X;

  /* Fill ideographs */

  /* CJK Ideographs Extension A (U+3400 - U+4DB5) */
  for (i= 0x3400; i <= 0x4DB5; i++)
  {
    ctypea[i]= _MY_L | _MY_U;
  }

  /* CJK Ideographs (U+4E00 - U+9FA5) */
  for (i= 0x4E00; i <= 0x9FA5; i++)
  {
    ctypea[i]= _MY_L | _MY_U;
  }

  /* Hangul Syllables (U+AC00 - U+D7A3) */
  for (i= 0xAC00; i <= 0xD7A3; i++)
  {
    ctypea[i]= _MY_L | _MY_U;
  }

  /* Calc plane parameters */
  for (plane= 0; plane < 256; plane++)
  {
    size_t character;
    uctype[plane].ctype= ctypea + plane * 256;
    uctype[plane].pctype= uctype[plane].ctype[0];
    for (character= 1; character < 256; character++)
    {
      if (uctype[plane].ctype[character] != uctype[plane].pctype)
      {
        uctype[plane].pctype= 0; /* Mixed plane */
        break;
      }
    }
    if (character == 256) /* All the same, no need to dump the whole plane */
      uctype[plane].ctype= NULL;
  }

  /* Dump mixed planes */
  for (plane= 0; plane < 256; plane++)
  {
    if (uctype[plane].ctype)
    {
      int charnum;
      int num= 0;

      /* %X expects unsigned int, 'plane' is size_t: convert explicitly */
      printf("static unsigned char uctype_page%02X[256]=\n{\n",
             (unsigned int) plane);
      for (charnum= 0; charnum < 256; charnum++)
      {
        printf(" %2d%s", uctype[plane].ctype[charnum],
               charnum < 255 ? "," : "");
        /* Break the row after every 16 values */
        if (++num == 16)
        {
          printf("\n");
          num= 0;
        }
      }
      printf("};\n\n");
    }
  }

  /* Dump plane index */
  printf("MY_UNI_CTYPE my_uni_ctype[256]={\n");
  for (plane= 0; plane < 256; plane++)
  {
    char plane_name[128]= "NULL";
    if (uctype[plane].ctype)
    {
      sprintf(plane_name, "uctype_page%02X", (unsigned int) plane);
    }
    printf("\t{%d,%s}%s\n", uctype[plane].pctype, plane_name,
           plane < 255 ? "," : "");
  }
  printf("};\n");

  return 0;
}