mariadb/strings/uctypedump.c

242 lines
6.1 KiB
C
Raw Normal View History

Many files: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header Added GPL copyright text my_vle.h, rpl_utility.h, my_vle.c, base64-t.c, rpl_utility.cc: Changed copyright header formatting some plugin_example.c, daemon_example.c: Added "Copyright (C) 2006 MySQL AB" to GPL header CMakeLists.txt: Added GPL copyright text client/CMakeLists.txt: Added GPL copyright text dbug/CMakeLists.txt: Added GPL copyright text extra/CMakeLists.txt: Added GPL copyright text extra/yassl/CMakeLists.txt: Added GPL copyright text extra/yassl/taocrypt/CMakeLists.txt: Added GPL copyright text libmysql/CMakeLists.txt: Added GPL copyright text libmysqld/CMakeLists.txt: Added GPL copyright text libmysqld/examples/CMakeLists.txt: Added GPL copyright text mysys/CMakeLists.txt: Added GPL copyright text regex/CMakeLists.txt: Added GPL copyright text server-tools/instance-manager/CMakeLists.txt: Added GPL copyright text sql/CMakeLists.txt: Added GPL copyright text storage/archive/CMakeLists.txt: Added GPL copyright text storage/blackhole/CMakeLists.txt: Added GPL copyright text storage/csv/CMakeLists.txt: Added GPL copyright text storage/example/CMakeLists.txt: Added GPL copyright text storage/federated/CMakeLists.txt: Added GPL copyright text storage/heap/CMakeLists.txt: Added GPL copyright text storage/innobase/CMakeLists.txt: Added GPL copyright text storage/myisam/CMakeLists.txt: Added GPL copyright text storage/myisammrg/CMakeLists.txt: Added GPL copyright text strings/CMakeLists.txt: Added GPL copyright text tests/CMakeLists.txt: Added GPL copyright text vio/CMakeLists.txt: Added GPL copyright text zlib/CMakeLists.txt: Added GPL copyright text include/my_vle.h: Changed copyright header formatting some mysys/my_vle.c: Changed copyright header formatting some plugin/daemon_example/daemon_example.c: Added "Copyright (C) 2006 MySQL AB" to GPL header plugin/fulltext/plugin_example.c: Added "Copyright (C) 2006 MySQL AB" to GPL header plugin/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/authors.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/contributors.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/item_xmlfunc.cc: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/partition_element.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/partition_info.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/rpl_injector.cc: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/rpl_injector.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/sql_binlog.cc: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/sql_servers.h: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/archive/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/blackhole/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/csv/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/example/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header storage/federated/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header win/Makefile.am: Removed "MySQL Finland AB & TCX DataKonsult AB" from copyright header Adjusted year(s) in copyright header sql/rpl_utility.cc: Changed copyright header formatting some sql/rpl_utility.h: Changed copyright header formatting some unittest/mysys/base64-t.c: Changed copyright header formatting some include/my_uctype.h: Added GPL copyright text plugin/daemon_example/Makefile.am: Added GPL copyright text plugin/fulltext/Makefile.am: Added GPL copyright text scripts/make_win_bin_dist: Added GPL copyright text server-tools/instance-manager/user_management_commands.cc: Added GPL copyright text sql/sql_builtin.cc.in: Added GPL copyright text sql/sql_show.h: Added GPL copyright text storage/archive/archive_test.c: Added GPL copyright text storage/ndb/src/kernel/blocks/dbtup/test_varpage.cpp: Added GPL copyright text storage/ndb/src/kernel/blocks/diskpage.cpp: Added GPL copyright text storage/ndb/src/kernel/vm/Rope.cpp: Added GPL copyright text storage/ndb/src/mgmsrv/ParamInfo.hpp: Added GPL copyright text strings/uctypedump.c: Added GPL copyright text unittest/Makefile.am: Added GPL copyright text unittest/examples/Makefile.am: Added GPL copyright text unittest/examples/core-t.c: Added GPL copyright text unittest/examples/no_plan-t.c: Added GPL copyright text unittest/examples/simple-t.c: Added GPL copyright text unittest/examples/skip-t.c: Added GPL copyright text unittest/examples/skip_all-t.c: Added GPL copyright text unittest/examples/todo-t.c: Added GPL copyright text unittest/mysys/Makefile.am: Added GPL copyright text unittest/mytap/Makefile.am: Added GPL copyright text unittest/mytap/t/Makefile.am: Added GPL copyright text unittest/mytap/t/basic-t.c: Added GPL copyright text unittest/unit.pl: Added GPL copyright text win/build-vs71.bat: Added GPL copyright text win/build-vs8.bat: Added GPL copyright text win/configure.js: Added GPL copyright text
2006-12-31 02:29:11 +01:00
/* Copyright (C) 2006 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
WL#1386 - CTYPE table for unicode character sets A prerequisite for several fulltext and XML bugs. MY_CHARSET_HANDLER now has a new function "ctype" to detect a type of the next character in a string (i.e. digit, letter, space, punctuation, control, etc), which now works correctly for both 8bit and multibyte charsets. Previously only 8bit charsets worked correctly, while any multibyte character was considered as letter in multibyte charsets. Many files: Adding new function Makefile.am: Adding build rules for uctypedump, a dump tool to create my_uctype.h using Unicode Character Database file. m_ctype.h: Adding declaration of my_uni_ctype, ctype data for Unicode. Adding new member into MY_CHARSET_HANDLER Makefile.am: Adding my_uctype.h into noinst_HEADERS my_uctype.h, uctypedump.c: new files: ctype data for unicode, and the tool to generate it from a Unicode Character Database file. include/Makefile.am: Adding my_uctype.h include/m_ctype.h: Adding declaration of my_uni_ctype, ctype data for Unicode. strings/Makefile.am: Adding build rules for uctypedump, a dump tool to create my_uctype.h using Unicode Character Database file. strings/ctype-big5.c: Adding new function strings/ctype-bin.c: Adding new function strings/ctype-cp932.c: Adding new function strings/ctype-euc_kr.c: Adding new function strings/ctype-eucjpms.c: Adding new function strings/ctype-gb2312.c: Adding new function strings/ctype-gbk.c: Adding new function strings/ctype-latin1.c: Adding new function strings/ctype-mb.c: Adding new function strings/ctype-simple.c: Adding new function strings/ctype-sjis.c: Adding new function strings/ctype-tis620.c: Adding new function strings/ctype-ucs2.c: Adding new function strings/ctype-ujis.c: Adding new function strings/ctype-utf8.c: Adding new function
2006-02-02 10:07:47 +04:00
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
*/
#include <my_global.h>
#include <m_string.h>
#include <m_ctype.h>
#include "m_ctype.h"
typedef struct my_ctype_name_st
{
const char *name;
int val;
} MY_CTYPE_NAME_ST;
static MY_CTYPE_NAME_ST my_ctype_name[]=
{
{"Lu", _MY_U}, /* Letter, Uppercase */
{"Ll", _MY_L}, /* Letter, Lowercase */
{"Lt", _MY_U}, /* Letter, Titlecase */
{"Lm", _MY_L}, /* Letter, Modifier */
{"Lo", _MY_L}, /* Letter, other */
{"Nd", _MY_NMR}, /* Number, Decimal Digit */
{"Nl", _MY_NMR|_MY_U|_MY_L}, /* Number, Letter */
{"No", _MY_NMR|_MY_PNT}, /* Number, Other */
{"Mn", _MY_L|_MY_PNT}, /* Mark, Nonspacing */
{"Mc", _MY_L|_MY_PNT}, /* Mark, Spacing Combining */
{"Me", _MY_L|_MY_PNT}, /* Mark, Enclosing */
{"Pc", _MY_PNT}, /* Punctuation, Connector */
{"Pd", _MY_PNT}, /* Punctuation, Dash */
{"Ps", _MY_PNT}, /* Punctuation, Open */
{"Pe", _MY_PNT}, /* Punctuation, Close */
{"Pi", _MY_PNT}, /* Punctuation, Initial quote */
{"Pf", _MY_PNT}, /* Punctuation, Final quote */
{"Po", _MY_PNT}, /* Punctuation, Other */
{"Sm", _MY_PNT}, /* Symbol, Math */
{"Sc", _MY_PNT}, /* Symbol, Currency */
{"Sk", _MY_PNT}, /* Symbol, Modifier */
{"So", _MY_PNT}, /* Symbol, Other */
{"Zs", _MY_SPC}, /* Separator, Space */
{"Zl", _MY_SPC}, /* Separator, Line */
{"Zp", _MY_SPC}, /* Separator, Paragraph */
{"Cc", _MY_CTR}, /* Other, Control */
{"Cf", _MY_CTR}, /* Other, Format */
{"Cs", _MY_CTR}, /* Other, Surrogate */
{"Co", _MY_CTR}, /* Other, Private Use */
{"Cn", _MY_CTR}, /* Other, Not Assigned */
{NULL, 0}
};
static int
ctypestr2num(const char *tok)
{
MY_CTYPE_NAME_ST *p;
for (p= my_ctype_name; p->name; p++)
{
if (!strncasecmp(p->name, tok, 2))
return p->val;
}
return 0;
}
int main(int ac, char ** av)
{
char str[1024];
unsigned char ctypea[64*1024];
size_t i;
size_t plane;
MY_UNI_CTYPE uctype[256];
FILE *f= stdin;
if (ac > 1 && av[1] && !(f= fopen(av[1],"r")))
{
fprintf(stderr, "Can't open file %s\n", av[1]);
exit(1);
}
bzero(&ctypea,sizeof(ctypea));
bzero(&uctype, sizeof(uctype));
printf("/*\n");
printf(" Unicode ctype data\n");
printf(" Generated from %s\n", av[1] ? av[1] : "stdin");
printf("*/\n");
while(fgets(str, sizeof(str), f))
{
size_t n= 0, code= 0;
char *s,*e;
int ctype= 0;
for(s= str; s; )
{
char *end;
char tok[1024]="";
e=strchr(s,';');
if(e)
{
strncpy(tok,s,(unsigned int)(e-s));
tok[e-s]=0;
}
else
{
strcpy(tok,s);
}
end=tok+strlen(tok);
switch(n)
{
case 0: code= strtol(tok,&end,16);break;
case 2: ctype= ctypestr2num(tok);break;
}
n++;
if(e) s=e+1;
else s=e;
}
if(code<=0xFFFF)
{
ctypea[code]= ctype;
}
}
/* Fill digits */
for (i= '0'; i <= '9'; i++)
ctypea[i]= _MY_NMR;
for (i= 'a'; i <= 'z'; i++)
ctypea[i]|= _MY_X;
for (i= 'A'; i <= 'Z'; i++)
ctypea[i]|= _MY_X;
/* Fill ideographs */
/* CJK Ideographs Extension A (U+3400 - U+4DB5) */
for(i=0x3400;i<=0x4DB5;i++)
{
ctypea[i]= _MY_L | _MY_U;
}
/* CJK Ideographs (U+4E00 - U+9FA5) */
for(i=0x4E00;i<=0x9FA5;i++){
ctypea[i]= _MY_L | _MY_U;
}
/* Hangul Syllables (U+AC00 - U+D7A3) */
for(i=0xAC00;i<=0xD7A3;i++)
{
ctypea[i]= _MY_L | _MY_U;
}
/* Calc plane parameters */
for(plane=0;plane<256;plane++)
{
size_t character;
uctype[plane].ctype= ctypea+plane*256;
uctype[plane].pctype= uctype[plane].ctype[0];
for(character=1;character<256;character++)
{
if (uctype[plane].ctype[character] != uctype[plane].pctype)
{
uctype[plane].pctype= 0; /* Mixed plane */
break;
}
}
if (character==256) /* All the same, no needs to dump whole plane */
uctype[plane].ctype= NULL;
}
/* Dump mixed planes */
for(plane=0;plane<256;plane++)
{
if(uctype[plane].ctype)
{
int charnum=0;
int num=0;
printf("static unsigned char uctype_page%02X[256]=\n{\n",plane);
for(charnum=0;charnum<256;charnum++)
{
int cod;
cod=(plane<<8)+charnum;
printf(" %2d%s",uctype[plane].ctype[charnum],charnum<255?",":"");
num++;
if(num==16)
{
printf("\n");
num=0;
}
}
printf("};\n\n");
}
}
/* Dump plane index */
printf("MY_UNI_CTYPE my_uni_ctype[256]={\n");
for(plane=0;plane<256;plane++)
{
char plane_name[128]="NULL";
if(uctype[plane].ctype){
sprintf(plane_name,"uctype_page%02X",plane);
}
printf("\t{%d,%s}%s\n",uctype[plane].pctype,plane_name,plane<255?",":"");
}
printf("};\n");
return 0;
}