/* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #ifndef SCO #include #endif /* This files implements routines which parse XML based character set and collation description files. Unicode collations are encoded according to Unicode Technical Standard #35 Locale Data Markup Language (LDML) http://www.unicode.org/reports/tr35/ and converted into ICU string according to Collation Customization http://oss.software.ibm.com/icu/userguide/Collate_Customization.html */ static char *mstr(char *str,const char *src,uint l1,uint l2) { l1= l1str; s++) { if (!strncmp(attr,s->str,len)) return s; } return NULL; } #define MY_CS_CSDESCR_SIZE 64 #define MY_CS_TAILORING_SIZE 128 typedef struct my_cs_file_info { char csname[MY_CS_NAME_SIZE]; char name[MY_CS_NAME_SIZE]; uchar ctype[MY_CS_CTYPE_TABLE_SIZE]; uchar to_lower[MY_CS_TO_LOWER_TABLE_SIZE]; uchar to_upper[MY_CS_TO_UPPER_TABLE_SIZE]; uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE]; uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE]; char comment[MY_CS_CSDESCR_SIZE]; char tailoring[MY_CS_TAILORING_SIZE]; size_t tailoring_length; CHARSET_INFO cs; int (*add_collation)(CHARSET_INFO *cs); } MY_CHARSET_LOADER; static int fill_uchar(uchar *a,uint size,const char *str, uint len) { uint i= 0; const char *s, *b, *e=str+len; for (s=str ; s < e ; i++) { for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ; b=s; for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ; if (s == b || i > size) break; a[i]= (uchar) strtoul(b,NULL,16); } return 0; } static int fill_uint16(uint16 *a,uint size,const char *str, uint len) { uint i= 0; const char *s, *b, *e=str+len; for (s=str ; s < e ; i++) { for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ; b=s; for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ; if (s == b || i > size) break; a[i]= (uint16) strtol(b,NULL,16); } return 0; } static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len) { struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data; struct my_cs_file_section_st *s= cs_file_sec(attr,len); if ( s && (s->state == _CS_CHARSET)) bzero(&i->cs,sizeof(i->cs)); if (s && (s->state == _CS_COLLATION)) i->tailoring_length= 0; return MY_XML_OK; } static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len) { struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data; struct my_cs_file_section_st *s= cs_file_sec(attr,len); int state= s ? s->state : 0; int rc; switch(state){ case _CS_COLLATION: rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK; break; default: rc=MY_XML_OK; } return rc; } static int cs_value(MY_XML_PARSER *st,const char *attr, uint len) { struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data; struct my_cs_file_section_st *s; int state= (s=cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0; #ifndef DBUG_OFF if(0){ char str[1024]; mstr(str,attr,len,sizeof(str)-1); printf("VALUE %d %s='%s'\n",state,st->attr,str); } #endif switch (state) { case _CS_ID: i->cs.number= strtol(attr,(char**)NULL,10); break; case _CS_BINARY_ID: i->cs.binary_number= strtol(attr,(char**)NULL,10); break; case _CS_PRIMARY_ID: i->cs.primary_number= strtol(attr,(char**)NULL,10); break; case _CS_COLNAME: i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1); break; case _CS_CSNAME: i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1); break; case _CS_CSDESCRIPT: i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1); break; case _CS_FLAG: if (!strncmp("primary",attr,len)) i->cs.state|= MY_CS_PRIMARY; else if (!strncmp("binary",attr,len)) i->cs.state|= MY_CS_BINSORT; else if (!strncmp("compiled",attr,len)) i->cs.state|= MY_CS_COMPILED; break; case _CS_UPPERMAP: fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len); i->cs.to_upper=i->to_upper; break; case _CS_LOWERMAP: fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len); i->cs.to_lower=i->to_lower; break; case _CS_UNIMAP: fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len); i->cs.tab_to_uni=i->tab_to_uni; break; case _CS_COLLMAP: fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len); i->cs.sort_order=i->sort_order; break; case _CS_CTYPEMAP: fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len); i->cs.ctype=i->ctype; break; case _CS_RESET: case _CS_DIFF1: case _CS_DIFF2: case _CS_DIFF3: { /* Convert collation description from Locale Data Markup Language (LDML) into ICU Collation Customization expression. */ char arg[16]; const char *cmd[]= {"&","<","<<","<<<"}; i->cs.tailoring= i->tailoring; mstr(arg,attr,len,sizeof(arg)-1); if (i->tailoring_length + 20 < sizeof(i->tailoring)) { char *dst= i->tailoring_length + i->tailoring; i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg); } } } return MY_XML_OK; } my_bool my_parse_charset_xml(const char *buf, uint len, int (*add_collation)(CHARSET_INFO *cs)) { MY_XML_PARSER p; struct my_cs_file_info i; my_bool rc; my_xml_parser_create(&p); my_xml_set_enter_handler(&p,cs_enter); my_xml_set_value_handler(&p,cs_value); my_xml_set_leave_handler(&p,cs_leave); i.add_collation= add_collation; my_xml_set_user_data(&p,(void*)&i); rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE; my_xml_parser_free(&p); return rc; }