2001-12-06 13:10:51 +01:00
|
|
|
/* Copyright (C) 2000 MySQL AB
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
2000-07-31 21:29:14 +02:00
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
2001-12-06 13:10:51 +01:00
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
2000-07-31 21:29:14 +02:00
|
|
|
|
|
|
|
#include "mysys_priv.h"
|
|
|
|
#include "mysys_err.h"
|
|
|
|
#include <m_ctype.h>
|
|
|
|
#include <m_string.h>
|
|
|
|
#include <my_dir.h>
|
2003-01-03 11:35:32 +01:00
|
|
|
#include <my_xml.h>
|
2000-07-31 21:29:14 +02:00
|
|
|
|
2003-01-05 14:34:24 +01:00
|
|
|
|
|
|
|
static void set_max_sort_char(CHARSET_INFO *cs);
|
|
|
|
static my_bool create_fromuni(CHARSET_INFO *cs);
|
|
|
|
|
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
#define MY_CHARSET_INDEX "Index.xml"
|
2000-08-22 22:08:34 +02:00
|
|
|
|
2000-07-31 21:29:14 +02:00
|
|
|
const char *charsets_dir = NULL;
|
|
|
|
static int charset_initialized=0;
|
|
|
|
|
|
|
|
#define MAX_LINE 1024
|
|
|
|
|
|
|
|
#define CTYPE_TABLE_SIZE 257
|
|
|
|
#define TO_LOWER_TABLE_SIZE 256
|
|
|
|
#define TO_UPPER_TABLE_SIZE 256
|
|
|
|
#define SORT_ORDER_TABLE_SIZE 256
|
2002-05-31 18:04:47 +02:00
|
|
|
#define TO_UNI_TABLE_SIZE 256
|
2000-07-31 21:29:14 +02:00
|
|
|
|
2000-10-09 22:10:41 +02:00
|
|
|
char *get_charsets_dir(char *buf)
|
2000-07-31 21:29:14 +02:00
|
|
|
{
|
|
|
|
const char *sharedir = SHAREDIR;
|
|
|
|
DBUG_ENTER("get_charsets_dir");
|
|
|
|
|
|
|
|
if (charsets_dir != NULL)
|
2001-01-19 03:57:29 +01:00
|
|
|
strmake(buf, charsets_dir, FN_REFLEN-1);
|
2000-07-31 21:29:14 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
if (test_if_hard_path(sharedir) ||
|
|
|
|
is_prefix(sharedir, DEFAULT_CHARSET_HOME))
|
|
|
|
strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
|
|
|
|
else
|
|
|
|
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
|
|
|
|
NullS);
|
|
|
|
}
|
2001-10-08 03:58:07 +02:00
|
|
|
convert_dirname(buf,buf,NullS);
|
2000-07-31 21:29:14 +02:00
|
|
|
DBUG_PRINT("info",("charsets dir='%s'", buf));
|
|
|
|
DBUG_RETURN(strend(buf));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
/*
  Size in bytes of the buffer a charset XML file is read into.
  Parenthesized so the macro expands correctly inside larger expressions.
*/
#define MAX_BUF (1024*16)
|
|
|
|
|
2003-01-05 14:34:24 +01:00
|
|
|
#ifndef DBUG_OFF
/*
  Debug helper: copy the shorter of l1 and l2 bytes from src into str and
  terminate the result with a NUL byte.

  str   Destination; must have room for min(l1,l2)+1 bytes
  src   Source bytes (need not be NUL terminated)
  l1    Number of bytes available in src
  l2    Maximum number of bytes to copy
*/
static void mstr(char *str,const char *src,uint l1,uint l2)
{
  uint ncopy= l2;

  if (l1 < l2)
    ncopy= l1;
  memcpy(str,src,ncopy);
  str[ncopy]= '\0';
}
#endif
|
2000-07-31 21:29:14 +02:00
|
|
|
|
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
/* One known element/attribute path of a charset XML file */
struct my_cs_file_section_st
{
  int        state;     /* One of the _CS_* ids below */
  const char *str;      /* Dotted path, e.g. "charsets.charset.name" */
};

/* Ids attached to the paths in sec[] */
#define _CS_MISC	1
#define _CS_ID		2
#define _CS_NAME	3
#define _CS_FAMILY	4
#define _CS_ORDER	5
#define _CS_COLNAME	6
#define _CS_FLAG	7
#define _CS_CHARSET	8
#define _CS_COLLATION	9
#define _CS_UPPERMAP	10
#define _CS_LOWERMAP	11
#define _CS_UNIMAP	12
#define _CS_COLLMAP	13
#define _CS_CTYPEMAP	14

/* Table of recognized paths; terminated by an entry with str == NULL */
static struct my_cs_file_section_st sec[] =
{
  {_CS_MISC,		"xml"},
  {_CS_MISC,		"xml.version"},
  {_CS_MISC,		"xml.encoding"},
  {_CS_MISC,		"charsets"},
  {_CS_MISC,		"charsets.max-id"},
  {_CS_MISC,		"charsets.description"},
  {_CS_CHARSET,		"charsets.charset"},
  {_CS_NAME,		"charsets.charset.name"},
  {_CS_FAMILY,		"charsets.charset.family"},
  {_CS_MISC,		"charsets.charset.alias"},
  {_CS_MISC,		"charsets.charset.ctype"},
  {_CS_CTYPEMAP,	"charsets.charset.ctype.map"},
  {_CS_MISC,		"charsets.charset.upper"},
  {_CS_UPPERMAP,	"charsets.charset.upper.map"},
  {_CS_MISC,		"charsets.charset.lower"},
  {_CS_LOWERMAP,	"charsets.charset.lower.map"},
  {_CS_MISC,		"charsets.charset.unicode"},
  {_CS_UNIMAP,		"charsets.charset.unicode.map"},
  {_CS_COLLATION,	"charsets.charset.collation"},
  {_CS_COLNAME,		"charsets.charset.collation.name"},
  {_CS_ID,		"charsets.charset.collation.id"},
  {_CS_ORDER,		"charsets.charset.collation.order"},
  {_CS_FLAG,		"charsets.charset.collation.flag"},
  {_CS_COLLMAP,		"charsets.charset.collation.map"},
  {0,			NULL}
};

/*
  Find the sec[] entry whose path is exactly the len bytes at attr.

  attr  Path to look up (need not be NUL terminated)
  len   Number of bytes of attr to compare

  Returns the matching entry or NULL when the path is not known.

  The table path must have the same length as attr; comparing only the
  first len bytes would let a truncated path such as "charsets.m" match
  "charsets.max-id".
*/
static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len)
{
  struct my_cs_file_section_st *s;

  for (s= sec; s->str; s++)
  {
    if (!strncmp(attr,s->str,len) && strlen(s->str) == len)
      return s;
  }
  return NULL;
}
|
|
|
|
|
2003-01-05 14:34:24 +01:00
|
|
|
#define CS_MAX_NM_LEN 32
|
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
struct my_cs_file_info
|
|
|
|
{
|
2003-01-05 14:34:24 +01:00
|
|
|
char csname[CS_MAX_NM_LEN];
|
|
|
|
char name[CS_MAX_NM_LEN];
|
|
|
|
uchar ctype[CTYPE_TABLE_SIZE];
|
|
|
|
uchar to_lower[TO_LOWER_TABLE_SIZE];
|
|
|
|
uchar to_upper[TO_UPPER_TABLE_SIZE];
|
|
|
|
uchar sort_order[SORT_ORDER_TABLE_SIZE];
|
|
|
|
uint16 tab_to_uni[TO_UNI_TABLE_SIZE];
|
2003-01-03 11:35:32 +01:00
|
|
|
CHARSET_INFO cs;
|
|
|
|
myf myflags;
|
|
|
|
};
|
|
|
|
|
2003-01-05 14:34:24 +01:00
|
|
|
/*
  Fill in the handler pointers of a charset record with the my_*_8bit and
  my_*_simple routines and set mbmaxlen to 1.

  cs    Charset record to update
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->mbmaxlen=    1;

  /* Comparison, pattern matching and hashing */
  cs->strnncoll=   my_strnncoll_simple;
  cs->like_range=  my_like_range_simple;
  cs->wildcmp=     my_wildcmp_8bit;
  cs->strcasecmp=  my_strcasecmp_8bit;
  cs->strncasecmp= my_strncasecmp_8bit;
  cs->hash_caseup= my_hash_caseup_simple;
  cs->hash_sort=   my_hash_sort_simple;

  /* Case conversion and sort mapping */
  cs->caseup_str=  my_caseup_str_8bit;
  cs->casedn_str=  my_casedn_str_8bit;
  cs->caseup=      my_caseup_8bit;
  cs->casedn=      my_casedn_8bit;
  cs->tosort=      my_tosort_8bit;

  /* Conversion to and from wide characters */
  cs->mb_wc=       my_mb_wc_8bit;
  cs->wc_mb=       my_wc_mb_8bit;

  /* Formatting and number parsing */
  cs->snprintf=    my_snprintf_8bit;
  cs->strntol=     my_strntol_8bit;
  cs->strntoul=    my_strntoul_8bit;
  cs->strntoll=    my_strntoll_8bit;
  cs->strntoull=   my_strntoull_8bit;
  cs->strntod=     my_strntod_8bit;
}
|
|
|
|
|
2003-01-07 09:30:54 +01:00
|
|
|
/* FIXME: BAR: move to more proper place, my_alloc.c I suppose */
|
|
|
|
/*
  Duplicate a NUL-terminated string into memory from my_once_alloc().

  src      String to copy
  myflags  Flags handed on to my_once_alloc()

  Returns the copy, or NULL when the allocation failed.
*/
static char *my_once_strdup(const char *src,myf myflags)
{
  uint size= strlen(src)+1;             /* Bytes to copy, NUL included */
  char *copy= my_once_alloc(size,myflags);

  if (!copy)
    return NULL;
  memcpy(copy,src,size);
  return copy;
}
|
|
|
|
|
2003-01-05 14:34:24 +01:00
|
|
|
/*
  Merge the data that is present in 'from' into 'to'.

  to    Destination charset record
  from  Source record; only its non-zero number and non-NULL pointers
        are taken over

  Names and tables are duplicated with my_once_alloc(), so 'to' never
  points into 'from'. A table is copied only when its allocation succeeded;
  on failure the pointer in 'to' is left NULL instead of being written
  through. simple_cs_is_full() tests csname, name, ctype, to_lower,
  to_upper, sort_order and tab_to_uni, so such a record is not reported as
  complete.
*/
static void simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  to->number= from->number ? from->number : to->number;
  to->state|= from->state;

  if (from->csname)
    to->csname= my_once_strdup(from->csname,MYF(MY_WME));

  if (from->name)
    to->name= my_once_strdup(from->name,MYF(MY_WME));

  if (from->ctype)
  {
    if ((to->ctype= (uchar*) my_once_alloc(CTYPE_TABLE_SIZE,MYF(MY_WME))))
      memcpy((char*) to->ctype,(char*) from->ctype,CTYPE_TABLE_SIZE);
  }
  if (from->to_lower)
  {
    if ((to->to_lower= (uchar*) my_once_alloc(TO_LOWER_TABLE_SIZE,
                                              MYF(MY_WME))))
      memcpy((char*) to->to_lower,(char*) from->to_lower,
             TO_LOWER_TABLE_SIZE);
  }
  if (from->to_upper)
  {
    if ((to->to_upper= (uchar*) my_once_alloc(TO_UPPER_TABLE_SIZE,
                                              MYF(MY_WME))))
      memcpy((char*) to->to_upper,(char*) from->to_upper,
             TO_UPPER_TABLE_SIZE);
  }
  if (from->sort_order)
  {
    if ((to->sort_order= (uchar*) my_once_alloc(SORT_ORDER_TABLE_SIZE,
                                                MYF(MY_WME))))
    {
      memcpy((char*) to->sort_order,(char*) from->sort_order,
             SORT_ORDER_TABLE_SIZE);
      /* max_sort_char depends on the table that was just installed */
      set_max_sort_char(to);
    }
  }
  if (from->tab_to_uni)
  {
    uint sz= TO_UNI_TABLE_SIZE*sizeof(uint16);
    if ((to->tab_to_uni= (uint16*) my_once_alloc(sz,MYF(MY_WME))))
    {
      memcpy((char*) to->tab_to_uni,(char*) from->tab_to_uni,sz);
      /* Build the reverse (Unicode -> charset) index from the new table */
      create_fromuni(to);
    }
  }
}
|
|
|
|
|
|
|
|
/*
  Tell whether a charset record has everything needed to be used:
  csname, tab_to_uni, ctype, to_upper, to_lower, a non-zero number,
  name and sort_order.

  Returns non-zero when all of them are set, 0 otherwise.
*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
  if (!cs->csname || !cs->tab_to_uni || !cs->ctype || !cs->to_upper ||
      !cs->to_lower)
    return 0;
  return cs->number && cs->name && cs->sort_order;
}
|
|
|
|
|
|
|
|
/*
  Parse whitespace-separated hexadecimal numbers into a byte array.

  a     Destination array
  size  Number of elements in a
  str   Text to parse (need not be NUL terminated)
  len   Length of str in bytes

  Parsing stops at the end of the text or once 'size' values have been
  stored; further tokens are ignored so that a[] is never written past
  its last element.

  Always returns 0.
*/
static int fill_uchar(uchar *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e= str+len;

  for (s= str ; s < e ; i++)
  {
    /* Skip the separators in front of the next token */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b= s;
    /* Find the end of the token */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    if (s == b)
      break;                                    /* No more tokens */
    if (i >= size)
      break;                                    /* Destination is full */
    a[i]= my_strntoul(my_charset_latin1,b,s-b,NULL,16);
  }
  return 0;
}
|
|
|
|
|
|
|
|
/*
  Parse whitespace-separated hexadecimal numbers into a uint16 array.

  a     Destination array
  size  Number of elements in a
  str   Text to parse (need not be NUL terminated)
  len   Length of str in bytes

  Parsing stops at the end of the text or once 'size' values have been
  stored; further tokens are ignored so that a[] is never written past
  its last element.

  Always returns 0.
*/
static int fill_uint16(uint16 *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e= str+len;

  for (s= str ; s < e ; i++)
  {
    /* Skip the separators in front of the next token */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b= s;
    /* Find the end of the token */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    if (s == b)
      break;                                    /* No more tokens */
    if (i >= size)
      break;                                    /* Destination is full */
    a[i]= my_strntol(my_charset_latin1,b,s-b,NULL,16);
  }
  return 0;
}
|
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
/*
  XML "enter" handler: when the path given by attr/len is the
  "charsets.charset" element, clear the charset record that is being
  assembled in the parser's user data.

  Always returns MY_XML_OK.
*/
static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
  struct my_cs_file_section_st *section= cs_file_sec(attr,len);

  if (!section || section->state != _CS_CHARSET)
    return MY_XML_OK;

  bzero(&info->cs,sizeof(info->cs));
  return MY_XML_OK;
}
|
2000-08-22 22:08:34 +02:00
|
|
|
|
2003-01-03 11:35:32 +01:00
|
|
|
/*
  XML "leave" handler: when a collation element is closed, register the
  data gathered for it in all_charsets[].

  st    Parser; st->user_data is the struct my_cs_file_info in use
  attr  Path of the element that is being left
  len   Length of attr

  A collation is registered only when it has a name and a number (taken
  from the file, or looked up by name) that is a valid index into
  all_charsets[]; a number outside the array is ignored instead of being
  used as an index. Records flagged MY_CS_COMPILED are left untouched.
  The per-collation fields of the scratch record are cleared at the end of
  every collation element, so a flag, id or sort table of a skipped
  collation cannot leak into the next one.

  Returns MY_XML_ERROR when a new charset record cannot be allocated,
  otherwise MY_XML_OK.
*/
static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *) st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(attr,len);
  int state= s ? s->state : 0;

  if (state == _CS_COLLATION)
  {
    if (i->cs.name &&
        (i->cs.number || (i->cs.number= get_charset_number(i->cs.name))) &&
        i->cs.number < sizeof(all_charsets)/sizeof(all_charsets[0]))
    {
      CHARSET_INFO **slot= &all_charsets[i->cs.number];

      if (!*slot)
      {
        if (!(*slot= (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),
                                                   i->myflags)))
          return MY_XML_ERROR;
        bzero((void*) *slot,sizeof(CHARSET_INFO));
      }

      if (!((*slot)->state & MY_CS_COMPILED))
      {
        simple_cs_copy_data(*slot,&i->cs);
        if (simple_cs_is_full(*slot))
        {
          simple_cs_init_functions(*slot);
          (*slot)->state|= MY_CS_LOADED;
        }
      }
    }

    /* Forget everything that belongs to this collation only */
    i->cs.number= 0;
    i->cs.name= NULL;
    i->cs.state= 0;
    i->cs.sort_order= NULL;
  }
  return MY_XML_OK;
}
|
|
|
|
|
|
|
|
/*
  XML "value" handler: store the text of the current element in the
  scratch record of the parser's user data.

  st    Parser; st->attr is the path of the current element and
        st->user_data the struct my_cs_file_info in use
  attr  The value text (need not be NUL terminated)
  len   Length of the value text

  Names are truncated to CS_MAX_NM_LEN-1 bytes. The collation flag is
  honoured only when the value is exactly "primary"; a shorter value such
  as "" or "p" no longer sets MY_CS_PRIMARY. Values of other paths are
  ignored.

  Always returns MY_XML_OK.
*/
static int cs_value(MY_XML_PARSER *st,const char *attr, uint len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *) st->user_data;
  struct my_cs_file_section_st *s;
  int state= (s= cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0;

#ifndef DBUG_OFF
  /* Disabled trace output; kept so it can be switched on while debugging */
  if(0){
    char str[1024];
    mstr(str,attr,len,sizeof(str)-1);
    printf("VALUE %d %s='%s'\n",state,st->attr,str);
  }
#endif

  switch (state)
  {
  case _CS_ID:
    i->cs.number= my_strntoul(my_charset_latin1,attr,len,(char**) NULL,0);
    break;
  case _CS_COLNAME:
    if (len > CS_MAX_NM_LEN-1)
      len= CS_MAX_NM_LEN-1;
    memcpy(i->name,attr,len);
    i->name[len]= '\0';
    i->cs.name= i->name;
    break;
  case _CS_NAME:
    if (len > CS_MAX_NM_LEN-1)
      len= CS_MAX_NM_LEN-1;
    memcpy(i->csname,attr,len);
    i->csname[len]= '\0';
    i->cs.csname= i->csname;
    break;
  case _CS_FLAG:
    /* Require the whole word, not just a prefix of it */
    if (len == sizeof("primary")-1 && !strncmp("primary",attr,len))
      i->cs.state|= MY_CS_PRIMARY;
    break;
  case _CS_UPPERMAP:
    fill_uchar(i->to_upper,TO_UPPER_TABLE_SIZE,attr,len);
    i->cs.to_upper= i->to_upper;
    break;
  case _CS_LOWERMAP:
    fill_uchar(i->to_lower,TO_LOWER_TABLE_SIZE,attr,len);
    i->cs.to_lower= i->to_lower;
    break;
  case _CS_UNIMAP:
    fill_uint16(i->tab_to_uni,TO_UNI_TABLE_SIZE,attr,len);
    i->cs.tab_to_uni= i->tab_to_uni;
    break;
  case _CS_COLLMAP:
    fill_uchar(i->sort_order,SORT_ORDER_TABLE_SIZE,attr,len);
    i->cs.sort_order= i->sort_order;
    break;
  case _CS_CTYPEMAP:
    fill_uchar(i->ctype,CTYPE_TABLE_SIZE,attr,len);
    i->cs.ctype= i->ctype;
    break;
  default:
    break;
  }
  return MY_XML_OK;
}
|
2000-07-31 21:29:14 +02:00
|
|
|
|
2003-01-04 12:21:52 +01:00
|
|
|
/*
  Read a charset XML file from the charsets directory and feed it to the
  XML parser with the cs_enter/cs_value/cs_leave handlers.

  filename  File name relative to the directory from get_charsets_dir()
  myflags   Flags for my_malloc/my_open/my_close/my_free; also made
            available to the handlers through my_cs_file_info.myflags

  Only the first MAX_BUF bytes of the file are read. A parse error is not
  reported to the caller: whatever was registered before the error stays
  in place.

  Returns TRUE when the buffer cannot be allocated or the file cannot be
  opened or read, FALSE otherwise.
*/
static my_bool read_charset_index(const char *filename, myf myflags)
{
  char *buf;
  int fd;
  int nread;
  MY_XML_PARSER p;
  struct my_cs_file_info i;

  if (!(buf= (char*) my_malloc(MAX_BUF,myflags)))
    return TRUE;

  /* buf first holds the file name, then the file contents */
  strmov(get_charsets_dir(buf),filename);

  if ((fd= my_open(buf,O_RDONLY,myflags)) < 0)
  {
    my_free(buf,myflags);
    return TRUE;
  }

  nread= (int) read(fd,buf,MAX_BUF);
  my_close(fd,myflags);
  if (nread < 0)
  {
    /* Don't hand a failed read to the parser as a huge length */
    my_free(buf,myflags);
    return TRUE;
  }

  /* The handlers read i.myflags and test the pointers in i.cs */
  bzero((void*) &i,sizeof(i));
  i.myflags= myflags;

  my_xml_parser_create(&p);
  my_xml_set_enter_handler(&p,cs_enter);
  my_xml_set_value_handler(&p,cs_value);
  my_xml_set_leave_handler(&p,cs_leave);
  my_xml_set_user_data(&p,(void*) &i);

  /* Best effort: a parse error is deliberately ignored */
  (void) my_xml_parse(&p,buf,(uint) nread);

  my_xml_parser_free(&p);
  my_free(buf,myflags);
  return FALSE;
}
|
|
|
|
|
2002-07-31 10:25:37 +02:00
|
|
|
/*
  Update cs->max_sort_char so that it is a character with the highest
  value in cs->sort_order.

  The scan starts from the weight of the current max_sort_char and only a
  strictly greater weight replaces it, so the lowest such character code
  wins. Nothing is done when the charset has no sort_order table.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar best_weight;
  uint ch;

  if (cs->sort_order)
  {
    best_weight= cs->sort_order[(uchar) cs->max_sort_char];
    for (ch= 0; ch < 256; ch++)
    {
      uchar weight= (uchar) cs->sort_order[ch];

      if (weight <= best_weight)
        continue;
      best_weight= weight;
      cs->max_sort_char= (char) ch;
    }
  }
}
|
2000-07-31 21:29:14 +02:00
|
|
|
|
|
|
|
/*
  Fill all_charsets[] once: the compiled-in character sets first, then the
  ones described by the index file MY_CHARSET_INDEX.

  myflags  Flags passed to init_compiled_charsets() and
           read_charset_index()

  Returns the result of read_charset_index() on the call that performed
  the initialization, FALSE on every other call.
*/
static my_bool init_available_charsets(myf myflags)
{
  my_bool error= FALSE;
  /*
    We have to use charset_initialized to not lock on THR_LOCK_charset
    inside get_internal_charset...
  */
  if (!charset_initialized)
  {
    CHARSET_INFO **cs;
    /*
      To make things thread safe we are not allowing other threads to
      interfere while we may be changing all_charsets
    */
    pthread_mutex_lock(&THR_LOCK_charset);

    /*
      Test again now that we hold the lock: another thread may have done
      the initialization while we were waiting, and doing it a second time
      would clear the records it has registered.
    */
    if (!charset_initialized)
    {
      bzero(&all_charsets,sizeof(all_charsets));
      init_compiled_charsets(myflags);

      /* Compute max_sort_char for the charsets registered so far */
      for (cs= all_charsets; cs < all_charsets+255 ; cs++)
      {
        if (*cs)
          set_max_sort_char(*cs);
      }
      error= read_charset_index(MY_CHARSET_INDEX,myflags);
      charset_initialized= 1;
    }
    pthread_mutex_unlock(&THR_LOCK_charset);
  }
  return error;
}
|
|
|
|
|
|
|
|
|
|
|
|
void free_charsets(void)
|
|
|
|
{
|
2001-12-06 13:10:51 +01:00
|
|
|
charset_initialized=0;
|
2000-07-31 21:29:14 +02:00
|
|
|
}
|
|
|
|
|
2002-07-30 11:02:29 +02:00
|
|
|
static void get_charset_conf_name(const char *cs_name, char *buf)
|
2000-07-31 21:29:14 +02:00
|
|
|
{
|
2002-07-30 11:02:29 +02:00
|
|
|
strxmov(get_charsets_dir(buf), cs_name, ".conf", NullS);
|
2000-07-31 21:29:14 +02:00
|
|
|
}
|
|
|
|
|
2002-06-02 18:32:02 +02:00
|
|
|
typedef struct {
|
|
|
|
int nchars;
|
|
|
|
MY_UNI_IDX uidx;
|
|
|
|
} uni_idx;
|
|
|
|
|
|
|
|
#define PLANE_SIZE 0x100
|
|
|
|
#define PLANE_NUM 0x100
|
|
|
|
#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
|
|
|
|
|
|
|
|
static int pcmp(const void * f, const void * s)
|
|
|
|
{
|
|
|
|
const uni_idx *F=(const uni_idx*)f;
|
|
|
|
const uni_idx *S=(const uni_idx*)s;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
if(!(res=((S->nchars)-(F->nchars))))
|
|
|
|
res=((F->uidx.from)-(S->uidx.to));
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
  Build cs->tab_from_uni, the Unicode -> charset index, from
  cs->tab_to_uni.

  cs    Charset record; tab_to_uni must point to a table of 0x100 entries

  The codes in tab_to_uni are grouped by plane (PLANE_NUMBER of the code).
  For every plane that is used, a table covering the lowest..highest code
  seen in that plane is allocated with my_once_alloc() and filled with the
  charset byte that maps to each code. The planes are stored most
  populated first, followed by an all-zero end marker.

  Returns FALSE on success, TRUE when an allocation fails; in that case
  cs->tab_from_uni is not modified.
*/
static my_bool create_fromuni(CHARSET_INFO *cs)
{
  uni_idx idx[PLANE_NUM];
  int i,n;

  /* Clear plane statistics */
  bzero(idx,sizeof(idx));

  /* Count number of characters in each plane and record its code range */
  for (i= 0; i < 0x100; i++)
  {
    uint16 wc= cs->tab_to_uni[i];
    int pl= PLANE_NUMBER(wc);

    /* A zero code counts only for character 0 */
    if (wc || !i)
    {
      if (!idx[pl].nchars)
      {
        idx[pl].uidx.from= wc;
        idx[pl].uidx.to= wc;
      }
      else
      {
        if (wc < idx[pl].uidx.from)
          idx[pl].uidx.from= wc;
        if (wc > idx[pl].uidx.to)
          idx[pl].uidx.to= wc;
      }
      idx[pl].nchars++;
    }
  }

  /* Sort planes in descending order of their number of characters */
  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);

  /* Allocate and fill the reverse table of each used plane */
  for (i= 0; i < PLANE_NUM; i++)
  {
    int ch,numchars;

    /* Empty planes sort last, so the first one ends the list */
    if (!idx[i].nchars)
      break;

    numchars= idx[i].uidx.to-idx[i].uidx.from+1;
    if (!(idx[i].uidx.tab= (unsigned char*)
          my_once_alloc(numchars*sizeof(*idx[i].uidx.tab),MYF(MY_WME))))
      return TRUE;
    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));

    for (ch= 1; ch < PLANE_SIZE; ch++)
    {
      uint16 wc= cs->tab_to_uni[ch];
      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
      {
        int ofs= wc-idx[i].uidx.from;
        idx[i].uidx.tab[ofs]= ch;
      }
    }
  }

  /* Copy the used planes into the charset */
  n= i;
  if (!(cs->tab_from_uni= (MY_UNI_IDX*)
        my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),MYF(MY_WME))))
    return TRUE;
  for (i= 0; i < n; i++)
    cs->tab_from_uni[i]= idx[i].uidx;

  /* Set end-of-list marker */
  bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
  return FALSE;
}
|
|
|
|
|
2000-07-31 21:29:14 +02:00
|
|
|
|
2002-07-30 11:02:29 +02:00
|
|
|
uint get_charset_number(const char *charset_name)
|
|
|
|
{
|
2002-10-10 12:52:32 +02:00
|
|
|
CHARSET_INFO **cs;
|
2002-07-30 11:02:29 +02:00
|
|
|
if (init_available_charsets(MYF(0))) /* If it isn't initialized */
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (cs = all_charsets; cs < all_charsets+255; ++cs)
|
2002-10-10 12:52:32 +02:00
|
|
|
if ( cs[0] && cs[0]->name && !strcmp(cs[0]->name, charset_name))
|
|
|
|
return cs[0]->number;
|
2002-07-30 11:02:29 +02:00
|
|
|
|
|
|
|
return 0; /* this mimics find_type() */
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Return the name of the charset registered under charset_number.

  Returns "?" when init_available_charsets() reports an error, when the
  number is not a valid index into all_charsets[], or when no named record
  with that number is registered there.
*/
const char *get_charset_name(uint charset_number)
{
  CHARSET_INFO *cs;

  if (init_available_charsets(MYF(0)))	/* If it isn't initialized */
    return "?";

  /* Never index all_charsets[] with a number outside the array */
  if (charset_number >= sizeof(all_charsets)/sizeof(all_charsets[0]))
    return "?";

  cs= all_charsets[charset_number];
  if (cs && (cs->number == charset_number) && cs->name)
    return cs->name;

  return "?";   /* this mimics find_type() */
}
|
|
|
|
|
|
|
|
|
2001-11-06 01:15:45 +01:00
|
|
|
/*
  Return the charset record registered under cs_number, loading its data
  from "<csname>.xml" first when the record is neither compiled in nor
  loaded yet.

  cs_number  Index into all_charsets[]
  flags      Flags passed to read_charset_index()

  Returns the record, or NULL when cs_number is outside all_charsets[],
  nothing is registered under it, or the record is still not flagged
  MY_CS_LOADED after the load attempt (a record without a csname cannot
  be loaded at all).
*/
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
  char buf[FN_REFLEN];
  CHARSET_INFO *cs;

  /* Never index all_charsets[] with a number outside the array */
  if (cs_number >= sizeof(all_charsets)/sizeof(all_charsets[0]))
    return NULL;

  /*
    To make things thread safe we are not allowing other threads to
    interfere while we may be changing all_charsets
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  cs= all_charsets[cs_number];

  if (cs && !(cs->state & (MY_CS_COMPILED | MY_CS_LOADED)))
  {
    if (cs->csname)
    {
      strxmov(buf, cs->csname, ".xml", NullS);
      read_charset_index(buf,flags);
    }
    /* The XML handlers set MY_CS_LOADED once the record is complete */
    cs= (cs->state & MY_CS_LOADED) ? cs : NULL;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);
  return cs;
}
|
|
|
|
|
|
|
|
|
2001-11-06 01:15:45 +01:00
|
|
|
/*
  Resolve a charset name to its number and return the record for it.

  Returns NULL when get_charset_number() yields 0 for the name, otherwise
  the result of get_internal_charset().
*/
static CHARSET_INFO *get_internal_charset_by_name(const char *name, myf flags)
{
  uint cs_number= get_charset_number(name);

  if (!cs_number)
    return NULL;
  return get_internal_charset(cs_number,flags);
}
|
|
|
|
|
|
|
|
|
2002-07-30 14:12:51 +02:00
|
|
|
|
2000-07-31 21:29:14 +02:00
|
|
|
/*
  Return the charset with the given number.

  cs_number  Charset number; 0 always yields NULL without an error message
  flags      Passed to get_internal_charset(); when MY_WME is set, a
             failed lookup is reported with EE_UNKNOWN_CHARSET

  Returns the charset, or NULL when it is not available.
*/
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
  CHARSET_INFO *cs;
  char index_file[FN_REFLEN], cs_string[23];

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (!cs_number)
    return NULL;

  cs= get_internal_charset(cs_number, flags);
  if (cs || !(flags & MY_WME))
    return cs;

  /* Not found and the caller asked for an error message */
  strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
  cs_string[0]= '#';
  int10_to_str(cs_number, cs_string+1, 10);
  my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  return cs;
}
|
|
|
|
|
|
|
|
/*
  Make the charset with number cs both default_charset_info and
  system_charset_info.

  Returns FALSE on success, TRUE when get_charset() cannot provide the
  charset; the current settings are kept in that case.
*/
my_bool set_default_charset(uint cs, myf flags)
{
  CHARSET_INFO *found;
  DBUG_ENTER("set_default_charset");
  DBUG_PRINT("enter",("character set: %d",(int) cs));

  found= get_charset(cs, flags);
  if (found)
  {
    default_charset_info= found;
    system_charset_info= found;
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("error",("Couldn't set default character set"));
  DBUG_RETURN(TRUE);   /* error */
}
|
|
|
|
|
|
|
|
/*
  Return the charset with the given name.

  cs_name  Name to look up
  flags    Passed to get_internal_charset_by_name(); when MY_WME is set, a
           failed lookup is reported with EE_UNKNOWN_CHARSET

  Returns the charset, or NULL when it is not available.
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *cs;
  char index_file[FN_REFLEN];

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  cs= get_internal_charset_by_name(cs_name, flags);
  if (cs || !(flags & MY_WME))
    return cs;

  /* Not found and the caller asked for an error message */
  strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
  my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
  return cs;
}
|
|
|
|
|
|
|
|
/*
  Make the charset called cs_name both default_charset_info and
  system_charset_info.

  Returns FALSE on success, TRUE when get_charset_by_name() cannot provide
  the charset; the current settings are kept in that case.
*/
my_bool set_default_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *found;
  DBUG_ENTER("set_default_charset_by_name");
  DBUG_PRINT("enter",("character set: %s", cs_name));

  found= get_charset_by_name(cs_name, flags);
  if (found)
  {
    default_charset_info= found;
    system_charset_info= found;
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("error",("Couldn't set default character set"));
  DBUG_RETURN(TRUE);   /* error */
}
|
|
|
|
|
|
|
|
/* Only append name if it doesn't exist from before */
|
|
|
|
|
|
|
|
/*
  Tell whether 'name' already occurs as a complete word in the
  space-separated list held in s.

  name  Charset name to look for
  s     List built with dynstr_append(); s->str is searched

  An occurrence counts only when it starts at the beginning of the list
  or right after a space, and ends at the end of the list or right before
  a space. Without the check at the start, a name would also be "found"
  at the tail of a longer name.

  Returns TRUE when the name is present, FALSE otherwise.
*/
static my_bool charset_in_string(const char *name, DYNAMIC_STRING *s)
{
  uint length= (uint) strlen(name);
  const char *pos;

  for (pos= s->str ; (pos= strstr(pos,name)) ; pos++)
  {
    if ((pos == s->str || pos[-1] == ' ') &&
        (! pos[length] || pos[length] == ' '))
      return TRUE;   /* Already existed */
  }
  return FALSE;
}
|
|
|
|
|
|
|
|
/*
  Append 'name' followed by a space to the list in s, unless
  charset_in_string() reports that the name is already there.
*/
static void charset_append(DYNAMIC_STRING *s, const char *name)
{
  if (charset_in_string(name, s))
    return;

  dynstr_append(s, name);
  dynstr_append(s, " ");
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns a dynamically-allocated string listing the character sets
|
|
|
|
requested. The caller is responsible for freeing the memory. */
|
|
|
|
|
|
|
|
/*
  Build a space-separated list of character set names.

  want_flags  Selects what is listed:
              MY_CS_COMPILED           every named record in all_charsets[]
              MY_CS_CONFIG             records for which a "<name>.conf"
                                       file exists in the charsets directory
              MY_CS_INDEX|MY_CS_LOADED records whose state has one of the
                                       requested flags set

  Returns a string allocated with my_strdup() (empty when nothing
  matched), or NULL when memory could not be allocated. The caller is
  responsible for freeing it.
*/
char * list_charsets(myf want_flags)
{
  DYNAMIC_STRING s;
  CHARSET_INFO **cs;
  char *p;

  (void) init_available_charsets(MYF(0));
  if (init_dynamic_string(&s, NullS, 256, 1024))
    return NULL;				/* Out of memory */

  if (want_flags & MY_CS_COMPILED)
  {
    /*
      NOTE(review): this lists every registered record, not only those
      flagged MY_CS_COMPILED -- confirm that this is intended.
    */
    for (cs= all_charsets; cs < all_charsets+255; cs++)
    {
      /* A record may lack a name; there is nothing to append then */
      if (cs[0] && cs[0]->name)
      {
        dynstr_append(&s, cs[0]->name);
        dynstr_append(&s, " ");
      }
    }
  }

  if (want_flags & MY_CS_CONFIG)
  {
    char buf[FN_REFLEN];
    MY_STAT status;

    for (cs= all_charsets; cs < all_charsets+255; cs++)
    {
      if (!cs[0] || !cs[0]->name || charset_in_string(cs[0]->name, &s))
        continue;
      get_charset_conf_name(cs[0]->name, buf);
      if (!my_stat(buf, &status, MYF(0)))
        continue;   /* conf file doesn't exist */
      dynstr_append(&s, cs[0]->name);
      dynstr_append(&s, " ");
    }
  }

  if (want_flags & (MY_CS_INDEX|MY_CS_LOADED))
  {
    for (cs= all_charsets; cs < all_charsets + 255; cs++)
    {
      if (cs[0] && cs[0]->name && (cs[0]->state & want_flags))
        charset_append(&s, cs[0]->name);
    }
  }

  if (s.length)
  {
    s.str[s.length - 1]= '\0';   /* chop trailing space */
    p= my_strdup(s.str, MYF(MY_WME));
  }
  else
  {
    p= my_strdup("", MYF(MY_WME));
  }
  dynstr_free(&s);

  return p;
}
|