mariadb/mysys/charset.c
tim@black.box 34e2c4b2ab Do not print a warning message when the server's character set is
not found by the client.  This is especially important for PHP, which
only includes the latin1 character set in the bundled libmysql.
2001-11-05 19:15:45 -05:00

557 lines
15 KiB
C

/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA */
#include "mysys_priv.h"
#include "mysys_err.h"
#include <m_ctype.h>
#include <m_string.h>
#include <my_dir.h>
typedef struct cs_id_st {
char *name;
uint number;
} CS_ID;
const char *charsets_dir = NULL;
static DYNAMIC_ARRAY cs_info_table;
static CS_ID **available_charsets;
static int charset_initialized=0;
#define MAX_LINE 1024
#define CTYPE_TABLE_SIZE 257
#define TO_LOWER_TABLE_SIZE 256
#define TO_UPPER_TABLE_SIZE 256
#define SORT_ORDER_TABLE_SIZE 256
struct simpleconfig_buf_st {
FILE *f;
char buf[MAX_LINE];
char *p;
};
static uint num_from_csname(CS_ID **cs, const char *name)
{
CS_ID **c;
for (c = cs; *c; ++c)
if (!strcmp((*c)->name, name))
return (*c)->number;
return 0; /* this mimics find_type() */
}
static char *name_from_csnum(CS_ID **cs, uint number)
{
CS_ID **c;
if(cs)
for (c = cs; *c; ++c)
if ((*c)->number == number)
return (*c)->name;
return (char*) "?"; /* this mimics find_type() */
}
static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf)
{
char *endptr=fb->p;
for (;;)
{
while (isspace(*endptr))
++endptr;
if (*endptr && *endptr != '#') /* Not comment */
break; /* Found something */
if ((fgets(fb->buf, sizeof(fb->buf), fb->f)) == NULL)
return TRUE; /* end of file */
endptr = fb->buf;
}
while (!isspace(*endptr))
*buf++= *endptr++;
*buf=0;
fb->p = endptr;
return FALSE;
}
char *get_charsets_dir(char *buf)
{
const char *sharedir = SHAREDIR;
DBUG_ENTER("get_charsets_dir");
if (charsets_dir != NULL)
strmake(buf, charsets_dir, FN_REFLEN-1);
else
{
if (test_if_hard_path(sharedir) ||
is_prefix(sharedir, DEFAULT_CHARSET_HOME))
strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
else
strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
NullS);
}
convert_dirname(buf,buf,NullS);
DBUG_PRINT("info",("charsets dir='%s'", buf));
DBUG_RETURN(strend(buf));
}
static my_bool read_charset_index(CS_ID ***charsets, myf myflags)
{
struct simpleconfig_buf_st fb;
char buf[MAX_LINE], num_buf[MAX_LINE];
DYNAMIC_ARRAY cs;
CS_ID *csid;
strmov(get_charsets_dir(buf), "Index");
if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL)
return TRUE;
fb.buf[0] = '\0';
fb.p = fb.buf;
if (init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32))
return TRUE;
while (!get_word(&fb, buf) && !get_word(&fb, num_buf))
{
uint csnum;
uint length;
if (!(csnum = atoi(num_buf)))
{
/* corrupt Index file */
my_fclose(fb.f,myflags);
return TRUE;
}
if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) ||
!(csid->name=
(char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
{
my_fclose(fb.f,myflags);
return TRUE;
}
memcpy(csid->name,buf,length);
csid->number = csnum;
insert_dynamic(&cs, (gptr) &csid);
}
my_fclose(fb.f,myflags);
if (!(*charsets =
(CS_ID **) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags)))
return TRUE;
/* unwarranted chumminess with dynamic_array implementation? */
memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *));
(*charsets)[cs.elements] = NULL;
delete_dynamic(&cs);
return FALSE;
}
static my_bool init_available_charsets(myf myflags)
{
my_bool error=0;
/*
We have to use charset_initialized to not lock on THR_LOCK_charset
inside get_internal_charset...
*/
if (!charset_initialized)
{
/*
To make things thread safe we are not allowing other threads to interfere
while we may changing the cs_info_table
*/
pthread_mutex_lock(&THR_LOCK_charset);
if (!cs_info_table.buffer) /* If not initialized */
{
init_dynamic_array(&cs_info_table, sizeof(CHARSET_INFO*), 16, 8);
error = read_charset_index(&available_charsets, myflags);
}
charset_initialized=1;
pthread_mutex_unlock(&THR_LOCK_charset);
}
if(!available_charsets || !available_charsets[0])
error = TRUE;
return error;
}
void free_charsets(void)
{
delete_dynamic(&cs_info_table);
}
static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb)
{
char buf[MAX_LINE];
while (sz--)
{
if (get_word(fb, buf))
{
DBUG_PRINT("error",("get_word failed, expecting %d more words", sz + 1));
return 1;
}
*array++ = (uchar) strtol(buf, NULL, 16);
}
return 0;
}
static void get_charset_conf_name(uint cs_number, char *buf)
{
strxmov(get_charsets_dir(buf),
name_from_csnum(available_charsets, cs_number), ".conf", NullS);
}
static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set,
myf myflags)
{
struct simpleconfig_buf_st fb;
char buf[FN_REFLEN];
my_bool result;
DBUG_ENTER("read_charset_file");
DBUG_PRINT("enter",("cs_number: %d", cs_number));
if (cs_number <= 0)
DBUG_RETURN(TRUE);
get_charset_conf_name(cs_number, buf);
DBUG_PRINT("info",("file name: %s", buf));
if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL)
DBUG_RETURN(TRUE);
fb.buf[0] = '\0'; /* Init for get_word */
fb.p = fb.buf;
result=FALSE;
if (fill_array(set->ctype, CTYPE_TABLE_SIZE, &fb) ||
fill_array(set->to_lower, TO_LOWER_TABLE_SIZE, &fb) ||
fill_array(set->to_upper, TO_UPPER_TABLE_SIZE, &fb) ||
fill_array(set->sort_order, SORT_ORDER_TABLE_SIZE, &fb))
result=TRUE;
my_fclose(fb.f, MYF(0));
DBUG_RETURN(result);
}
uint get_charset_number(const char *charset_name)
{
uint number=compiled_charset_number(charset_name);
if (number)
return number;
if (init_available_charsets(MYF(0))) /* If it isn't initialized */
return 0;
return num_from_csname(available_charsets, charset_name);
}
const char *get_charset_name(uint charset_number)
{
const char *name=compiled_charset_name(charset_number);
if (*name != '?')
return name;
if (init_available_charsets(MYF(0))) /* If it isn't initialized */
return "?";
return name_from_csnum(available_charsets, charset_number);
}
static CHARSET_INFO *find_charset(CHARSET_INFO **table, uint cs_number,
size_t tablesz)
{
uint i;
for (i = 0; i < tablesz; ++i)
if (table[i]->number == cs_number)
return table[i];
return NULL;
}
static CHARSET_INFO *find_charset_by_name(CHARSET_INFO **table,
const char *name, size_t tablesz)
{
uint i;
for (i = 0; i < tablesz; ++i)
if (!strcmp(table[i]->name,name))
return table[i];
return NULL;
}
static CHARSET_INFO *add_charset(uint cs_number, const char *cs_name, myf flags)
{
CHARSET_INFO tmp_cs,*cs;
uchar tmp_ctype[CTYPE_TABLE_SIZE];
uchar tmp_to_lower[TO_LOWER_TABLE_SIZE];
uchar tmp_to_upper[TO_UPPER_TABLE_SIZE];
uchar tmp_sort_order[SORT_ORDER_TABLE_SIZE];
/* Don't allocate memory if we are not sure we can find the char set */
cs= &tmp_cs;
bzero((char*) cs, sizeof(*cs));
cs->ctype=tmp_ctype;
cs->to_lower=tmp_to_lower;
cs->to_upper=tmp_to_upper;
cs->sort_order=tmp_sort_order;
if (read_charset_file(cs_number, cs, flags))
return NULL;
cs = (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),
MYF(MY_WME));
*cs=tmp_cs;
cs->name = (char *) my_once_alloc((uint) strlen(cs_name)+1, MYF(MY_WME));
cs->ctype = (uchar*) my_once_alloc(CTYPE_TABLE_SIZE, MYF(MY_WME));
cs->to_lower = (uchar*) my_once_alloc(TO_LOWER_TABLE_SIZE, MYF(MY_WME));
cs->to_upper = (uchar*) my_once_alloc(TO_UPPER_TABLE_SIZE, MYF(MY_WME));
cs->sort_order=(uchar*) my_once_alloc(SORT_ORDER_TABLE_SIZE, MYF(MY_WME));
cs->number = cs_number;
memcpy((char*) cs->name, (char*) cs_name, strlen(cs_name) + 1);
memcpy((char*) cs->ctype, (char*) tmp_ctype, sizeof(tmp_ctype));
memcpy((char*) cs->to_lower, (char*) tmp_to_lower, sizeof(tmp_to_lower));
memcpy((char*) cs->to_upper, (char*) tmp_to_upper, sizeof(tmp_to_upper));
memcpy((char*) cs->sort_order, (char*) tmp_sort_order,
sizeof(tmp_sort_order));
insert_dynamic(&cs_info_table, (gptr) &cs);
return cs;
}
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
CHARSET_INFO *cs;
/*
To make things thread safe we are not allowing other threads to interfere
while we may changing the cs_info_table
*/
pthread_mutex_lock(&THR_LOCK_charset);
if (!(cs = find_charset((CHARSET_INFO**) cs_info_table.buffer, cs_number,
cs_info_table.elements)))
if (!(cs = find_compiled_charset(cs_number)))
cs=add_charset(cs_number, get_charset_name(cs_number), flags);
pthread_mutex_unlock(&THR_LOCK_charset);
return cs;
}
static CHARSET_INFO *get_internal_charset_by_name(const char *name, myf flags)
{
CHARSET_INFO *cs;
/*
To make things thread safe we are not allowing other threads to interfere
while we may changing the cs_info_table
*/
pthread_mutex_lock(&THR_LOCK_charset);
if (!(cs = find_charset_by_name((CHARSET_INFO**) cs_info_table.buffer, name,
cs_info_table.elements)))
if (!(cs = find_compiled_charset_by_name(name)))
cs=add_charset(get_charset_number(name), name, flags);
pthread_mutex_unlock(&THR_LOCK_charset);
return cs;
}
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
CHARSET_INFO *cs;
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
cs=get_internal_charset(cs_number, flags);
if (!cs && (flags & MY_WME))
{
char index_file[FN_REFLEN], cs_string[23];
strmov(get_charsets_dir(index_file), "Index");
cs_string[0]='#';
int10_to_str(cs_number, cs_string+1, 10);
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
}
return cs;
}
my_bool set_default_charset(uint cs, myf flags)
{
CHARSET_INFO *new_charset;
DBUG_ENTER("set_default_charset");
DBUG_PRINT("enter",("character set: %d",(int) cs));
new_charset = get_charset(cs, flags);
if (!new_charset)
{
DBUG_PRINT("error",("Couldn't set default character set"));
DBUG_RETURN(TRUE); /* error */
}
default_charset_info = new_charset;
DBUG_RETURN(FALSE);
}
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
CHARSET_INFO *cs;
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
cs=get_internal_charset_by_name(cs_name, flags);
if (!cs && (flags & MY_WME))
{
char index_file[FN_REFLEN];
strmov(get_charsets_dir(index_file), "Index");
my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
}
return cs;
}
my_bool set_default_charset_by_name(const char *cs_name, myf flags)
{
CHARSET_INFO *new_charset;
DBUG_ENTER("set_default_charset_by_name");
DBUG_PRINT("enter",("character set: %s", cs_name));
new_charset = get_charset_by_name(cs_name, flags);
if (!new_charset)
{
DBUG_PRINT("error",("Couldn't set default character set"));
DBUG_RETURN(TRUE); /* error */
}
default_charset_info = new_charset;
DBUG_RETURN(FALSE);
}
/* Only append name if it doesn't exist from before */
static my_bool charset_in_string(const char *name, DYNAMIC_STRING *s)
{
uint length= (uint) strlen(name);
const char *pos;
for (pos=s->str ; (pos=strstr(pos,name)) ; pos++)
{
if (! pos[length] || pos[length] == ' ')
return TRUE; /* Already existed */
}
return FALSE;
}
static void charset_append(DYNAMIC_STRING *s, const char *name)
{
if (!charset_in_string(name, s)) {
dynstr_append(s, name);
dynstr_append(s, " ");
}
}
/* Returns a dynamically-allocated string listing the character sets
requested. The caller is responsible for freeing the memory. */
char * list_charsets(myf want_flags)
{
DYNAMIC_STRING s;
char *p;
(void)init_available_charsets(MYF(0));
init_dynamic_string(&s, NullS, 256, 1024);
if (want_flags & MY_COMPILED_SETS)
{
CHARSET_INFO *cs;
for (cs = compiled_charsets; cs->number > 0; cs++)
{
dynstr_append(&s, cs->name);
dynstr_append(&s, " ");
}
}
if (want_flags & MY_CONFIG_SETS)
{
CS_ID **c;
char buf[FN_REFLEN];
MY_STAT status;
if((c=available_charsets))
for (; *c; ++c)
{
if (charset_in_string((*c)->name, &s))
continue;
get_charset_conf_name((*c)->number, buf);
if (!my_stat(buf, &status, MYF(0)))
continue; /* conf file doesn't exist */
dynstr_append(&s, (*c)->name);
dynstr_append(&s, " ");
}
}
if (want_flags & MY_INDEX_SETS)
{
CS_ID **c;
for (c = available_charsets; *c; ++c)
charset_append(&s, (*c)->name);
}
if (want_flags & MY_LOADED_SETS)
{
uint i;
for (i = 0; i < cs_info_table.elements; i++)
charset_append(&s,
dynamic_element(&cs_info_table, i, CHARSET_INFO *)->name);
}
s.str[s.length - 1] = '\0'; /* chop trailing space */
p = my_strdup(s.str, MYF(MY_WME));
dynstr_free(&s);
return p;
}
/****************************************************************************
* Code for debugging.
****************************************************************************/
static void _print_array(uint8 *data, uint size)
{
uint i;
for (i = 0; i < size; ++i)
{
if (i == 0 || i % 16 == size % 16) printf(" ");
printf(" %02x", data[i]);
if ((i+1) % 16 == size % 16) printf("\n");
}
}
/* _print_csinfo is called from test_charset.c */
void _print_csinfo(CHARSET_INFO *cs)
{
printf("%s #%d\n", cs->name, cs->number);
printf("ctype:\n"); _print_array(cs->ctype, 257);
printf("to_lower:\n"); _print_array(cs->to_lower, 256);
printf("to_upper:\n"); _print_array(cs->to_upper, 256);
printf("sort_order:\n"); _print_array(cs->sort_order, 256);
printf("collate: %3s (%d, %p, %p, %p, %p, %p)\n",
cs->strxfrm_multiply ? "yes" : "no",
cs->strxfrm_multiply,
cs->strcoll,
cs->strxfrm,
cs->strnncoll,
cs->strnxfrm,
cs->like_range);
printf("multi-byte: %3s (%d, %p, %p, %p)\n",
cs->mbmaxlen ? "yes" : "no",
cs->mbmaxlen,
cs->ismbchar,
cs->ismbhead,
cs->mbcharlen);
}