mariadb/mysys/charset-def.c
unknown af0a3afe05 Bug#28916 LDML doesn't work for utf8
and is not described in the manual
- Adding missing initialization for utf8 collations
- Minor code clean-ups: renaming variables,
  moving code into a new separate function.
- Adding test, to check that both ucs2 and utf8 user
  defined collations work (ucs2_test_ci and utf8_test_ci)
- Adding Vietnamese collation as a complex user defined
  collation example.


include/m_ctype.h:
  Renaming variable names to match collation names (for convenience).
mysys/charset-def.c:
  - Removing redundant declarations for variables declared in m_ctype.h
  - Renaming variable names to match collation names (for convenience).
mysys/charset.c:
  - Renaming "new" to "newcs", to avoid using C reserved word as a variable name
  - Moving UCA initialization code into a separate function
  - The bug fix itself: adding initialization of utf8 collations
strings/ctype-uca.c:
  Renaming variable names to match collation names (for convenience).
strings/ctype.c:
  Increasing buffer size to fit tailoring for languages
  with complex rules (e.g. Vietnamese).
mysql-test/r/ctype_ldml.result:
  Adding test case
mysql-test/std_data/Index.xml:
  Adding Index.xml example with user defined collations.
mysql-test/t/ctype_ldml-master.opt:
  Adding OPT file for the test case,
  to use the example Index.xml file.
mysql-test/t/ctype_ldml.test:
  Adding test case
2007-06-07 17:55:55 +05:00

196 lines
7.4 KiB
C

/* Copyright (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "mysys_priv.h"
/*
Include all compiled character sets into the client
If a client don't want to use all of them, he can define his own
init_compiled_charsets() that only adds those that he wants
*/
#ifdef HAVE_UCA_COLLATIONS
#ifdef HAVE_CHARSET_ucs2
extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci;
extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_slovenian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_polish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_czech_uca_ci;
extern CHARSET_INFO my_charset_ucs2_danish_uca_ci;
extern CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci;
extern CHARSET_INFO my_charset_ucs2_roman_uca_ci;
extern CHARSET_INFO my_charset_ucs2_persian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci;
extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
#endif
#ifdef HAVE_CHARSET_utf8
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf8_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf8_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf8_polish_uca_ci;
extern CHARSET_INFO my_charset_utf8_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf8_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf8_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf8_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf8_czech_uca_ci;
extern CHARSET_INFO my_charset_utf8_danish_uca_ci;
extern CHARSET_INFO my_charset_utf8_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci;
#ifdef HAVE_UTF8_GENERAL_CS
extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
#endif
#endif /* HAVE_UCA_COLLATIONS */
my_bool init_compiled_charsets(myf flags __attribute__((unused)))
{
CHARSET_INFO *cs;
add_compiled_collation(&my_charset_bin);
add_compiled_collation(&my_charset_latin1);
add_compiled_collation(&my_charset_latin1_bin);
add_compiled_collation(&my_charset_latin1_german2_ci);
#ifdef HAVE_CHARSET_big5
add_compiled_collation(&my_charset_big5_chinese_ci);
add_compiled_collation(&my_charset_big5_bin);
#endif
#ifdef HAVE_CHARSET_cp1250
add_compiled_collation(&my_charset_cp1250_czech_ci);
#endif
#ifdef HAVE_CHARSET_cp932
add_compiled_collation(&my_charset_cp932_japanese_ci);
add_compiled_collation(&my_charset_cp932_bin);
#endif
#ifdef HAVE_CHARSET_latin2
add_compiled_collation(&my_charset_latin2_czech_ci);
#endif
#ifdef HAVE_CHARSET_eucjpms
add_compiled_collation(&my_charset_eucjpms_japanese_ci);
add_compiled_collation(&my_charset_eucjpms_bin);
#endif
#ifdef HAVE_CHARSET_euckr
add_compiled_collation(&my_charset_euckr_korean_ci);
add_compiled_collation(&my_charset_euckr_bin);
#endif
#ifdef HAVE_CHARSET_gb2312
add_compiled_collation(&my_charset_gb2312_chinese_ci);
add_compiled_collation(&my_charset_gb2312_bin);
#endif
#ifdef HAVE_CHARSET_gbk
add_compiled_collation(&my_charset_gbk_chinese_ci);
add_compiled_collation(&my_charset_gbk_bin);
#endif
#ifdef HAVE_CHARSET_sjis
add_compiled_collation(&my_charset_sjis_japanese_ci);
add_compiled_collation(&my_charset_sjis_bin);
#endif
#ifdef HAVE_CHARSET_tis620
add_compiled_collation(&my_charset_tis620_thai_ci);
add_compiled_collation(&my_charset_tis620_bin);
#endif
#ifdef HAVE_CHARSET_ucs2
add_compiled_collation(&my_charset_ucs2_general_ci);
add_compiled_collation(&my_charset_ucs2_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_ucs2_unicode_ci);
add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
add_compiled_collation(&my_charset_ucs2_slovenian_uca_ci);
add_compiled_collation(&my_charset_ucs2_polish_uca_ci);
add_compiled_collation(&my_charset_ucs2_estonian_uca_ci);
add_compiled_collation(&my_charset_ucs2_spanish_uca_ci);
add_compiled_collation(&my_charset_ucs2_swedish_uca_ci);
add_compiled_collation(&my_charset_ucs2_turkish_uca_ci);
add_compiled_collation(&my_charset_ucs2_czech_uca_ci);
add_compiled_collation(&my_charset_ucs2_danish_uca_ci);
add_compiled_collation(&my_charset_ucs2_lithuanian_uca_ci);
add_compiled_collation(&my_charset_ucs2_slovak_uca_ci);
add_compiled_collation(&my_charset_ucs2_spanish2_uca_ci);
add_compiled_collation(&my_charset_ucs2_roman_uca_ci);
add_compiled_collation(&my_charset_ucs2_persian_uca_ci);
add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci);
add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
#endif
#endif
#ifdef HAVE_CHARSET_ujis
add_compiled_collation(&my_charset_ujis_japanese_ci);
add_compiled_collation(&my_charset_ujis_bin);
#endif
#ifdef HAVE_CHARSET_utf8
add_compiled_collation(&my_charset_utf8_general_ci);
add_compiled_collation(&my_charset_utf8_bin);
#ifdef HAVE_UTF8_GENERAL_CS
add_compiled_collation(&my_charset_utf8_general_cs);
#endif
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf8_unicode_ci);
add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8_romanian_uca_ci);
add_compiled_collation(&my_charset_utf8_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf8_polish_uca_ci);
add_compiled_collation(&my_charset_utf8_estonian_uca_ci);
add_compiled_collation(&my_charset_utf8_spanish_uca_ci);
add_compiled_collation(&my_charset_utf8_swedish_uca_ci);
add_compiled_collation(&my_charset_utf8_turkish_uca_ci);
add_compiled_collation(&my_charset_utf8_czech_uca_ci);
add_compiled_collation(&my_charset_utf8_danish_uca_ci);
add_compiled_collation(&my_charset_utf8_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf8_slovak_uca_ci);
add_compiled_collation(&my_charset_utf8_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf8_roman_uca_ci);
add_compiled_collation(&my_charset_utf8_persian_uca_ci);
add_compiled_collation(&my_charset_utf8_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
#endif
#endif
/* Copy compiled charsets */
for (cs=compiled_charsets; cs->name; cs++)
add_compiled_collation(cs);
return FALSE;
}