Reorganization to restore generating charset C files from conf files

2025-01-15 19:42:28 +01:00 · 2003-01-29 15:08:09 +04:00 · 2003-01-29 15:08:09 +04:00 · 2c2b14f9a2
commit 2c2b14f9a2
parent 231553039f
17 changed files with 4139 additions and 4067 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -166,10 +166,20 @@ typedef struct charset_info_st

 extern CHARSET_INFO *my_charset_bin;
 extern CHARSET_INFO *my_charset_latin1;
-extern CHARSET_INFO *default_charset_info;
-extern CHARSET_INFO *system_charset_info;
-extern CHARSET_INFO *all_charsets[256];
-extern my_bool init_compiled_charsets(myf flags);
+extern CHARSET_INFO my_charset_big5;
+extern CHARSET_INFO my_charset_czech;
+extern CHARSET_INFO my_charset_euc_kr;
+extern CHARSET_INFO my_charset_gb2312;
+extern CHARSET_INFO my_charset_gbk;
+extern CHARSET_INFO my_charset_latin1_de;
+extern CHARSET_INFO my_charset_sjis;
+extern CHARSET_INFO my_charset_tis620;
+extern CHARSET_INFO my_charset_ucs2;
+extern CHARSET_INFO my_charset_ujis;
+extern CHARSET_INFO my_charset_utf8;
+extern CHARSET_INFO my_charset_win1250ch;
+
+
 extern my_bool my_parse_charset_xml(const char *bug, uint len,
 				    int (*add)(CHARSET_INFO *cs));

--- a/include/my_sys.h
+++ b/include/my_sys.h
@ -201,6 +201,11 @@ extern int (*fatal_error_handler_hook)(uint my_err, const char *str,
 				       myf MyFlags);

 /* charsets */
+extern CHARSET_INFO *default_charset_info;
+extern CHARSET_INFO *system_charset_info;
+extern CHARSET_INFO *all_charsets[256];
+extern CHARSET_INFO compiled_charsets[];
+
 extern uint get_charset_number(const char *cs_name);
 extern const char *get_charset_name(uint cs_number);
 extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
--- a/libmysql/Makefile.shared
+++ b/libmysql/Makefile.shared
@ -40,7 +40,7 @@ mystringsobjects =	strmov.lo strxmov.lo strxnmov.lo strnmov.lo \
 			strtoull.lo strtoll.lo llstr.lo my_vsnprintf.lo \
 			ctype.lo ctype-simple.lo ctype-bin.lo ctype-mb.lo \
 			ctype-big5.lo ctype-czech.lo ctype-euc_kr.lo \
-			ctype-win1250ch.lo ctype-utf8.lo \
+			ctype-win1250ch.lo ctype-utf8.lo ctype-extra.lo \
 			ctype-gb2312.lo ctype-gbk.lo ctype-latin1_de.lo \
 			ctype-sjis.lo ctype-tis620.lo ctype-ujis.lo xml.lo

--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -314,6 +314,76 @@ char *get_charsets_dir(char *buf)
  DBUG_RETURN(strend(buf));
 }

+CHARSET_INFO *all_charsets[256];	/* charset pointers, indexed by charset number */
+CHARSET_INFO *default_charset_info = &compiled_charsets[0];	/* starts as the first compiled_charsets[] entry */
+CHARSET_INFO *system_charset_info = &compiled_charsets[0];	/* starts as the first compiled_charsets[] entry */
+
+#define MY_ADD_CHARSET(x)	all_charsets[(x)->number]=(x)	/* store charset pointer x in the slot selected by its number */
+
+/* Fill all_charsets[] with the binary charset, the conditionally compiled charsets and compiled_charsets[]; always returns FALSE. */
+static my_bool init_compiled_charsets(myf flags  __attribute__((unused)))
+{
+  CHARSET_INFO *cs;
+
+  MY_ADD_CHARSET(my_charset_bin);	/* outside any #ifdef: always registered */
+
+#ifdef HAVE_CHARSET_big5	/* this and each following block registers one charset only if its HAVE_CHARSET_* macro is defined */
+  MY_ADD_CHARSET(&my_charset_big5);
+#endif
+
+#ifdef HAVE_CHARSET_czech
+  MY_ADD_CHARSET(&my_charset_czech);
+#endif
+
+#ifdef HAVE_CHARSET_euc_kr
+  MY_ADD_CHARSET(&my_charset_euc_kr);
+#endif
+
+#ifdef HAVE_CHARSET_gb2312
+  MY_ADD_CHARSET(&my_charset_gb2312);
+#endif
+
+#ifdef HAVE_CHARSET_gbk
+  MY_ADD_CHARSET(&my_charset_gbk);
+#endif
+
+#ifdef HAVE_CHARSET_latin1_de
+  MY_ADD_CHARSET(&my_charset_latin1_de);
+#endif
+
+#ifdef HAVE_CHARSET_sjis
+  MY_ADD_CHARSET(&my_charset_sjis);
+#endif
+
+#ifdef HAVE_CHARSET_tis620
+  MY_ADD_CHARSET(&my_charset_tis620);
+#endif
+
+#ifdef HAVE_CHARSET_ucs2
+  MY_ADD_CHARSET(&my_charset_ucs2);
+#endif
+
+#ifdef HAVE_CHARSET_ujis
+  MY_ADD_CHARSET(&my_charset_ujis);
+#endif
+
+#ifdef HAVE_CHARSET_utf8
+  MY_ADD_CHARSET(&my_charset_utf8);
+#endif
+
+#ifdef HAVE_CHARSET_win1250ch
+  MY_ADD_CHARSET(&my_charset_win1250ch);
+#endif
+
+  /* Register each compiled_charsets[] entry under its number; the entry with a NULL name ends the table */
+  for (cs=compiled_charsets; cs->name; cs++)
+  {
+    all_charsets[cs->number]=cs;	/* overwrites any earlier registration that used the same number */
+  }
+  
+  return FALSE;
+}
+
 static my_bool init_available_charsets(myf myflags)
 {
  char fname[FN_REFLEN];
@ -607,3 +677,5 @@ char *list_charsets(myf want_flags)
  
  return p;
 }
+
+
--- a/regex/Makefile.am
+++ b/regex/Makefile.am
@ -17,7 +17,7 @@

 INCLUDES =		@MT_INCLUDES@ -I$(top_srcdir)/include
 noinst_LIBRARIES =	libregex.a
-LDADD =			libregex.a ../strings/libmystrings.a ../mysys/libmysys.a
+LDADD =			libregex.a ../strings/libmystrings.a
 noinst_HEADERS =	cclass.h cname.h regex2.h utils.h engine.c regex.h
 libregex_a_SOURCES =	regerror.c regcomp.c regexec.c regfree.c reginit.c
 noinst_PROGRAMS =	re
--- a/regex/main.c
+++ b/regex/main.c
@ -74,7 +74,7 @@ char *argv[];
 		exit(status);
 	}

-	err = regcomp(&re, argv[optind++], copts, default_charset_info);
+	err = regcomp(&re, argv[optind++], copts, my_charset_latin1);
 	if (err) {
 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 		fprintf(stderr, "error %s, %d/%d `%s'\n",
@ -226,7 +226,7 @@ int opts;			/* may not match f1 */
 	strcpy(f0copy, f0);
 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
 	fixstr(f0copy);
-	err = regcomp(&re, f0copy, opts, default_charset_info);
+	err = regcomp(&re, f0copy, opts, my_charset_latin1);
 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
 		/* unexpected error or wrong error */
 		len = regerror(err, &re, erbuf, sizeof(erbuf));
--- a/regex/regcomp.c
+++ b/regex/regcomp.c
@ -117,7 +117,7 @@ CHARSET_INFO *charset;
 #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
 #endif

-	regex_init();				/* Init cclass if neaded */
+	regex_init(charset);	/* Init cclass if needed */
 	preg->charset=charset;
 	cflags = GOODFLAGS(cflags);
 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
--- a/regex/regex.h
+++ b/regex/regex.h
@ -76,7 +76,7 @@ extern void regfree(regex_t *);

 /* === reginit.c === */

-extern void regex_init(void);	/* Should be called for multithread progs */
+extern void regex_init(CHARSET_INFO *cs);	/* Should be called for multithread progs */
 extern void regex_end(void);	/* If one wants a clean end */

 #ifdef __cplusplus
--- a/regex/reginit.c
+++ b/regex/reginit.c
@ -7,12 +7,11 @@

 static bool regex_inited=0;

-void regex_init()
+void regex_init(CHARSET_INFO *cs)
 {
  char buff[CCLASS_LAST][256];
  int  count[CCLASS_LAST];
  uint i;
-  CHARSET_INFO *cs=default_charset_info;

  if (!regex_inited)
  {
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@ -1937,7 +1937,7 @@ static int init_common_variables(const char *conf_file_name, int argc,
  set_var_init();
  mysys_uses_curses=0;
 #ifdef USE_REGEX
-  regex_init();
+  regex_init(my_charset_latin1);
 #endif
  if (set_default_charset_by_name(sys_charset.value, MYF(MY_WME)))
    return 1;
--- a/strings/Makefile.am
+++ b/strings/Makefile.am
@ -22,19 +22,19 @@ pkglib_LIBRARIES =	libmystrings.a
 # Exact one of ASSEMBLER_X
 if ASSEMBLER_x86
 ASRCS		= strings-x86.s longlong2str-x86.s
-CSRCS		= bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c
+CSRCS		= bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c ctype-extra.c
 else
 if ASSEMBLER_sparc32
 # These file MUST all be on the same line!! Otherwise automake
 # generats a very broken makefile
 ASRCS		= bmove_upp-sparc.s strappend-sparc.s strend-sparc.s strinstr-sparc.s strmake-sparc.s strmov-sparc.s strnmov-sparc.s strstr-sparc.s
-CSRCS		= strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c
+CSRCS		= strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c ctype-extra.c
 else
 #no assembler
 ASRCS		=
 # These file MUST all be on the same line!! Otherwise automake
 # generats a very broken makefile
-CSRCS		= strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c
+CSRCS		= strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c xml.c ctype-extra.c
 endif
 endif

@ -56,7 +56,7 @@ EXTRA_DIST =		ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-win1250ch.c \
                        t_ctype.h

 libmystrings_a_LIBADD=
-conf_to_src_SOURCES = conf_to_src.c
+conf_to_src_SOURCES = conf_to_src.c xml.c ctype.c
 conf_to_src_LDADD=
 #force static linking of conf_to_src - essential when linking against
 #custom installation of libc
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@ -21,6 +21,11 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+
+#include <my_global.h>
+#include <m_ctype.h>
+#include <my_xml.h>

 #define CHARSETS_SUBDIR "sql/share/charsets"
 #define CTYPE_TABLE_SIZE      257
@ -29,39 +34,9 @@
 #define SORT_ORDER_TABLE_SIZE 256
 #define ROW_LEN 16

-void print_arrays_for(char *set);
-
 char *prog;
 char buf[1024], *p, *endptr;

-int
-main(int argc, char **argv)
-{
-  prog = *argv;
-
-  if (argc < 2) {
-    fprintf(stderr, "usage: %s source-dir [charset [, charset]]\n", prog);
-    exit(EXIT_FAILURE);
-  }
-
-  --argc; ++argv;       /* skip program name */
-
-  if (chdir(*argv) != 0) {
-    fprintf(stderr, "%s: can't cd to %s\n", prog, *argv);
-    exit(EXIT_FAILURE);
-  }
-  --argc; ++argv;
-
-  if (chdir(CHARSETS_SUBDIR) != 0) {
-    fprintf(stderr, "%s: can't cd to %s\n", prog, CHARSETS_SUBDIR);
-    exit(EXIT_FAILURE);
-  }
-
-  while (argc--)
-    print_arrays_for(*argv++);
-
-  exit(EXIT_SUCCESS);
-}

 void
 print_array(FILE *f, const char *set, const char *name, int n)
@ -140,3 +115,80 @@ print_arrays_for(char *set)

  return;
 }
+
+#define MAX_BUF	(16*1024)	/* size in bytes of the charset-file read buffer; parenthesized so it expands safely inside larger expressions */
+
+static CHARSET_INFO all_charsets[256];	/* table searched by get_charset_number() */
+/* Return the number of the charset named charset_name, or 0 when no entry matches. */
+static int get_charset_number(const char *charset_name)
+{
+  CHARSET_INFO *cs;
+  CHARSET_INFO *end= all_charsets + sizeof(all_charsets)/sizeof(all_charsets[0]);
+  /* The bound is derived from the array size so that the final slot is scanned too */
+  for (cs= all_charsets; cs < end; ++cs)
+    if (cs->name && !strcmp(cs->name, charset_name))
+      return cs->number;
+  return 0;
+}
+/* Empty placeholder: the call to it in add_collation() sits inside an #if 0 section. */
+static void simple_cs_copy_data()
+{
+}
+/* Callback passed to my_parse_charset_xml(); for now it only resolves a missing charset number and always returns MY_XML_OK. */
+static int add_collation(CHARSET_INFO *cs)
+{
+  if (cs->name && (cs->number || (cs->number=get_charset_number(cs->name))))	/* side effect: a zero number is replaced by the lookup result */
+  {
+#if 0	/* compiled out: nothing in this section is part of the built program */
+    if (!(all_charsets[cs->number].state & MY_CS_COMPILED))
+    {
+      simple_cs_copy_data(all_charsets[cs->number],cs);
+      if (simple_cs_is_full(all_charsets[cs->number]))
+      {
+        simple_cs_init_functions(all_charsets[cs->number]);
+        all_charsets[cs->number]->state |= MY_CS_LOADED;
+      }
+    }
+    
+    cs->number= 0;
+    cs->name= NULL;
+    cs->state= 0;
+    cs->sort_order= NULL;
+    cs->state= 0;
+#endif
+  }
+  return MY_XML_OK;
+}
+
+/* Read up to MAX_BUF bytes of filename and pass them to the charset XML parser. */
+static int my_read_charset_file(const char *filename)
+{
+  char buf[MAX_BUF];
+  int  fd;
+  int  len;	/* signed, so a failed read() (-1) is detectable */
+
+  if ((fd=open(filename,O_RDONLY)) < 0)
+    return 1;	/* cannot open the file */
+
+  len=read(fd,buf,MAX_BUF);
+  close(fd);
+  if (len < 0)
+    return 1;	/* read error: do not hand a bogus length to the parser */
+  if (my_parse_charset_xml(buf,(uint) len,add_collation))
+  {	/* parse errors are ignored for now: the report below is compiled out */
+#if 0
+    printf("ERROR at line %d pos %d '%s'\n",
+	   my_xml_error_lineno(&p)+1,
+	   my_xml_error_pos(&p),
+	   my_xml_error_string(&p));
+#endif
+  }
+  return FALSE;
+}
+
+/* Entry point: clear the charset table and exit with status 0. */
+int main()
+{
+  memset(all_charsets, 0, sizeof(all_charsets));
+  return 0;
+}
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
--- a/strings/ctype.c
+++ b/strings/ctype.c
--- a/strings/my_vsnprintf.c
+++ b/strings/my_vsnprintf.c
@ -65,7 +65,7 @@ int my_vsnprintf(char *to, size_t n, const char* fmt, va_list ap)
    length= num_state= pre_zero= 0;
    for (;; fmt++)
    {
-      if (my_isdigit(system_charset_info,*fmt))
+      if (my_isdigit(my_charset_latin1,*fmt))
      {
 	if (!num_state)
 	{
--- a/strings/str2int.c
+++ b/strings/str2int.c
@ -98,7 +98,7 @@ char *str2int(register const char *src, register int radix, long int lower,
      converted value (and the scale!) as *negative* numbers,
      so the sign is the opposite of what you might expect.
      */
-  while (my_isspace(system_charset_info,*src)) src++;
+  while (my_isspace(my_charset_latin1,*src)) src++;
  sign = -1;
  if (*src == '+') src++; else
    if (*src == '-') src++, sign = 1;
--- a/strings/strto.c
+++ b/strings/strto.c
@ -95,7 +95,7 @@ function (const char *nptr,char **endptr,int base)
  s = nptr;

  /* Skip white space.	*/
-  while (my_isspace (system_charset_info, *s))
+  while (my_isspace (my_charset_latin1, *s))
    ++s;
  if (*s == '\0')
  {
@ -115,7 +115,7 @@ function (const char *nptr,char **endptr,int base)
  }
    

-  if (base == 16 && s[0] == '0' && my_toupper (system_charset_info, s[1]) == 'X')
+  if (base == 16 && s[0] == '0' && my_toupper (my_charset_latin1, s[1]) == 'X')
    s += 2;

  /* If BASE is zero, figure it out ourselves.	*/
@ -123,7 +123,7 @@ function (const char *nptr,char **endptr,int base)
  {
    if (*s == '0')
    {
-      if (my_toupper (system_charset_info, s[1]) == 'X')
+      if (my_toupper (my_charset_latin1, s[1]) == 'X')
      {
 	s += 2;
 	base = 16;
@ -145,10 +145,10 @@ function (const char *nptr,char **endptr,int base)
  i = 0;
  for (c = *s; c != '\0'; c = *++s)
  {
-    if (my_isdigit (system_charset_info, c))
+    if (my_isdigit (my_charset_latin1, c))
      c -= '0';
-    else if (my_isalpha (system_charset_info, c))
-      c = my_toupper (system_charset_info, c) - 'A' + 10;
+    else if (my_isalpha (my_charset_latin1, c))
+      c = my_toupper (my_charset_latin1, c) - 'A' + 10;
    else
      break;
    if (c >= base)