mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-04 04:46:15 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			354 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			354 lines
		
	
	
	
		
			8.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Copyright (c) 2004, 2006 MySQL AB
 | 
						|
   Copyright (c) 2009-2011, Monty Program Ab
 | 
						|
   Use is subject to license terms.
 | 
						|
   Copyright (c) 2009-2011, Monty Program Ab
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or modify
 | 
						|
   it under the terms of the GNU General Public License as published by
 | 
						|
   the Free Software Foundation; version 2 of the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */
 | 
						|
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <string.h>
 | 
						|
 | 
						|
/* Short aliases for the integer types used by the weight tables. */
typedef unsigned char  uchar;
typedef unsigned short uint16;

/*
  Collation data collected for a single code point.

  num     - number of collation elements stored for the character
  weight  - weight[level][n] is the weight of the n-th collation
            element on the given level (up to 4 levels, up to 9
            elements per character)
*/
struct uca_item_st
{
  uchar  num;
  uint16 weight[4][9];
};
 | 
						|
 | 
						|
/*
  Page geometry of the generated tables.

  The code point space is cut into pages of MY_UCA_NCHARS characters:
    page number    = code >> MY_UCA_PSHIFT
    offset in page = code &  MY_UCA_CMASK

  The disabled branch keeps an alternative layout of 1024 pages
  with 64 characters each.
*/
#if 0
#define MY_UCA_NPAGES	1024
#define MY_UCA_NCHARS	64
#define MY_UCA_CMASK	63
#define MY_UCA_PSHIFT	6
#else
#define MY_UCA_NPAGES	4352 /* 0x110000 characters / 0x100 chars per page */
#define MY_UCA_NCHARS	256
#define MY_UCA_CMASK	255
#define MY_UCA_PSHIFT	8
#endif

/* Highest code point handled; input lines above it are ignored */
#define MAX_ALLOWED_CODE 0x10FFFF
 | 
						|
 | 
						|
/* Name that goes into all array names */
static const char *global_name_prefix= "uca520";

/*
  Name prefix that goes into page weight array names after
  global_name_prefix. One entry per dumped weight level.
  The entries point to string literals, hence "const char *".
*/
static const char *pname_prefix[]= {"_p", "_p", "_p"};

/*
  Name suffix that goes into page weight array names after the page
  number. One entry per dumped weight level; the first level has no suffix.
*/
static const char *pname_suffix[]= {"", "_w2", "_w3"};
 | 
						|
 | 
						|
 | 
						|
int main(int ac, char **av)
 | 
						|
{
 | 
						|
  char str[256];
 | 
						|
  char *weights[64];
 | 
						|
  static struct uca_item_st uca[MAX_ALLOWED_CODE+1];
 | 
						|
  size_t code, w;
 | 
						|
  int pageloaded[MY_UCA_NPAGES];
 | 
						|
  
 | 
						|
  bzero(uca, sizeof(uca));
 | 
						|
  bzero(pageloaded, sizeof(pageloaded));
 | 
						|
  
 | 
						|
  while (fgets(str,sizeof(str),stdin))
 | 
						|
  {
 | 
						|
    char *comment;
 | 
						|
    char *weight;
 | 
						|
    char *s;
 | 
						|
    size_t codenum;
 | 
						|
    
 | 
						|
    code= strtol(str,NULL,16);
 | 
						|
    
 | 
						|
    if (str[0]=='#' || (code > MAX_ALLOWED_CODE))
 | 
						|
      continue;
 | 
						|
    if ((comment=strchr(str,'#')))
 | 
						|
    {
 | 
						|
      *comment++= '\0';
 | 
						|
      for ( ; *comment==' ' ; comment++);
 | 
						|
    }else
 | 
						|
      continue;
 | 
						|
    
 | 
						|
    if ((weight=strchr(str,';')))
 | 
						|
    {
 | 
						|
      *weight++= '\0';
 | 
						|
      for ( ; *weight==' ' ; weight++);
 | 
						|
    }
 | 
						|
    else
 | 
						|
      continue;
 | 
						|
    
 | 
						|
    codenum= 0;
 | 
						|
    s= strtok(str, " \t");
 | 
						|
    while (s)
 | 
						|
    {
 | 
						|
      s= strtok(NULL, " \t");
 | 
						|
      codenum++;
 | 
						|
    }
 | 
						|
    
 | 
						|
    if (codenum>1)
 | 
						|
    {
 | 
						|
      /* Multi-character weight, 
 | 
						|
         i.e. contraction. 
 | 
						|
         Not supported yet.
 | 
						|
      */
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
    
 | 
						|
    uca[code].num= 0;
 | 
						|
    s= strtok(weight, " []");
 | 
						|
    while (s)
 | 
						|
    {
 | 
						|
      weights[uca[code].num]= s;
 | 
						|
      s= strtok(NULL, " []");
 | 
						|
      uca[code].num++;
 | 
						|
    }
 | 
						|
    
 | 
						|
    for (w=0; w < uca[code].num; w++)
 | 
						|
    {
 | 
						|
      size_t partnum;
 | 
						|
      
 | 
						|
      partnum= 0;
 | 
						|
      s= weights[w];
 | 
						|
      while (*s)
 | 
						|
      {
 | 
						|
        char *endptr;
 | 
						|
        size_t part;
 | 
						|
        part= strtol(s+1,&endptr,16);
 | 
						|
        uca[code].weight[partnum][w]= part;
 | 
						|
        s= endptr;
 | 
						|
        partnum++;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    /* Mark that a character from this page was loaded */
 | 
						|
    pageloaded[code >> MY_UCA_PSHIFT]++;
 | 
						|
  }
 | 
						|
  
 | 
						|
  
 | 
						|
  
 | 
						|
  /* Now set implicit weights */
 | 
						|
  for (code=0; code <= MAX_ALLOWED_CODE; code++)
 | 
						|
  {
 | 
						|
    size_t base, aaaa, bbbb;
 | 
						|
    
 | 
						|
    if (uca[code].num)
 | 
						|
      continue;
 | 
						|
    
 | 
						|
    /*
 | 
						|
    3400;<CJK Ideograph Extension A, First>
 | 
						|
    4DB5;<CJK Ideograph Extension A, Last>
 | 
						|
    4E00;<CJK Ideograph, First>
 | 
						|
    9FA5;<CJK Ideograph, Last>
 | 
						|
    */
 | 
						|
    
 | 
						|
    if (code >= 0x3400 && code <= 0x4DB5)
 | 
						|
      base= 0xFB80;
 | 
						|
    else if (code >= 0x4E00 && code <= 0x9FA5)
 | 
						|
      base= 0xFB40;
 | 
						|
    else
 | 
						|
      base= 0xFBC0;
 | 
						|
    
 | 
						|
    aaaa= base +  (code >> 15);
 | 
						|
    bbbb= (code & 0x7FFF) | 0x8000;
 | 
						|
    uca[code].weight[0][0]= aaaa;
 | 
						|
    uca[code].weight[0][1]= bbbb;
 | 
						|
    
 | 
						|
    uca[code].weight[1][0]= 0x0020;
 | 
						|
    uca[code].weight[1][1]= 0x0000;
 | 
						|
    
 | 
						|
    uca[code].weight[2][0]= 0x0002;
 | 
						|
    uca[code].weight[2][1]= 0x0000;
 | 
						|
    
 | 
						|
    uca[code].weight[3][0]= 0x0001;
 | 
						|
    uca[code].weight[3][2]= 0x0000;
 | 
						|
    
 | 
						|
    uca[code].num= 2;
 | 
						|
  }
 | 
						|
  
 | 
						|
  printf("#include \"my_uca.h\"\n");
 | 
						|
  
 | 
						|
  printf("#define MY_UCA_NPAGES %d\n",MY_UCA_NPAGES);
 | 
						|
  printf("#define MY_UCA_NCHARS %d\n",MY_UCA_NCHARS);
 | 
						|
  printf("#define MY_UCA_CMASK  %d\n",MY_UCA_CMASK);
 | 
						|
  printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT);
 | 
						|
 | 
						|
  for (w=0; w<3; w++)
 | 
						|
  {
 | 
						|
    size_t page;
 | 
						|
    int pagemaxlen[MY_UCA_NPAGES];
 | 
						|
 | 
						|
    for (page=0; page < MY_UCA_NPAGES; page++)
 | 
						|
    {
 | 
						|
      size_t offs;
 | 
						|
      size_t maxnum= 0;
 | 
						|
      size_t nchars= 0;
 | 
						|
      size_t mchars;
 | 
						|
      size_t ndefs= 0;
 | 
						|
      size_t code_line_start= page * MY_UCA_NCHARS;
 | 
						|
      
 | 
						|
      pagemaxlen[page]= 0;
 | 
						|
      
 | 
						|
      /*
 | 
						|
        Skip this page if no weights were loaded
 | 
						|
      */
 | 
						|
      
 | 
						|
      if (!pageloaded[page])
 | 
						|
        continue;
 | 
						|
      
 | 
						|
      /* 
 | 
						|
        Calculate maximum weight
 | 
						|
        length for this page
 | 
						|
      */
 | 
						|
      
 | 
						|
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
 | 
						|
      {
 | 
						|
        size_t i, num;
 | 
						|
        
 | 
						|
        code= page*MY_UCA_NCHARS+offs;
 | 
						|
        
 | 
						|
        /* Calculate only non-zero weights */
 | 
						|
        for (num=0, i=0; i < uca[code].num; i++)
 | 
						|
          if (uca[code].weight[w][i])
 | 
						|
            num++;
 | 
						|
        
 | 
						|
        maxnum= maxnum < num ? num : maxnum;
 | 
						|
        
 | 
						|
        /* Check if default weight */
 | 
						|
        if (w == 1 && num == 1)
 | 
						|
        {
 | 
						|
          /* 0020 0000 ... */
 | 
						|
          if (uca[code].weight[w][0] == 0x0020)
 | 
						|
            ndefs++;
 | 
						|
        }
 | 
						|
        else if (w == 2 && num == 1)
 | 
						|
        {
 | 
						|
          /* 0002 0000 ... */
 | 
						|
          if (uca[code].weight[w][0] == 0x0002)
 | 
						|
            ndefs++;
 | 
						|
        }
 | 
						|
      } 
 | 
						|
      maxnum++;
 | 
						|
      
 | 
						|
      /*
 | 
						|
        If the page have only default weights
 | 
						|
        then no needs to dump it, skip.
 | 
						|
      */
 | 
						|
      if (ndefs == MY_UCA_NCHARS)
 | 
						|
      {
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      switch (maxnum)
 | 
						|
      {
 | 
						|
        case 0: mchars= 8; break;
 | 
						|
        case 1: mchars= 8; break;
 | 
						|
        case 2: mchars= 8; break;
 | 
						|
        case 3: mchars= 9; break;
 | 
						|
        case 4: mchars= 8; break;
 | 
						|
        default: mchars= uca[code].num;
 | 
						|
      }
 | 
						|
      
 | 
						|
      pagemaxlen[page]= maxnum;
 | 
						|
 | 
						|
 | 
						|
      /*
 | 
						|
        Now print this page
 | 
						|
      */
 | 
						|
      
 | 
						|
      
 | 
						|
      printf("static const uint16 %s%s%03X%s[]= { /* %04X (%d weights per char) */\n",
 | 
						|
              global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
 | 
						|
              (int) page*MY_UCA_NCHARS, (int) maxnum);
 | 
						|
      
 | 
						|
      for (offs=0; offs < MY_UCA_NCHARS; offs++)
 | 
						|
      {
 | 
						|
        uint16 weight[8];
 | 
						|
        size_t num, i;
 | 
						|
        
 | 
						|
        code= page*MY_UCA_NCHARS+offs;
 | 
						|
        
 | 
						|
        bzero(weight,sizeof(weight));
 | 
						|
        
 | 
						|
        /* Copy non-zero weights */
 | 
						|
        for (num=0, i=0; i < uca[code].num; i++)
 | 
						|
        {
 | 
						|
          if (uca[code].weight[w][i])
 | 
						|
          {
 | 
						|
            weight[num]= uca[code].weight[w][i];
 | 
						|
            num++;
 | 
						|
          }
 | 
						|
        }
 | 
						|
        
 | 
						|
        for (i=0; i < maxnum; i++)
 | 
						|
        {
 | 
						|
          /* 
 | 
						|
            Invert weights for secondary level to
 | 
						|
            sort upper case letters before their
 | 
						|
            lower case counter part.
 | 
						|
          */
 | 
						|
          int tmp= weight[i];
 | 
						|
          if (w == 2 && tmp)
 | 
						|
            tmp= (int)(0x20 - weight[i]);
 | 
						|
          
 | 
						|
          
 | 
						|
          printf("0x%04X", tmp);
 | 
						|
          if ((offs+1 != MY_UCA_NCHARS) || (i+1!=maxnum))
 | 
						|
            printf(",");
 | 
						|
          else
 | 
						|
            printf(" ");
 | 
						|
          nchars++;
 | 
						|
        }
 | 
						|
        if (nchars >=mchars)
 | 
						|
        {
 | 
						|
          printf(" /* %04X */\n", (int) code_line_start);
 | 
						|
          code_line_start= code + 1;
 | 
						|
          nchars=0;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
          printf(" ");
 | 
						|
        }
 | 
						|
      }
 | 
						|
      printf("};\n\n");
 | 
						|
    }
 | 
						|
 | 
						|
    printf("const uchar %s_length%s[%d]={\n",
 | 
						|
           global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
 | 
						|
    for (page=0; page < MY_UCA_NPAGES; page++)
 | 
						|
    {
 | 
						|
      printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n");
 | 
						|
    }
 | 
						|
    printf("};\n");
 | 
						|
 | 
						|
 | 
						|
    printf("static const uint16 *%s_weight%s[%d]={\n",
 | 
						|
           global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
 | 
						|
    for (page=0; page < MY_UCA_NPAGES; page++)
 | 
						|
    {
 | 
						|
      const char *comma= page < MY_UCA_NPAGES-1 ? "," : "";
 | 
						|
      const char *nline= (page+1) % 4 ? "" : "\n";
 | 
						|
      if (!pagemaxlen[page])
 | 
						|
        printf("NULL       %s%s%s", w ? " ": "",  comma , nline);
 | 
						|
      else
 | 
						|
        printf("%s%s%03X%s%s%s",
 | 
						|
               global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
 | 
						|
               comma, nline);
 | 
						|
    }
 | 
						|
    printf("};\n");
 | 
						|
  }
 | 
						|
 | 
						|
  
 | 
						|
  printf("int main(void){ return 0;};\n");
 | 
						|
  return 0;
 | 
						|
}
 |