2000-07-31 21:29:14 +02:00
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 2 of the License , or
( at your option ) any later version .
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
You should have received a copy of the GNU General Public License
along with this program ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA */
# define NO_YACC_SYMBOLS
# include <global.h>
# include <my_sys.h>
# include <m_string.h>
# ifndef __GNU_LIBRARY__
# define __GNU_LIBRARY__ // Skipp warnings in getopt.h
# endif
# include <getopt.h>
# include "mysql_version.h"
# include "lex.h"
bool opt_search = 0 , opt_verbose = 0 ;
2000-08-15 19:09:37 +02:00
ulong opt_count = 100000 ;
2000-07-31 21:29:14 +02:00
# define max_allowed_array 8000 // Don't generate bigger arrays than this
# define max_symbol 32767 // Use this for 'not found'
# define how_much_for_plus 8 // 2-8
# define type_count 1 // 1-5
# define char_table_count 5
# define total_symbols (sizeof(symbols) / sizeof(SYMBOL) +\
sizeof ( sql_functions ) / sizeof ( SYMBOL ) )
# define how_much_and INT_MAX24
/*
The following only have to work with characters in the set
used by SQL commands
*/
# undef tolower
# define tolower(a) ((a) >= 'A' && (a) <= 'Z') ? ((a)- 'A' + 'a') : (a)
static uint how_long_symbols , function_plus , function_mod , function_type ;
static uint char_table [ 256 ] ;
static uchar unique_length [ 256 ] ;
static uchar bits [ how_much_and / 8 + 1 ] ;
static uint primes [ max_allowed_array + 1 ] ;
static ulong hash_results [ type_count ] [ how_much_for_plus + 1 ] [ total_symbols ] ;
static ulong start_value = 0 ;
struct rand_struct {
unsigned long seed1 , seed2 , max_value ;
double max_value_dbl ;
} ;
void randominit ( struct rand_struct * rand_st , ulong seed1 , ulong seed2 )
{ /* For mysql 3.21.# */
rand_st - > max_value = 0x3FFFFFFFL ;
rand_st - > max_value_dbl = ( double ) rand_st - > max_value ;
rand_st - > seed1 = seed1 % rand_st - > max_value ;
rand_st - > seed2 = seed2 % rand_st - > max_value ;
}
double rnd ( struct rand_struct * rand_st )
{
rand_st - > seed1 = ( rand_st - > seed1 * 3 + rand_st - > seed2 ) % rand_st - > max_value ;
rand_st - > seed2 = ( rand_st - > seed1 + rand_st - > seed2 + 33 ) % rand_st - > max_value ;
return ( ( ( double ) rand_st - > seed1 ) / rand_st - > max_value_dbl ) ;
}
static void make_char_table ( ulong t1 , ulong t2 , int type )
{
uint i ;
struct rand_struct rand_st ;
randominit ( & rand_st , t1 , t2 ) ;
for ( i = 0 ; i < 256 ; i + + )
{
switch ( type ) {
case 0 : char_table [ i ] = i + ( i < < 8 ) ; break ;
case 1 : char_table [ i ] = i + ( ( i ^ 255 ) < < 8 ) ; break ;
case 2 : char_table [ i ] = i ; break ;
case 3 : char_table [ i ] = i + ( ( uint ) ( rnd ( & rand_st ) * 255 ) < < 8 ) ; break ;
case 4 : char_table [ i ] = ( uint ) ( rnd ( & rand_st ) * 255 ) + ( i < < 8 ) ; break ;
}
}
char_table [ 0 ] | = 1 + 257 ; // Avoid problems with 0
for ( i = 0 ; i < 256 ; i + + )
{
uint tmp = ( uint ) ( rnd ( & rand_st ) * 255 ) ;
swap ( uint , char_table [ i ] , char_table [ tmp ] ) ;
}
/* lower characters should be mapped to upper */
for ( i = ' a ' ; i < = ' z ' ; i + + )
{
/* This loop is coded with extra variables to avoid a bug in gcc 2.96 */
uchar tmp = ( uchar ) ( i - ' a ' ) ; // Assume ascii
tmp + = ' A ' ;
char_table [ i ] = char_table [ tmp ] ;
}
}
/* Fill array primes with primes between start and 'max_allowed_array' */
static void make_prime_array ( uint start )
{
uint i , j , * to ;
uint max_index = ( uint ) sqrt ( ( double ) max_allowed_array ) ;
bzero ( ( char * ) primes , sizeof ( primes [ 0 ] ) * max_allowed_array ) ;
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
i = 2 ;
while ( i < max_index )
{
for ( j = i + i ; j < = max_allowed_array ; j + = i )
primes [ j ] = 1 ;
while ( primes [ + + i ] ) ;
}
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
to = primes ;
for ( i = start ; i < = max_allowed_array ; i + + )
if ( ! primes [ i ] )
* to + + = i ;
* to = 0 ; // end marker
}
# define USE_char_table
static ulong tab_index_function ( const char * s , uint add , uint type )
{
register ulong nr = start_value + char_table [ ( uchar ) * s ] ; // Nice value
ulong pos = 3 ;
uint tmp_length = unique_length [ ( uchar ) * s ] - 1 ;
while ( * + + s & & tmp_length - - > 0 )
{
switch ( type ) {
case 0 :
nr = ( nr ^ ( char_table [ ( uchar ) * s ] + ( nr < < add ) ) ) ;
break ;
case 1 :
nr = ( nr + ( char_table [ ( uchar ) * s ] + ( nr < < add ) ) ) ;
break ;
case 2 :
nr = ( nr ^ ( char_table [ ( uchar ) * s ] ^ ( nr < < add ) ) ) ;
break ;
case 3 :
nr = ( char_table [ ( uchar ) * s ] ^ ( nr < < add ) ) ;
break ;
case 4 :
nr + = nr + nr + ( ( nr & 63 ) + pos ) * ( ( ulong ) char_table [ ( uchar ) * s ] ) ;
pos + = add ;
break ;
}
}
return nr & INT_MAX24 ;
}
static int search ( bool write_warning )
{
uint size_symbols = sizeof ( symbols ) / sizeof ( SYMBOL ) ;
uint size_functions = sizeof ( sql_functions ) / sizeof ( SYMBOL ) ;
uint size = size_symbols + size_functions ;
uint i = 0 , found , * prime , type ;
int igra [ max_allowed_array ] , test_count = INT_MAX ;
uint possible_plus [ how_much_for_plus * type_count + type_count ] ;
how_long_symbols = sizeof ( symbols ) / sizeof ( SYMBOL ) ;
bzero ( ( char * ) possible_plus , sizeof ( possible_plus ) ) ;
found = 0 ;
/* Check first which function_plus are possible */
for ( type = 0 ; type < type_count ; type + + )
{
for ( function_plus = 1 ;
function_plus < = how_much_for_plus ;
function_plus + + )
{
bzero ( ( char * ) bits , sizeof ( bits ) ) ;
for ( i = 0 ; i < size ; i + + )
{
ulong order = tab_index_function ( ( i < how_long_symbols ) ?
symbols [ i ] . name :
sql_functions [ i - how_long_symbols ] . name ,
function_plus , type ) ;
hash_results [ type ] [ function_plus ] [ i ] = order ;
uint pos = order / 8 ;
uint bit = order & 7 ;
if ( bits [ pos ] & ( 1 < < bit ) )
break ;
bits [ pos ] | = 1 < < bit ;
}
if ( i = = size )
{
possible_plus [ found + + ] = function_plus ;
}
}
possible_plus [ found + + ] = 0 ; // End marker
}
if ( found = = type_count )
{
if ( write_warning )
fprintf ( stderr , " \
The hash function didn ' t return a unique value for any parameter \ n \
You have to change gen_lex_code . cc , function ' tab_index_function ' to \ n \
generate unique values for some parameter . When you have succeeded in this , \ n \
you have to change ' main ' to print out the new function \ n " );
return ( 1 ) ;
}
if ( opt_verbose )
fprintf ( stderr , " Info: Possible add values: %d \n " , found - type_count ) ;
for ( prime = primes ; ( function_mod = * prime ) ; prime + + )
{
uint * plus_ptr = possible_plus ;
for ( type = 0 ; type < type_count ; type + + )
{
while ( ( function_plus = * plus_ptr + + ) )
{
ulong * order_pos = & hash_results [ type ] [ function_plus ] [ 0 ] ;
if ( test_count + + = = INT_MAX )
{
test_count = 1 ;
bzero ( ( char * ) igra , sizeof ( igra ) ) ;
}
for ( i = 0 ; i < size ; i + + )
{
ulong order ;
order = * order_pos + + % function_mod ;
if ( igra [ order ] = = test_count )
break ;
igra [ order ] = test_count ;
}
if ( i = = size )
{
* prime = 0 ; // Mark this used
function_type = type ;
return 0 ; // Found ok value
}
}
}
}
function_mod = max_allowed_array ;
if ( write_warning )
fprintf ( stderr , " Fatal error when generating hash for symbols \n \
Didn ' t find suitable values for perfect hashing : \ n \
You have to edit gen_lex_hase . cc to generate a new hashing function . \ n \
You can try running gen_lex_hash with - - search to find a suitable value \ n \
Symbol array size = % d \ n " ,function_mod);
return - 1 ;
}
void print_arrays ( )
{
uint size_symbols = sizeof ( symbols ) / sizeof ( SYMBOL ) ;
uint size_functions = sizeof ( sql_functions ) / sizeof ( SYMBOL ) ;
uint size = size_symbols + size_functions ;
uint i ;
fprintf ( stderr , " Symbols: %d Functions: %d; Total: %d \n Shifts per char: %d, Array size: %d \n " ,
size_symbols , size_functions , size_symbols + size_functions ,
function_plus , function_mod ) ;
int * prva = ( int * ) my_alloca ( sizeof ( int ) * function_mod ) ;
for ( i = 0 ; i < = function_mod ; i + + )
prva [ i ] = max_symbol ;
for ( i = 0 ; i < size ; i + + )
{
ulong order = tab_index_function ( ( i < how_long_symbols ) ? symbols [ i ] . name : sql_functions [ i - how_long_symbols ] . name , function_plus , function_type ) ;
order % = function_mod ;
prva [ order ] = i ;
}
# ifdef USE_char_table
printf ( " static uint16 char_table[] = { \n " ) ;
for ( i = 0 ; i < 255 ; i + + ) // < 255 is correct
{
printf ( " %u, " , char_table [ i ] ) ;
if ( ( ( i + 1 ) & 15 ) = = 0 )
puts ( " " ) ;
}
printf ( " %d \n }; \n \n \n " , char_table [ i ] ) ;
# endif
printf ( " static uchar unique_length[] = { \n " ) ;
for ( i = 0 ; i < 255 ; i + + ) // < 255 is correct
{
printf ( " %u, " , unique_length [ i ] ) ;
if ( ( ( i + 1 ) & 15 ) = = 0 )
puts ( " " ) ;
}
printf ( " %d \n }; \n \n \n " , unique_length [ i ] ) ;
printf ( " static uint16 my_function_table[] = { \n " ) ;
for ( i = 0 ; i < function_mod - 1 ; i + + )
{
printf ( " %d, " , prva [ i ] ) ;
if ( ( ( i + 1 ) % 12 ) = = 0 )
puts ( " " ) ;
}
printf ( " %d \n }; \n \n \n " , prva [ i ] ) ;
my_afree ( ( gptr ) prva ) ;
}
static struct option long_options [ ] =
{
2000-08-15 19:09:37 +02:00
{ " count " , required_argument , 0 , ' c ' } ,
2000-07-31 21:29:14 +02:00
{ " search " , no_argument , 0 , ' S ' } ,
{ " verbose " , no_argument , 0 , ' v ' } ,
{ " version " , no_argument , 0 , ' V ' } ,
{ " rnd1 " , required_argument , 0 , ' r ' } ,
{ " rnd2 " , required_argument , 0 , ' R ' } ,
{ " type " , required_argument , 0 , ' t ' } ,
{ 0 , 0 , 0 , 0 }
} ;
static void usage ( int version )
{
2000-12-27 02:31:06 +01:00
printf ( " %s Ver 3.2 Distrib %s, for %s (%s) \n " ,
2000-07-31 21:29:14 +02:00
my_progname , MYSQL_SERVER_VERSION , SYSTEM_TYPE , MACHINE_TYPE ) ;
if ( version )
return ;
puts ( " Copyright (C) 2000 MySQL AB & MySQL Finland AB, by Sinisa and Monty " ) ;
puts ( " This software comes with ABSOLUTELY NO WARRANTY. This is free software, \n and you are welcome to modify and redistribute it under the GPL license \n " ) ;
puts ( " This program generates a perfect hashing function for the sql_lex.cc " ) ;
printf ( " Usage: %s [OPTIONS] \n " , my_progname ) ;
printf ( " \n \
2000-08-15 19:09:37 +02:00
- c , - - count = # Try count times to find a optimal hash table \ n \
2000-07-31 21:29:14 +02:00
- r , - - rnd1 = # Set 1 part of rnd value for hash generator \ n \
- R , - - rnd2 = # Set 2 part of rnd value for hash generator \ n \
- t , - - type = # Set type of char table to generate \ n \
- S , - - search Search after good rnd1 and rnd2 values \ n \
- v , - - verbose Write some information while the program executes \ n \
- V , - - version Output version information and exit \ n " );
}
static uint best_type ;
2000-12-27 02:31:06 +01:00
static ulong best_t1 , best_t2 , best_start_value ;
2000-07-31 21:29:14 +02:00
static int get_options ( int argc , char * * argv )
{
int c , option_index = 0 ;
2000-08-15 19:09:37 +02:00
while ( ( c = getopt_long ( argc , argv , " ?SvVc:r:R:t: " ,
2000-07-31 21:29:14 +02:00
long_options , & option_index ) ) ! = EOF )
{
switch ( c ) {
2000-08-15 19:09:37 +02:00
case ' c ' :
opt_count = atol ( optarg ) ;
break ;
2000-07-31 21:29:14 +02:00
case ' r ' :
best_t1 = atol ( optarg ) ;
break ;
case ' R ' :
best_t2 = atol ( optarg ) ;
break ;
case ' t ' :
best_type = atoi ( optarg ) ;
break ;
case ' S ' :
opt_search = 1 ;
break ;
case ' v ' :
opt_verbose = 1 ;
break ;
case ' V ' : usage ( 1 ) ; exit ( 0 ) ;
case ' I ' :
case ' ? ' :
usage ( 0 ) ;
exit ( 0 ) ;
default :
fprintf ( stderr , " illegal option: -%c \n " , opterr ) ;
usage ( 0 ) ;
exit ( 1 ) ;
}
}
argc - = optind ;
argv + = optind ;
if ( argc > = 1 )
{
usage ( 0 ) ;
exit ( 1 ) ;
}
return ( 0 ) ;
}
static uint max_prefix ( const char * name )
{
uint i ;
uint max_length = 1 ;
for ( i = 0 ; i < sizeof ( symbols ) / sizeof ( SYMBOL ) ; i + + )
{
const char * str = symbols [ i ] . name ;
if ( str ! = name )
{
const char * str2 = name ;
uint length ;
while ( * str & & * str = = * str2 )
{
str + + ;
str2 + + ;
}
length = ( uint ) ( str2 - name ) + 1 ;
if ( length > max_length )
max_length = length ;
}
}
for ( i = 0 ; i < sizeof ( sql_functions ) / sizeof ( SYMBOL ) ; i + + )
{
const char * str = sql_functions [ i ] . name ;
if ( str ! = name )
{
const char * str2 = name ;
uint length ;
while ( * str & & * str = = * str2 )
{
str + + ;
str2 + + ;
}
length = ( uint ) ( str2 - name ) + 1 ;
if ( length > max_length )
max_length = length ;
}
}
return max_length ;
}
static void make_max_length_table ( void )
{
uint i ;
for ( i = 0 ; i < sizeof ( symbols ) / sizeof ( SYMBOL ) ; i + + )
{
uint length = max_prefix ( symbols [ i ] . name ) ;
if ( length > unique_length [ ( uchar ) symbols [ i ] . name [ 0 ] ] )
{
unique_length [ ( uchar ) symbols [ i ] . name [ 0 ] ] = length ;
unique_length [ ( uchar ) tolower ( symbols [ i ] . name [ 0 ] ) ] = length ;
2000-08-31 11:05:41 +02:00
}
2000-07-31 21:29:14 +02:00
}
for ( i = 0 ; i < sizeof ( sql_functions ) / sizeof ( SYMBOL ) ; i + + )
{
uint length = max_prefix ( sql_functions [ i ] . name ) ;
if ( length > unique_length [ ( uchar ) sql_functions [ i ] . name [ 0 ] ] )
{
unique_length [ ( uchar ) sql_functions [ i ] . name [ 0 ] ] = length ;
unique_length [ ( uchar ) tolower ( sql_functions [ i ] . name [ 0 ] ) ] = length ;
}
}
}
int main ( int argc , char * * argv )
{
struct rand_struct rand_st ;
static uint best_mod , best_add , best_functype ;
int error ;
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
MY_INIT ( argv [ 0 ] ) ;
2000-12-26 16:34:04 +01:00
start_value = 5307411L ; best_t1 = 4597287L ; best_t2 = 3375760L ; best_type = 1 ; /* mode=4783 add=5 func_type: 0 */
2000-07-31 21:29:14 +02:00
if ( get_options ( argc , ( char * * ) argv ) )
exit ( 1 ) ;
make_max_length_table ( ) ;
make_char_table ( best_t1 , best_t2 , best_type ) ;
make_prime_array ( sizeof ( symbols ) / sizeof ( SYMBOL ) +
sizeof ( sql_functions ) / sizeof ( SYMBOL ) ) ;
if ( ( error = search ( 1 ) ) > 0 | | error & & ! opt_search )
exit ( 1 ) ; // This should work
best_mod = function_mod ; best_add = function_plus ; best_functype = function_type ;
if ( opt_search )
{
time_t start_time = time ( ( time_t * ) 0 ) ;
randominit ( & rand_st , start_time , start_time / 2 ) ; // Some random values
printf ( " start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */ \n " ,
start_value , best_t1 , best_t2 , best_type , best_mod , best_add ,
best_functype ) ;
2000-08-15 19:09:37 +02:00
for ( uint i = 1 ; i < = opt_count ; i + + )
2000-07-31 21:29:14 +02:00
{
if ( i % 10 = = 0 )
{
putchar ( ' . ' ) ;
fflush ( stdout ) ;
}
ulong t1 = ( ulong ) ( rnd ( & rand_st ) * INT_MAX24 ) ;
ulong t2 = ( ulong ) ( rnd ( & rand_st ) * INT_MAX24 ) ;
uint type = ( int ) ( rnd ( & rand_st ) * char_table_count ) ;
start_value = ( ulong ) ( rnd ( & rand_st ) * INT_MAX24 ) ;
make_char_table ( t1 , t2 , type ) ;
if ( ! search ( 0 ) )
{
best_mod = function_mod ; best_add = function_plus ;
best_functype = function_type ;
best_t1 = t1 ; best_t2 = t2 ; best_type = type ;
2000-12-27 02:31:06 +01:00
best_start_value = start_value ;
printf ( " \n start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */ \n " ,
best_start_value , best_t1 , best_t2 , best_type , best_mod , best_add ,
best_functype ) ;
2000-07-31 21:29:14 +02:00
}
}
}
function_mod = best_mod ; function_plus = best_add ;
make_char_table ( best_t1 , best_t2 , best_type ) ;
printf ( " /* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB \n \
This program is free software ; you can redistribute it and / or modify \ n \
it under the terms of the GNU General Public License as published by \ n \
the Free Software Foundation ; either version 2 of the License , or \ n \
( at your option ) any later version . \ n \ n \
This program is distributed in the hope that it will be useful , \ n \
but WITHOUT ANY WARRANTY ; without even the implied warranty of \ n \
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the \ n \
GNU General Public License for more details . \ n \ n \
You should have received a copy of the GNU General Public License \ n \
along with this program ; if not , write to the Free Software \ n \
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA */ \ n \ n " );
printf ( " /* This code is generated by gen_lex_hash.cc that seeks for a perfect \n hash function */ \n \n " ) ;
printf ( " #include \" lex.h \" \n \n " ) ;
print_arrays ( ) ;
2000-12-27 02:31:06 +01:00
printf ( " /* start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; */ /* mode=%d add=%d t ype: %d */ \n \n " ,
best_start_value , best_t1 , best_t2 , best_type ,
2000-08-15 19:09:37 +02:00
best_mod , best_add , best_functype ) ;
2000-08-31 11:05:41 +02:00
2000-07-31 21:29:14 +02:00
printf ( " inline SYMBOL *get_hash_symbol(const char *s,unsigned int length,bool function) \n \
{ \ n \
ulong idx = % lu + char_table [ ( uchar ) * s ] ; \ n \
SYMBOL * sim ; \ n \
const char * start = s ; \ n \
int i = unique_length [ ( uchar ) * s + + ] ; \ n \
if ( i > ( int ) length ) i = ( int ) length ; \ n \
while ( - - i > 0 ) \ n \
idx = ( idx ^ ( char_table [ ( uchar ) * s + + ] + ( idx < < % d ) ) ) ; \ n \
idx = my_function_table [ ( idx & % d ) % % % d ] ; \ n \
if ( idx > = % d ) \ n \
{ \ n \
if ( ! function | | idx > = % d ) return ( SYMBOL * ) 0 ; \ n \
sim = sql_functions + ( idx - % d ) ; \ n \
} \ n \
else \ n \
sim = symbols + idx ; \ n \
if ( ( length ! = sim - > length ) | | lex_casecmp ( start , sim - > name , length ) ) \ n \
return ( SYMBOL * ) 0 ; \ n \
return sim ; \ n \
} \ n " ,(ulong) start_value,(int) function_plus,(int) how_much_and,function_mod,how_long_symbols,max_symbol,how_long_symbols);
exit ( 0 ) ;
return 0 ;
}