diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index d44b6188f0a..0f3260e903d 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -652,7 +652,7 @@ static int dump_remote_log_entries(const char* logname) { char buf[128]; char last_db[FN_REFLEN+1] = ""; - uint len; + uint len, logname_len; NET* net = &mysql->net; int old_format; DBUG_ENTER("dump_remote_log_entries"); @@ -669,16 +669,16 @@ static int dump_remote_log_entries(const char* logname) } int4store(buf, position); int2store(buf + BIN_LOG_HEADER_SIZE, binlog_flags); - len = (uint) strlen(logname); + logname_len = (uint) strlen(logname); int4store(buf + 6, 0); - memcpy(buf + 10, logname,len); - if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1)) + memcpy(buf + 10, logname, logname_len); + if (simple_command(mysql, COM_BINLOG_DUMP, buf, logname_len + 10, 1)) { fprintf(stderr,"Got fatal error sending the log dump command\n"); DBUG_RETURN(1); } - my_off_t old_off= 0; + my_off_t old_off= position; ulonglong rec_count= 0; char fname[FN_REFLEN+1]; @@ -707,6 +707,37 @@ static int dump_remote_log_entries(const char* logname) Log_event_type type= ev->get_type_code(); if (!old_format || ( type != LOAD_EVENT && type != CREATE_FILE_EVENT)) { + /* + If this is a Rotate event, maybe it's the end of the requested binlog; + in this case we are done (stop transfer). + This is suitable for binlogs, not relay logs (but for now we don't read + relay logs remotely because the server is not able to do that). If one + day we read relay logs remotely, then we will have a problem with the + detection below: relay logs contain Rotate events which are about the + binlogs, so which would trigger the end-detection below. + */ + if (ev->get_type_code() == ROTATE_EVENT) + { + Rotate_log_event *rev= (Rotate_log_event *)ev; + /* + If this is a fake Rotate event, and not about our log, we can stop + transfer. 
If this is a real Rotate event (so it's not about our log, + it's in our log describing the next log), we print it (because it's + part of our log) and then we will stop when we receive the fake one + soon. + */ + if (rev->when == 0) + { + if ((rev->ident_len != logname_len) || + memcmp(rev->new_log_ident, logname, logname_len)) + DBUG_RETURN(0); + /* + Otherwise, this is a fake Rotate for our log, at the very beginning + for sure. Skip it. + */ + continue; + } + } if (process_event(&rec_count,last_db,ev,old_off,old_format)) DBUG_RETURN(1); } @@ -735,12 +766,10 @@ static int dump_remote_log_entries(const char* logname) /* Let's adjust offset for remote log as for local log to produce - similar text.. + similar text. As we don't print the fake Rotate event, all events are + real so we can simply add the length. */ - if (old_off) - old_off+= len-1; - else - old_off= BIN_LOG_HEADER_SIZE; + old_off+= len-1; } DBUG_RETURN(0); } diff --git a/configure.in b/configure.in index d95562a3791..de546d9c078 100644 --- a/configure.in +++ b/configure.in @@ -2704,7 +2704,9 @@ then /* ) rel_srcdir="$srcdir" ;; * ) rel_srcdir="../../$srcdir" ;; esac - AC_CONFIG_SUBDIRS($bdb/dist/configure) + (cd $bdb/build_unix && \ + sh $rel_srcdir/$bdb/dist/configure $bdb_conf_flags) || \ + AC_MSG_ERROR([could not configure Berkeley DB]) dnl echo "bdb = '$bdb'; inc = '$bdb_includes', lib = '$bdb_libs'" echo "END OF BERKELEY DB CONFIGURATION" diff --git a/include/m_ctype.h b/include/m_ctype.h index 4444249999d..328359125f4 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -210,7 +210,8 @@ typedef struct charset_info_st uint strxfrm_multiply; uint mbminlen; uint mbmaxlen; - char max_sort_char; /* For LIKE optimization */ + uint16 min_sort_char; + uint16 max_sort_char; /* For LIKE optimization */ MY_CHARSET_HANDLER *cset; MY_COLLATION_HANDLER *coll; @@ -305,6 +306,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, char *min_str, char *max_str, uint *min_length, uint *max_length); +my_bool 
my_like_range_ucs2(CHARSET_INFO *cs, + const char *ptr, uint ptr_length, + pbool escape, pbool w_one, pbool w_many, + uint res_length, + char *min_str, char *max_str, + uint *min_length, uint *max_length); + int my_wildcmp_8bit(CHARSET_INFO *, const char *str,const char *str_end, diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result index 703cc59d9a4..dee7433404f 100644 --- a/mysql-test/r/alter_table.result +++ b/mysql-test/r/alter_table.result @@ -343,6 +343,22 @@ t1 CREATE TABLE `t1` ( `a` char(10) character set latin1 default NULL ) ENGINE=MyISAM DEFAULT CHARSET=cp1251 drop table t1; +create table t1 (myblob longblob,mytext longtext) +default charset latin1 collate latin1_general_cs; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `myblob` longblob, + `mytext` longtext character set latin1 collate latin1_general_cs +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +alter table t1 character set latin2; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `myblob` longblob, + `mytext` longtext character set latin2 +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop table t1; CREATE TABLE t1 ( Host varchar(16) binary NOT NULL default '', User varchar(16) binary NOT NULL default '', diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result index 86c4b8cf0dc..d3225679b3e 100644 --- a/mysql-test/r/func_str.result +++ b/mysql-test/r/func_str.result @@ -339,6 +339,8 @@ select concat(_latin1'a',_latin2'a',_latin5'a'); ERROR HY000: Illegal mix of collations (latin1_swedish_ci,COERCIBLE), (latin2_general_ci,COERCIBLE), (latin5_turkish_ci,COERCIBLE) for operation 'concat' select concat(_latin1'a',_latin2'a',_latin5'a',_latin7'a'); ERROR HY000: Illegal mix of collations for operation 'concat' +select concat_ws(_latin1'a',_latin2'a'); +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,COERCIBLE) and (latin2_general_ci,COERCIBLE) for operation 'concat_ws' select FIELD('b','A','B'); FIELD('b','A','B') 2 diff 
--git a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result index 72eb4c37d34..5592524cc39 100644 --- a/mysql-test/r/mysqlbinlog.result +++ b/mysql-test/r/mysqlbinlog.result @@ -64,9 +64,6 @@ LOAD DATA LOCAL INFILE 'MYSQL_TEST_DIR/var/tmp/words.dat-2-1' INTO TABLE `t1` FI LOAD DATA LOCAL INFILE 'MYSQL_TEST_DIR/var/tmp/words.dat-3-1' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' STARTING BY '' (word); LOAD DATA LOCAL INFILE 'MYSQL_TEST_DIR/var/tmp/words.dat-4-1' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' STARTING BY '' (word); LOAD DATA LOCAL INFILE 'MYSQL_TEST_DIR/var/tmp/words.dat-5-1' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' STARTING BY '' (word); -LOAD DATA LOCAL INFILE 'MYSQL_TEST_DIR/var/tmp/words.dat-6-1' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' STARTING BY '' (word); -SET TIMESTAMP=1000000000; -insert into t1 values ("Alas"); --- Broken LOAD DATA -- /*!40019 SET @@session.max_insert_delayed_threads=0*/; diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test index 8da017e8acb..bb174c0225a 100644 --- a/mysql-test/t/alter_table.test +++ b/mysql-test/t/alter_table.test @@ -183,6 +183,17 @@ show create table t1; drop table t1; +# +# Bug#2821 +# Test that table CHARACTER SET does not affect blobs +# +create table t1 (myblob longblob,mytext longtext) +default charset latin1 collate latin1_general_cs; +show create table t1; +alter table t1 character set latin2; +show create table t1; +drop table t1; + # # Test ALTER TABLE ENABLE/DISABLE keys when things are locked # diff --git a/mysql-test/t/func_str.test b/mysql-test/t/func_str.test index 1e6d279f721..1c85f83baac 100644 --- a/mysql-test/t/func_str.test +++ b/mysql-test/t/func_str.test @@ -195,6 +195,8 @@ select concat(_latin1'a',_latin2'a'); select 
concat(_latin1'a',_latin2'a',_latin5'a'); --error 1271 select concat(_latin1'a',_latin2'a',_latin5'a',_latin7'a'); +--error 1267 +select concat_ws(_latin1'a',_latin2'a'); # # Test FIELD() and collations diff --git a/mysql-test/t/mysqlbinlog.test b/mysql-test/t/mysqlbinlog.test index 07adf2732dc..1b07ddc63a6 100644 --- a/mysql-test/t/mysqlbinlog.test +++ b/mysql-test/t/mysqlbinlog.test @@ -70,7 +70,6 @@ select "--- Remote --" as ""; --enable_query_log # This is broken now -# By the way it seems that remote version fetches all events with name >= master-bin.000001 --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR --exec $MYSQL_BINLOG --short-form --local-load=$MYSQL_TEST_DIR/var/tmp/ --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001 diff --git a/mysys/charset.c b/mysys/charset.c index 5bf0ea972a5..c422ead89c0 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -62,7 +62,7 @@ static void set_max_sort_char(CHARSET_INFO *cs) if ((uchar) cs->sort_order[i] > max_char) { max_char=(uchar) cs->sort_order[i]; - cs->max_sort_char= (char) i; + cs->max_sort_char= i; } } } diff --git a/mysys/uca-dump.c b/mysys/uca-dump.c new file mode 100644 index 00000000000..c8339d7b85b --- /dev/null +++ b/mysys/uca-dump.c @@ -0,0 +1,258 @@ +#include +#include +#include + +typedef unsigned char uchar; +typedef unsigned short uint16; + +struct uca_item_st +{ + uchar num; + uint16 weight[4][8]; +}; + +#if 1 +#define MY_UCA_NPAGES 1024 +#define MY_UCA_NCHARS 64 +#define MY_UCA_CMASK 63 +#define MY_UCA_PSHIFT 6 +#else +#define MY_UCA_NPAGES 256 +#define MY_UCA_NCHARS 256 +#define MY_UCA_CMASK 255 +#define MY_UCA_PSHIFT 8 +#endif + +int main(int ac, char **av) +{ + char str[256]; + char *weights[64]; + struct uca_item_st uca[64*1024]; + size_t code, page, w; + int pagemaxlen[MY_UCA_NPAGES]; + + bzero(uca, sizeof(uca)); + + while (fgets(str,sizeof(str),stdin)) + { + char *comment; + char *weight; + char *s; + size_t codenum; + + code= strtol(str,NULL,16); 
+ + if (str[0]=='#' || (code > 0xFFFF)) + continue; + if ((comment=strchr(str,'#'))) + { + *comment++= '\0'; + for ( ; *comment==' ' ; comment++); + }else + continue; + + if ((weight=strchr(str,';'))) + { + *weight++= '\0'; + for ( ; *weight==' ' ; weight++); + } + else + continue; + + codenum= 0; + s= strtok(str, " \t"); + while (s) + { + s= strtok(NULL, " \t"); + codenum++; + } + + if (codenum>1) + { + /* Multi-character weight */ + continue; + } + + uca[code].num= 0; + s= strtok(weight, " []"); + while (s) + { + weights[uca[code].num]= s; + s= strtok(NULL, " []"); + uca[code].num++; + } + + for (w=0; w < uca[code].num; w++) + { + size_t partnum; + + partnum= 0; + s= weights[w]; + while (*s) + { + char *endptr; + size_t part; + part= strtol(s+1,&endptr,16); + uca[code].weight[partnum][w]= part; + s= endptr; + partnum++; + } + + } + } + + /* Now set implicit weights */ + for (code=0; code <= 0xFFFF; code++) + { + size_t base, aaaa, bbbb; + + if (uca[code].num) + continue; + + /* + 3400; + 4DB5; + 4E00; + 9FA5; + */ + + if (code >= 0x3400 && code <= 0x4DB5) + base= 0xFB80; + else if (code >= 0x4E00 && code <= 0x9FA5) + base= 0xFB40; + else + base= 0xFBC0; + + aaaa= base + (code >> 15); + bbbb= (code & 0x7FFF) | 0x8000; + uca[code].weight[0][0]= aaaa; + uca[code].weight[0][1]= bbbb; + + uca[code].weight[1][0]= 0x0020; + uca[code].weight[1][1]= 0x0000; + + uca[code].weight[2][0]= 0x0002; + uca[code].weight[2][1]= 0x0000; + + uca[code].weight[3][0]= 0x0001; + uca[code].weight[3][2]= 0x0000; + + uca[code].num= 2; + } + + printf("#include \"my_uca.h\"\n"); + + printf("#define MY_UCA_NPAGES %d\n",MY_UCA_NPAGES); + printf("#define MY_UCA_NCHARS %d\n",MY_UCA_NCHARS); + printf("#define MY_UCA_CMASK %d\n",MY_UCA_CMASK); + printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT); + + for (w=0; w<1; w++) + { + for (page=0; page < MY_UCA_NPAGES; page++) + { + size_t offs; + size_t maxnum= 0; + size_t nchars= 0; + size_t mchars; + + + /* + Calculate maximum weight + length for this 
page + */ + + for (offs=0; offs < MY_UCA_NCHARS; offs++) + { + size_t i, num; + + code= page*MY_UCA_NCHARS+offs; + + /* Calculate only non-zero weights */ + num=0; + for (i=0; i < uca[code].num; i++) + if (uca[code].weight[w][i]) + num++; + + maxnum= maxnum < num ? num : maxnum; + } + if (!maxnum) + maxnum=1; + + switch (maxnum) + { + case 0: mchars= 8; break; + case 1: mchars= 8; break; + case 2: mchars= 8; break; + case 3: mchars= 9; break; + case 4: mchars= 8; break; + default: mchars= uca[code].num; + } + + pagemaxlen[page]= maxnum; + + printf("uint16 page%03Xdata[]= { /* %04X (%d weights per char) */\n", + page, page*MY_UCA_NCHARS, maxnum); + + /* + Now print this page + */ + + for (offs=0; offs < MY_UCA_NCHARS; offs++) + { + uint16 weight[8]; + size_t num, i; + + code= page*MY_UCA_NCHARS+offs; + + bzero(weight,sizeof(weight)); + + /* Copy non-zero weights */ + for (num=0, i=0; i < uca[code].num; i++) + { + if (uca[code].weight[w][i]) + { + weight[num]= uca[code].weight[w][i]; + num++; + } + } + + for (i=0; i < maxnum; i++) + { + printf("0x%04X",(int)weight[i]); + if ((offs+1 != MY_UCA_NCHARS) || (i+1!=maxnum)) + printf(","); + nchars++; + } + if (nchars >=mchars) + { + printf("\n"); + nchars=0; + } + else + { + printf(" "); + } + } + printf("};\n\n"); + } + } + + printf("uchar ucal[%d]={\n",MY_UCA_NPAGES); + for (page=0; page < MY_UCA_NPAGES; page++) + { + printf("%d%s%s",pagemaxlen[page],pagefix_fields(thd, tables, arg)) - return 1; + DBUG_RETURN(1); // we can't assign 'item' before, because fix_fields() can change arg Item *item= *arg; if (item->check_cols(1)) - return 1; + DBUG_RETURN(1); /* TODO: We should think about this. 
It is not always right way just to set an UDF result to return my_charset_bin diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 2833e1ca016..06131950ec3 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -628,9 +628,11 @@ void Item_func_concat_ws::fix_length_and_dec() max_length=separator->max_length*(arg_count-1); for (uint i=0 ; i < arg_count ; i++) { + DTCollation tmp(collation.collation, collation.derivation); max_length+=args[i]->max_length; if (collation.aggregate(args[i]->collation)) { + collation.set(tmp); // Restore the previous value my_coll_agg_error(collation, args[i]->collation, func_name()); break; } diff --git a/sql/sql_table.cc b/sql/sql_table.cc index f4440f27945..8011809d6ab 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -470,9 +470,11 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name, sql_field->charset= create_info->default_table_charset; /* table_charset is set in ALTER TABLE if we want change character set - for all varchar/char columns + for all varchar/char columns. + But the table charset must not affect the BLOB fields, so don't + allow to change my_charset_bin to something else. */ - if (create_info->table_charset) + if (create_info->table_charset && sql_field->charset != &my_charset_bin) sql_field->charset= create_info->table_charset; sql_field->create_length_to_internal_length(); diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index a4461beed29..561f79f9de1 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -298,7 +298,11 @@ udf_func *find_udf(const char *name,uint length,bool mark_used) DBUG_ENTER("find_udf"); /* TODO: This should be changed to reader locks someday! */ - rw_rdlock(&THR_LOCK_udf); + if (mark_used) + rw_wrlock(&THR_LOCK_udf); /* Called during fix_fields */ + else + rw_rdlock(&THR_LOCK_udf); /* Called during parsing */ + if ((udf=(udf_func*) hash_search(&udf_hash,(byte*) name, length ? 
length : (uint) strlen(name)))) { @@ -474,7 +478,7 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name) if (!(udf=(udf_func*) hash_search(&udf_hash,(byte*) udf_name->str, (uint) udf_name->length))) { - net_printf(thd, ER_FUNCTION_NOT_DEFINED, udf_name); + net_printf(thd, ER_FUNCTION_NOT_DEFINED, udf_name->str); goto err; } del_udf(udf); diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 712f909ae47..ee55cfda6c1 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6284,7 +6284,8 @@ CHARSET_INFO my_charset_big5_chinese_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_big5_handler, &my_collation_big5_chinese_ci_handler }; @@ -6308,7 +6309,8 @@ CHARSET_INFO my_charset_big5_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_big5_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index ea7777aabc0..ea0a471ef74 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -388,7 +388,8 @@ CHARSET_INFO my_charset_bin = 1, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - (char) 255, /* max_sort_char */ + 0, /* min_sort_char */ + 255, /* max_sort_char */ &my_charset_handler, &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 8aea7358a9c..ed8c0b5b415 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -634,7 +634,8 @@ CHARSET_INFO my_charset_latin2_czech_ci = 4, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_8bit_handler, &my_collation_latin2_czech_ci_handler }; diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index ce6f695059f..7f89b8152e8 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8692,7 +8692,8 @@ CHARSET_INFO my_charset_euckr_korean_ci= 1, /* 
strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -8716,7 +8717,8 @@ CHARSET_INFO my_charset_euckr_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 0085d264416..55dab3ca2a8 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -35,6 +35,7 @@ CHARSET_INFO compiled_charsets[] = { 0, 0, 0, + 0, NULL, NULL } diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 9f663f0b1ce..e5dfaf45276 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5743,7 +5743,8 @@ CHARSET_INFO my_charset_gb2312_chinese_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -5766,7 +5767,8 @@ CHARSET_INFO my_charset_gb2312_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index f49bc5d360b..98511406ba9 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9939,7 +9939,8 @@ CHARSET_INFO my_charset_gbk_chinese_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -9962,7 +9963,8 @@ CHARSET_INFO my_charset_gbk_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index eaef5441a65..0682b15d135 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ 
-218,7 +218,8 @@ CHARSET_INFO my_charset_latin1= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_8bit_simple_ci_handler }; @@ -414,7 +415,8 @@ CHARSET_INFO my_charset_latin1_german2_ci= 2, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_german2_ci_handler }; @@ -438,7 +440,8 @@ CHARSET_INFO my_charset_latin1_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 6e7d0b849e2..0aae60a0b56 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -946,7 +946,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, if (*ptr == w_one) /* '_' in SQL */ { *min_str++='\0'; /* This should be min char */ - *max_str++=cs->max_sort_char; + *max_str++= (char) cs->max_sort_char; continue; } if (*ptr == w_many) /* '%' in SQL */ @@ -955,7 +955,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, *max_length=res_length; do { *min_str++ = ' '; /* Because if key compression */ - *max_str++ = cs->max_sort_char; + *max_str++ = (char) cs->max_sort_char; } while (min_str != min_end); return 0; } diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index a4a8b3d4b2c..c337b8122fb 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4528,7 +4528,8 @@ CHARSET_INFO my_charset_sjis_japanese_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -4551,7 +4552,8 @@ CHARSET_INFO my_charset_sjis_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff 
--git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index abe9972cfd6..59be820863a 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -913,7 +913,8 @@ CHARSET_INFO my_charset_tis620_thai_ci= 4, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -936,7 +937,8 @@ CHARSET_INFO my_charset_tis620_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_8bit_bin_handler }; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 908d27859ff..f4c1a22939a 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1284,13 +1284,94 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), } } +/* +** Calculate min_str and max_str that ranges a LIKE string. +** Arguments: +** ptr Pointer to LIKE string. +** ptr_length Length of LIKE string. +** escape Escape character in LIKE. (Normally '\'). +** All escape characters should be removed from min_str and max_str +** res_length Length of min_str and max_str. +** min_str Smallest case sensitive string that ranges LIKE. +** Should be space padded to res_length. +** max_str Largest case sensitive string that ranges LIKE. +** Normally padded with the biggest character sort value. +** +** The function should return 0 if ok and 1 if the LIKE string can't be +** optimized ! 
+*/ + +my_bool my_like_range_ucs2(CHARSET_INFO *cs, + const char *ptr,uint ptr_length, + pbool escape, pbool w_one, pbool w_many, + uint res_length, + char *min_str,char *max_str, + uint *min_length,uint *max_length) +{ + const char *end=ptr+ptr_length; + char *min_org=min_str; + char *min_end=min_str+res_length; + + for (; ptr + 1 < end && min_str + 1 < min_end ; ptr+=2) + { + if (ptr[0] == '\0' && ptr[1] == escape && ptr+2 < end) + { + ptr+=2; /* Skip escape */ + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */ + { + *min_str++= (char) (cs->min_sort_char >> 8); /* Shift/mask first, then narrow to char */ + *min_str++= (char) (cs->min_sort_char & 255); + *max_str++= (char) (cs->max_sort_char >> 8); + *max_str++= (char) (cs->max_sort_char & 255); + continue; + } + if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */ + { + *min_length= (uint) (min_str - min_org); + *max_length=res_length; + do { + *min_str++ = '\0'; + *min_str++ = ' '; /* Because of key compression */ + *max_str++ = (char) (cs->max_sort_char >> 8); + *max_str++ = (char) (cs->max_sort_char & 255); + } while (min_str + 1 < min_end); + return 0; + } + *min_str++= *max_str++ = ptr[0]; + *min_str++= *max_str++ = ptr[1]; + } + *min_length= *max_length = (uint) (min_str - min_org); + + /* Temporary fix for handling w_one at end of string (key compression) */ + { + char *tmp; + for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';) + { + *--tmp=' '; + *--tmp='\0'; + } + } + + while (min_str + 1 < min_end) + { + *min_str++ = *max_str++ = '\0'; + *min_str++ = *max_str++ = ' '; /* Because of key compression */ + } + return 0; +} + +extern MY_COLLATION_HANDLER my_collation_uca_handler; static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { my_strnncoll_ucs2, my_strnncoll_ucs2, my_strnxfrm_ucs2, - my_like_range_simple, + my_like_range_ucs2, my_wildcmp_ucs2_ci, my_strcasecmp_ucs2, my_instr_mb, @@ -1356,7 +1437,8 @@ CHARSET_INFO 
my_charset_ucs2_general_ci= 1, /* strxfrm_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ &my_charset_ucs2_handler, &my_collation_ucs2_general_ci_handler }; @@ -1380,7 +1462,8 @@ CHARSET_INFO my_charset_ucs2_bin= 1, /* strxfrm_multiply */ 2, /* mbminlen */ 2, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ &my_charset_ucs2_handler, &my_collation_ucs2_bin_handler }; diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index acb1d049a03..997618d7645 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8483,7 +8483,8 @@ CHARSET_INFO my_charset_ujis_japanese_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -8506,7 +8507,8 @@ CHARSET_INFO my_charset_ujis_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index cafc43f3f70..dca73e5a79f 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2009,7 +2009,8 @@ CHARSET_INFO my_charset_utf8_general_ci= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_ci_handler }; @@ -2033,7 +2034,8 @@ CHARSET_INFO my_charset_utf8_bin= 1, /* strxfrm_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* max_sort_char */ &my_charset_handler, &my_collation_mb_bin_handler }; diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 889cf2d2dae..bda349f1988 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -629,7 +629,8 @@ CHARSET_INFO my_charset_cp1250_czech_ci = 2, /* strxfrm_multiply */ 1, /* mbminlen */ 1, /* mbmaxlen */ - 0, + 0, /* min_sort_char */ + 0, /* 
max_sort_char */ &my_charset_8bit_handler, &my_collation_czech_ci_handler };