From 6832eb5155774d69e1bf7fffb561ef48680053ca Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 7 Jun 2004 01:06:17 +0300 Subject: [PATCH 1/4] fixed mistyping --- sql/sql_select.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b87f88a3988..f7a0d5259a6 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -933,7 +933,7 @@ JOIN::optimize() If having is not handled here, it will be checked before the row is sent to the client. */ - if (having && + if (tmp_having && (sort_and_group || (exec_tmp_table1->distinct && !group_list))) having= tmp_having; From 19480ed616000e9e67e0adae52dc5dc9cb27fdcb Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 8 Jun 2004 15:38:22 +0300 Subject: [PATCH 2/4] Added function innobase_store_binlog_offset_and_flush_log requested by Guilhem to ha_innodb.cc and ha_innodb.h sql/ha_innodb.cc: /*********************************************************************** This function stores binlog offset and flushes logs */ void innobase_store_binlog_offset_and_flush_log( /*=============================*/ char *binlog_name, /* in: binlog name */ longlong offset /* in: binlog offset */ ); BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted --- BitKeeper/etc/logging_ok | 1 + sql/ha_innodb.cc | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index e419f9edfb9..32e46fb079f 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -78,6 +78,7 @@ jcole@sarvik.tfr.cafe.ee jcole@tetra.spaceapes.com joreland@mysql.com jorge@linux.jorge.mysql.com +jplindst@t41.(none) kaj@work.mysql.com konstantin@mysql.com kostja@oak.local diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 36202efcc02..619c05711c4 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -66,6 +66,7 @@ extern "C" { #include "../innobase/include/trx0roll.h" #include "../innobase/include/trx0trx.h" #include 
"../innobase/include/trx0sys.h" +#include "../innobase/include/mtr0mtr.h" #include "../innobase/include/row0ins.h" #include "../innobase/include/row0mysql.h" #include "../innobase/include/row0sel.h" @@ -5182,4 +5183,36 @@ ha_innobase::get_auto_increment() return(nr); } +/*********************************************************************** +This function stores binlog offset and flushes logs */ + +void +innobase_store_binlog_offset_and_flush_log( +/*=============================*/ + char *binlog_name, /* in: binlog name */ + longlong offset /* in: binlog offset */ +) +{ + mtr_t mtr; + + assert(binlog_name != NULL); + + /* Start a mini-transaction */ + mtr_start_noninline(&mtr); + + /* Update the latest MySQL binlog name and offset info + in trx sys header */ + + trx_sys_update_mysql_binlog_offset( + binlog_name, + offset, + TRX_SYS_MYSQL_LOG_INFO, &mtr); + + /* Commits the mini-transaction */ + mtr_commit(&mtr); + + /* Synchronous flush of the log buffer to disk */ + log_buffer_flush_to_disk(); +} + #endif /* HAVE_INNOBASE_DB */ From 8ab01b335d502730bef01764d0cd4adae6f86ee0 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 8 Jun 2004 17:56:15 +0500 Subject: [PATCH 3/4] WL#916: Unicode collations for some languages --- include/m_ctype.h | 1 + mysys/charset.c | 210 ++++++++++++++++++++++++++++++++------ strings/ctype-big5.c | 2 + strings/ctype-bin.c | 1 + strings/ctype-czech.c | 1 + strings/ctype-euc_kr.c | 2 + strings/ctype-gb2312.c | 2 + strings/ctype-gbk.c | 2 + strings/ctype-latin1.c | 3 + strings/ctype-sjis.c | 2 + strings/ctype-tis620.c | 2 + strings/ctype-uca.c | 1 + strings/ctype-ucs2.c | 2 + strings/ctype-ujis.c | 2 + strings/ctype-utf8.c | 2 + strings/ctype-win1250ch.c | 1 + strings/ctype.c | 14 +-- 17 files changed, 214 insertions(+), 36 deletions(-) diff --git a/include/m_ctype.h b/include/m_ctype.h index 9502805b017..87b45bd4954 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -200,6 +200,7 @@ typedef struct charset_info_st const char *csname; 
const char *name; const char *comment; + const char *tailoring; uchar *ctype; uchar *to_lower; uchar *to_upper; diff --git a/mysys/charset.c b/mysys/charset.c index 7eccf2dab68..ea07708963d 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -624,6 +624,123 @@ err: #ifdef HAVE_CHARSET_ucs2 +typedef struct my_tailoring_st +{ + uint number; + const char *name; + const char *tailoring; +} my_tailoring; + +static my_tailoring tailoring[]= +{ + { + 0, "icelandic", + /* + Some sources treat LETTER A WITH DIAERESIS (00E4,00C4) + secondary greater than LETTER AE (00E6,00C6). + http://www.evertype.com/alphabets/icelandic.pdf + http://developer.mimer.com/collations/charts/icelandic.htm + + Other sources do not provide any special rules + for LETTER A WITH DIAERESIS: + http://www.omniglot.com/writing/icelandic.htm + http://en.wikipedia.org/wiki/Icelandic_alphabet + http://oss.software.ibm.com/icu/charts/collation/is.html + + Let's go the first way. + */ + "& A < \\u00E1 <<< \\u00C1 " + "& D < \\u00F0 <<< \\u00D0 " + "& E < \\u00E9 <<< \\u00C9 " + "& I < \\u00ED <<< \\u00CD " + "& O < \\u00F3 <<< \\u00D3 " + "& U < \\u00FA <<< \\u00DA " + "& Y < \\u00FD <<< \\u00DD " + "& Z < \\u00FE <<< \\u00DE " + "< \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 " + "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " + "< \\u00E5 <<< \\u00C5 " + }, + { + 1, "latvian", + /* + Some sources treat I and Y as different on the primary level. + Other sources treat I and Y the same on primary level. + We'll go the first way. 
+ */ + "& C < \\u010D <<< \\u010C " + "& G < \\u0123 <<< \\u0122 " + "& I < \\u0079 <<< \\u0059 " + "& K < \\u0137 <<< \\u0136 " + "& L < \\u013C <<< \\u013B " + "& N < \\u0146 <<< \\u0145 " + "& R < \\u0157 <<< \\u0156 " + "& S < \\u0161 <<< \\u0160 " + "& Z < \\u017E <<< \\u017D " + }, + { + 2, "romanian", + "& A < \\u0103 <<< \\u0102 < \\u00E2 <<< \\u00C2 " + "& I < \\u00EE <<< \\u00CE " + "& S < \\u0219 <<< \\u0218 << \\u015F <<< \\u015E " + "& T < \\u021B <<< \\u021A << \\u0163 <<< \\u0162 " + }, + { + 3, "slovenian", + "& C < \\u010D <<< \\u010C " + "& S < \\u0161 <<< \\u0160 " + "& Z < \\u017E <<< \\u017D " + }, + { + 4, "polish", + "& A < \\u0105 <<< \\u0104 " + "& C < \\u0107 <<< \\u0106 " + "& E < \\u0119 <<< \\u0118 " + "& L < \\u0142 <<< \\u0141 " + "& N < \\u0144 <<< \\u0143 " + "& O < \\u00F3 <<< \\u00D3 " + "& S < \\u015B <<< \\u015A " + "& Z < \\u017A <<< \\u0179 < \\u017C <<< \\u017B " /* z-acute (017A/0179), then z-dot-above (017C/017B); lower <<< upper like the other entries */ + }, + { + 5, "estonian", + "& S < \\u0161 <<< \\u0160 " + " < \\u007A <<< \\u005A " + " < \\u017E <<< \\u017D " + "& W < \\u00F5 <<< \\u00D5 " + "< \\u00E4 <<< \\u00C4 " + "< \\u00F6 <<< \\u00D6 " + "< \\u00FC <<< \\u00DC " + }, + { + 6, "spanish", + "& N < \\u00F1 <<< \\u00D1 " + }, + { + 7, "swedish", + /* + Some sources treat V and W as similar on primary level. + We'll treat V and W as different on primary level. + */ + "& Y <<\\u00FC <<< \\u00DC " + "& Z < \\u00E5 <<< \\u00C5 " + "< \\u00E4 <<< \\u00C4 << \\u00E6 <<< \\u00C6 " + "< \\u00F6 <<< \\u00D6 << \\u00F8 <<< \\u00D8 " + }, + { + 8, "turkish", + "& C < \\u00E7 <<< \\u00C7 " + "& G < \\u011F <<< \\u011E " + "& H < \\u0131 <<< \\u0049 " + "& O < \\u00F6 <<< \\u00D6 " + "& S < \\u015F <<< \\u015E " + "& U < \\u00FC <<< \\u00DC " + }, + { + 0, NULL, NULL + } +}; + #define MY_MAX_COLL_RULE 64 /* @@ -643,7 +760,7 @@ err: default weights. 
*/ -static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) +static my_bool create_tailoring(CHARSET_INFO *cs) { MY_COLL_RULE rule[MY_MAX_COLL_RULE]; char errstr[128]; @@ -652,32 +769,14 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) const uchar *deflengths= my_charset_ucs2_general_uca.sort_order; uint16 **defweights= my_charset_ucs2_general_uca.sort_order_big; int rc, i; - - to->number= from->number ? from->number : to->number; - - if (from->csname) - if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME)))) - goto err; - - if (from->name) - if (!(to->name= my_once_strdup(from->name,MYF(MY_WME)))) - goto err; - - if (from->comment) - if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) - goto err; - - to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply; - to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char; - to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char; - to->mbminlen= 2; - to->mbmaxlen= 2; - + + if (!cs->tailoring) + return 1; /* Parse ICU Collation Customization expression */ if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE, - from->sort_order, - from->sort_order + strlen(from->sort_order), + cs->tailoring, + cs->tailoring + strlen(cs->tailoring), errstr, sizeof(errstr))) <= 0) { /* @@ -687,13 +786,12 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) return 1; } - if (!(newweights= (uint16**) my_once_alloc(256*sizeof(uint16*),MYF(MY_WME)))) - goto err; + return 1; bzero(newweights, 256*sizeof(uint16*)); if (!(newlengths= (uchar*) my_once_memdup(deflengths,256,MYF(MY_WME)))) - goto err; + return 1; /* Calculate maximum lenghts for the pages @@ -720,7 +818,7 @@ static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) uint size= 256*newlengths[pagec]*sizeof(uint16); if (!(newweights[pagec]= (uint16*) my_once_alloc(size,MYF(MY_WME)))) - goto err; + return 1; bzero((void*) newweights[pagec], size); for (chc=0 ; chc < 256; chc++) @@ -749,10 +847,41 @@ static int 
ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) if (!newweights[i]) newweights[i]= defweights[i]; - to->sort_order= newlengths; - to->sort_order_big= newweights; + cs->sort_order= newlengths; + cs->sort_order_big= newweights; return 0; +} + + +static int ucs2_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) +{ + + to->number= from->number ? from->number : to->number; + + if (from->csname) + if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME)))) + goto err; + + if (from->name) + if (!(to->name= my_once_strdup(from->name,MYF(MY_WME)))) + goto err; + + if (from->comment) + if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME)))) + goto err; + + if (from->tailoring) + if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME)))) + goto err; + + to->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply; + to->min_sort_char= my_charset_ucs2_general_uca.min_sort_char; + to->max_sort_char= my_charset_ucs2_general_uca.max_sort_char; + to->mbminlen= 2; + to->mbmaxlen= 2; + + return create_tailoring(to); err: return 1; @@ -848,6 +977,24 @@ static int add_collation(CHARSET_INFO *cs) return MY_XML_OK; } +#ifdef HAVE_CHARSET_ucs2 +static my_bool init_uca_charsets() +{ + my_tailoring *t; + CHARSET_INFO cs= my_charset_ucs2_general_uca; + char name[64]; /* declared before the first statement: C89 forbids declarations after code */ + cs.state= MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONTEXT; + for (t= tailoring; t->tailoring; t++) + { + cs.number= 128 + t->number; + cs.tailoring= t->tailoring; + cs.name= name; + sprintf(name, "ucs2_%s_ci", t->name); + add_collation(&cs); + } + return 0; +} +#endif #define MY_MAX_ALLOWED_BUF 1024*1024 #define MY_CHARSET_INDEX "Index.xml" @@ -947,6 +1094,9 @@ static my_bool init_available_charsets(myf myflags) bzero(&all_charsets,sizeof(all_charsets)); init_compiled_charsets(myflags); +#ifdef HAVE_CHARSET_ucs2 + init_uca_charsets(); +#endif /* Copy compiled charsets */ for (cs=all_charsets; diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index f024fa0cc14..7a3c4503d74 100644 --- 
a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6313,6 +6313,7 @@ CHARSET_INFO my_charset_big5_chinese_ci= "big5", /* cs name */ "big5_chinese_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_big5, to_lower_big5, to_upper_big5, @@ -6339,6 +6340,7 @@ CHARSET_INFO my_charset_big5_bin= "big5", /* cs name */ "big5_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_big5, to_lower_big5, to_upper_big5, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 7cac8c7c337..48323018cca 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -378,6 +378,7 @@ CHARSET_INFO my_charset_bin = "binary", /* cs name */ "binary", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_bin, /* ctype */ bin_char_array, /* to_lower */ bin_char_array, /* to_upper */ diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 2eb2fac46e9..dede737f361 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -589,6 +589,7 @@ CHARSET_INFO my_charset_latin2_czech_ci = "latin2", /* cs name */ "latin2_czech_cs", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_czech, to_lower_czech, to_upper_czech, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 8f955c15a73..2d4c68978a3 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8681,6 +8681,7 @@ CHARSET_INFO my_charset_euckr_korean_ci= "euckr", /* cs name */ "euckr_korean_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_euc_kr, to_lower_euc_kr, to_upper_euc_kr, @@ -8707,6 +8708,7 @@ CHARSET_INFO my_charset_euckr_bin= "euckr", /* cs name */ "euckr_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_euc_kr, to_lower_euc_kr, to_upper_euc_kr, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index b76511fc4f3..49ca736a3c2 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5732,6 +5732,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci= "gb2312", /* cs name */ "gb2312_chinese_ci",/* name */ "", /* 
comment */ + NULL, /* tailoring */ ctype_gb2312, to_lower_gb2312, to_upper_gb2312, @@ -5757,6 +5758,7 @@ CHARSET_INFO my_charset_gb2312_bin= "gb2312", /* cs name */ "gb2312_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_gb2312, to_lower_gb2312, to_upper_gb2312, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 0dc00a73fa3..0273feb4c2c 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9962,6 +9962,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci= "gbk", /* cs name */ "gbk_chinese_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_gbk, to_lower_gbk, to_upper_gbk, @@ -9987,6 +9988,7 @@ CHARSET_INFO my_charset_gbk_bin= "gbk", /* cs name */ "gbk_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_gbk, to_lower_gbk, to_upper_gbk, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 0b439964c7c..fe39303e2ac 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -412,6 +412,7 @@ CHARSET_INFO my_charset_latin1= "latin1", /* cs name */ "latin1_swedish_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_latin1, to_lower_latin1, to_upper_latin1, @@ -690,6 +691,7 @@ CHARSET_INFO my_charset_latin1_german2_ci= "latin1", /* cs name */ "latin1_german2_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_latin1, to_lower_latin1, to_upper_latin1, @@ -715,6 +717,7 @@ CHARSET_INFO my_charset_latin1_bin= "latin1", /* cs name */ "latin1_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_latin1, to_lower_latin1, to_upper_latin1, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 72666175a1f..22c58360348 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4579,6 +4579,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci= "sjis", /* cs name */ "sjis_japanese_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_sjis, to_lower_sjis, to_upper_sjis, @@ -4604,6 +4605,7 @@ CHARSET_INFO my_charset_sjis_bin= "sjis", /* cs name */ "sjis_bin", /* 
name */ "", /* comment */ + NULL, /* tailoring */ ctype_sjis, to_lower_sjis, to_upper_sjis, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index e2a138300c3..b2b1ab98352 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -951,6 +951,7 @@ CHARSET_INFO my_charset_tis620_thai_ci= "tis620", /* cs name */ "tis620_thai_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_tis620, to_lower_tis620, to_upper_tis620, @@ -976,6 +977,7 @@ CHARSET_INFO my_charset_tis620_bin= "tis620", /* cs name */ "tis620_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_tis620, to_lower_tis620, to_upper_tis620, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 81073d47554..75e2c06eec2 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7055,6 +7055,7 @@ CHARSET_INFO my_charset_ucs2_general_uca= "ucs2", /* cs name */ "ucs2_general_uca", /* name */ "", /* comment */ + NULL, /* tailoring */ NULL, /* ctype */ NULL, /* to_lower */ NULL, /* to_upper */ diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 67340fdd4f4..d1ba63b8b84 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1431,6 +1431,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_ucs2, /* ctype */ to_lower_ucs2, /* to_lower */ to_upper_ucs2, /* to_upper */ @@ -1456,6 +1457,7 @@ CHARSET_INFO my_charset_ucs2_bin= "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_ucs2, /* ctype */ to_lower_ucs2, /* to_lower */ to_upper_ucs2, /* to_upper */ diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index fd3692553be..668dc7beb8b 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -8468,6 +8468,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci= "ujis", /* cs name */ "ujis_japanese_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_ujis, to_lower_ujis, to_upper_ujis, @@ -8494,6 +8495,7 @@ 
CHARSET_INFO my_charset_ujis_bin= "ujis", /* cs name */ "ujis_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_ujis, to_lower_ujis, to_upper_ujis, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 09b918b0777..2d0feb1c890 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2090,6 +2090,7 @@ CHARSET_INFO my_charset_utf8_general_ci= "utf8", /* cs name */ "utf8_general_ci", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_utf8, /* ctype */ to_lower_utf8, /* to_lower */ to_upper_utf8, /* to_upper */ @@ -2116,6 +2117,7 @@ CHARSET_INFO my_charset_utf8_bin= "utf8", /* cs name */ "utf8_bin", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_utf8, /* ctype */ to_lower_utf8, /* to_lower */ to_upper_utf8, /* to_upper */ diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 2eefb570170..bb287eb695e 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -623,6 +623,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci = "cp1250", /* cs name */ "cp1250_czech_cs", /* name */ "", /* comment */ + NULL, /* tailoring */ ctype_win1250ch, to_lower_win1250ch, to_upper_win1250ch, diff --git a/strings/ctype.c b/strings/ctype.c index 44bf20ada5c..4454d3c45e1 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -124,6 +124,7 @@ static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len) } #define MY_CS_CSDESCR_SIZE 64 +#define MY_CS_TAILORING_SIZE 128 typedef struct my_cs_file_info { @@ -135,7 +136,8 @@ typedef struct my_cs_file_info uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE]; uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE]; char comment[MY_CS_CSDESCR_SIZE]; - size_t sort_order_length; + char tailoring[MY_CS_TAILORING_SIZE]; + size_t tailoring_length; CHARSET_INFO cs; int (*add_collation)(CHARSET_INFO *cs); } MY_CHARSET_LOADER; @@ -186,7 +188,7 @@ static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len) bzero(&i->cs,sizeof(i->cs)); if (s && (s->state == _CS_COLLATION)) - 
i->sort_order_length= 0; + i->tailoring_length= 0; return MY_XML_OK; } @@ -283,12 +285,12 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, uint len) */ char arg[16]; const char *cmd[]= {"&","<","<<","<<<"}; - i->cs.sort_order= i->sort_order; + i->cs.tailoring= i->tailoring; mstr(arg,attr,len,sizeof(arg)-1); - if (i->sort_order_length + 20 < sizeof(i->sort_order)) + if (i->tailoring_length + 20 < sizeof(i->tailoring)) { - char *dst= i->sort_order_length + i->sort_order; - i->sort_order_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg); + char *dst= i->tailoring_length + i->tailoring; + i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg); } } } From 7bd8167fb703aea4f3d533441a26a9dbc628a258 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 8 Jun 2004 19:55:04 +0200 Subject: [PATCH 4/4] Correction to replication of charsets in 4.1: In mysqlbinlog, there was a problem with how we escaped the content of a string user variable. To be perfect, we should have escaped with character_set_client. But this charset is unknown to mysqlbinlog. So the simplest is to print the string in hex. This is unreadable but 100% safe with any charset (checked with Bar), no more need to bother with character_set_client. mysql-test/r/rpl_charset.result: hex strings mysql-test/r/rpl_user_variables.result: hex strings mysql-test/r/user_var.result: hex strings sql/log_event.cc: In mysqlbinlog, there was a problem with how we escaped the content of a string user variable. To be perfect, we should have escaped with character_set_client. But this charset is unknown to mysqlbinlog. So the simplest is to print the string in hex. This is unreadable but 100% safe with any charset (checked with Bar), no more need to bother with character_set_client. 
--- mysql-test/r/rpl_charset.result | 2 +- mysql-test/r/rpl_user_variables.result | 12 +++--- mysql-test/r/user_var.result | 12 +++--- sql/log_event.cc | 59 +++++++++++++++++++------- 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/mysql-test/r/rpl_charset.result b/mysql-test/r/rpl_charset.result index 6ba82d0dd2f..1433443691d 100644 --- a/mysql-test/r/rpl_charset.result +++ b/mysql-test/r/rpl_charset.result @@ -153,7 +153,7 @@ master-bin.000001 4413 Query 1 4413 use `test2`; SET ONE_SHOT CHARACTER_SET_CLIE master-bin.000001 4549 Query 1 4549 use `test2`; truncate table t1 master-bin.000001 4602 Query 1 4602 use `test2`; SET ONE_SHOT CHARACTER_SET_CLIENT=8,COLLATION_CONNECTION=31,COLLATION_DATABASE=9,COLLATION_SERVER=64 master-bin.000001 4738 Intvar 1 4738 INSERT_ID=1 -master-bin.000001 4766 User var 1 4766 @`a`=_cp850'Müller' COLLATE cp850_general_ci +master-bin.000001 4766 User var 1 4766 @`a`=_cp850 0x4DFC6C6C6572 COLLATE cp850_general_ci master-bin.000001 4806 Query 1 4806 use `test2`; insert into t1 (b) values(collation(@a)) master-bin.000001 4882 Query 1 4882 use `test2`; SET ONE_SHOT CHARACTER_SET_CLIENT=8,COLLATION_CONNECTION=31,COLLATION_DATABASE=9,COLLATION_SERVER=64 master-bin.000001 5018 Query 1 5018 use `test2`; drop database test2 diff --git a/mysql-test/r/rpl_user_variables.result b/mysql-test/r/rpl_user_variables.result index ce2fb9c6f9c..85768270ba3 100644 --- a/mysql-test/r/rpl_user_variables.result +++ b/mysql-test/r/rpl_user_variables.result @@ -86,11 +86,11 @@ slave-bin.000001 313 Query 1 313 use `test`; insert into t1 values (@i1), (@i2), slave-bin.000001 396 User var 2 396 @`r1`=12.5 slave-bin.000001 439 User var 2 439 @`r2`=-12.5 slave-bin.000001 482 Query 1 482 use `test`; insert into t1 values (@r1), (@r2) -slave-bin.000001 551 User var 2 551 @`s1`=_latin1'This is a test' COLLATE latin1_swedish_ci -slave-bin.000001 600 User var 2 600 @`s2`=_latin1'' COLLATE latin1_swedish_ci -slave-bin.000001 635 User var 2 635 
@`s3`=_latin1'abc\'def' COLLATE latin1_swedish_ci -slave-bin.000001 677 User var 2 677 @`s4`=_latin1'abc\\def' COLLATE latin1_swedish_ci -slave-bin.000001 719 User var 2 719 @`s5`=_latin1'abc\'def' COLLATE latin1_swedish_ci +slave-bin.000001 551 User var 2 551 @`s1`=_latin1 0x5468697320697320612074657374 COLLATE latin1_swedish_ci +slave-bin.000001 600 User var 2 600 @`s2`=_latin1 "" COLLATE latin1_swedish_ci +slave-bin.000001 635 User var 2 635 @`s3`=_latin1 0x61626327646566 COLLATE latin1_swedish_ci +slave-bin.000001 677 User var 2 677 @`s4`=_latin1 0x6162635C646566 COLLATE latin1_swedish_ci +slave-bin.000001 719 User var 2 719 @`s5`=_latin1 0x61626327646566 COLLATE latin1_swedish_ci slave-bin.000001 761 Query 1 761 use `test`; insert into t1 values (@s1), (@s2), (@s3), (@s4), (@s5) slave-bin.000001 851 User var 2 851 @`n1`=NULL slave-bin.000001 877 Query 1 877 use `test`; insert into t1 values (@n1) @@ -99,7 +99,7 @@ slave-bin.000001 965 Query 1 965 use `test`; insert into t1 values (@n2) slave-bin.000001 1027 Query 1 1027 use `test`; insert into t1 values (@a:=0), (@a:=@a+1), (@a:=@a+1) slave-bin.000001 1115 User var 2 1115 @`a`=2 slave-bin.000001 1157 Query 1 1157 use `test`; insert into t1 values (@a+(@b:=@a+1)) -slave-bin.000001 1229 User var 2 1229 @`q`=_latin1'abc' COLLATE latin1_swedish_ci +slave-bin.000001 1229 User var 2 1229 @`q`=_latin1 0x616263 COLLATE latin1_swedish_ci slave-bin.000001 1266 Query 1 1266 use `test`; insert t1 values (@q), (@q:=concat(@q, 'n1')), (@q:=concat(@q, 'n2')) slave-bin.000001 1370 User var 2 1370 @`a`=5 slave-bin.000001 1412 Query 1 1412 use `test`; insert into t1 values (@a),(@a) diff --git a/mysql-test/r/user_var.result b/mysql-test/r/user_var.result index 605780a7280..2750478c1c5 100644 --- a/mysql-test/r/user_var.result +++ b/mysql-test/r/user_var.result @@ -174,24 +174,24 @@ set @v=convert('abc' using ucs2); insert into t2 values (@v); show binlog events from 79; Log_name Pos Event_type Server_id Orig_log_pos Info 
-master-bin.000001 79 User var 1 79 @`a b`=_latin1'hello' COLLATE latin1_swedish_ci +master-bin.000001 79 User var 1 79 @`a b`=_latin1 0x68656C6C6F COLLATE latin1_swedish_ci master-bin.000001 120 Query 1 120 use `test`; INSERT INTO t1 VALUES(@`a b`) -master-bin.000001 184 User var 1 184 @`var1`=_latin1'\';aaa' COLLATE latin1_swedish_ci +master-bin.000001 184 User var 1 184 @`var1`=_latin1 0x273B616161 COLLATE latin1_swedish_ci master-bin.000001 226 Query 1 226 use `test`; insert into t1 values (@var1) master-bin.000001 290 Query 1 290 use `test`; create table t2 (c char(30)) charset=ucs2 -master-bin.000001 366 User var 1 366 @`v`=_ucs2'\0a\0b\0c' COLLATE ucs2_general_ci +master-bin.000001 366 User var 1 366 @`v`=_ucs2 0x006100620063 COLLATE ucs2_general_ci master-bin.000001 406 Query 1 406 use `test`; insert into t2 values (@v) /*!40019 SET @@session.max_insert_delayed_threads=0*/; -SET @`a b`:=_latin1'hello' COLLATE latin1_swedish_ci; +SET @`a b`:=_latin1 0x68656C6C6F COLLATE latin1_swedish_ci; use test; SET TIMESTAMP=10000; INSERT INTO t1 VALUES(@`a b`); -SET @`var1`:=_latin1'\';aaa' COLLATE latin1_swedish_ci; +SET @`var1`:=_latin1 0x273B616161 COLLATE latin1_swedish_ci; SET TIMESTAMP=10000; insert into t1 values (@var1); SET TIMESTAMP=10000; create table t2 (c char(30)) charset=ucs2; -SET @`v`:=_ucs2'\0a\0b\0c' COLLATE ucs2_general_ci; +SET @`v`:=_ucs2 0x006100620063 COLLATE ucs2_general_ci; SET TIMESTAMP=10000; insert into t2 values (@v); drop table t1, t2; diff --git a/sql/log_event.cc b/sql/log_event.cc index a76725a95e0..315b0f670dd 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -202,6 +202,29 @@ static inline int read_str(char * &buf, char *buf_end, char * &str, return 0; } +/* + Transforms a string into "" or its expression in 0x... form. +*/ +static char *str_to_hex(char *to, char *from, uint len) +{ + char *p= to; + if (len) + { + p= strmov(p, "0x"); + for (uint i= 0; i < len; i++, p+= 2) + { + /* from[i] is char. 
Casting to uchar helps greatly if from[i] < 0 */ + uint tmp= (uint) (uchar) from[i]; + p[0]= _dig_vec_upper[tmp >> 4]; + p[1]= _dig_vec_upper[tmp & 15]; + } + *p= 0; + } + else + p= strmov(p, "\"\""); + return p; // pointer to end 0 of 'to' +} + /************************************************************************** Log_event methods @@ -2210,9 +2233,9 @@ void User_var_log_event::pack_info(Protocol* protocol) } else { - char *p= strxmov(buf + val_offset, "_", cs->csname, "'", NullS); - p+= escape_string_for_mysql(&my_charset_bin, p, val, val_len); - p= strxmov(p, "' COLLATE ", cs->name, NullS); + char *p= strxmov(buf + val_offset, "_", cs->csname, " ", NullS); + p= str_to_hex(p, val, val_len); + p= strxmov(p, " COLLATE ", cs->name, NullS); event_len= p-buf; } break; @@ -2341,11 +2364,24 @@ void User_var_log_event::print(FILE* file, bool short_form, char* last_db) break; case STRING_RESULT: { - char *p; - if (!(p= (char *)my_alloca(2*val_len+1))) + /* + Let's express the string in hex. That's the most robust way. If we + print it in character form instead, we need to escape it with + character_set_client which we don't know (we will know it in 5.0, but + in 4.1 we don't know it easily when we are printing + User_var_log_event). Explanation why we would need to bother with + character_set_client (quoting Bar): + > Note, the parser doesn't switch to another unescaping mode after + > it has met a character set introducer. + > For example, if an SJIS client says something like: + > SET @a= _ucs2 '\0a\0b' + > the string constant is still unescaped according to SJIS, not + > according to UCS2. + */ + char *p; + if (!(p= (char *)my_alloca(2*val_len+1+2))) // 2 hex digits per byte break; // no error, as we are 'void' - escape_string_for_mysql(&my_charset_bin, p, val, val_len); -#if MYSQL_VERSION_ID < 50000 + str_to_hex(p, val, val_len); /* For proper behaviour when mysqlbinlog|mysql, we need to explicitely specify the variable's collation. 
It will however cause problems when @@ -2360,14 +2396,7 @@ void User_var_log_event::print(FILE* file, bool short_form, char* last_db) */ fprintf(file, ":=???;\n"); else - fprintf(file, ":=_%s'%s' COLLATE %s;\n", cs->csname, p, cs->name); -#else - /* - In 5.0 we will have some SET CHARACTER_SET_ect automatically printed - for all events where it's needed. - */ - fprintf(file, ":='%s';\n", p); -#endif + fprintf(file, ":=_%s %s COLLATE %s;\n", cs->csname, p, cs->name); my_afree(p); } break;