Merge pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0

into  pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0-build
This commit is contained in:
unknown 2007-08-05 16:37:34 +02:00
commit fe87e88f52
20 changed files with 431 additions and 56 deletions

View file

@ -78,8 +78,14 @@ extern MY_UNICASE_INFO *my_unicase_turkish[256];
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CS_PUREASCII 2048 /* if a charset is pure ascii */
#define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */
#define MY_REPERTOIRE_ASCII 1 /* Pure ASCII U+0000..U+007F */
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
typedef struct my_uni_idx_st
{
@ -436,6 +442,11 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, uint len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, uint len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
#define _MY_U 01 /* Upper case */
#define _MY_L 02 /* Lower case */
#define _MY_NMR 04 /* Numeral (digit) */

View file

@ -896,4 +896,30 @@ select hex(convert(s1 using latin1)) from t1;
hex(convert(s1 using latin1))
7F
drop table t1;
create table t1 (a varchar(15) character set ascii not null, b int);
insert into t1 values ('a',1);
select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062))
aa
select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062))
ab
select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
a b
a 1
select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
a b
select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
drop table t1;
End of 5.0 tests

View file

@ -1639,6 +1639,42 @@ coercibility(col1) collation(col1)
0 utf8_swedish_ci
drop view v1, v2;
drop table t1;
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, N'x', N'y')) from t1;
concat(a, if(b>10, N'x', N'y'))
ay
select concat(a, if(b>10, N'æ', N'ß')) from t1;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
drop table t1;
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
concat(a, if(b>10, _utf8'x', _utf8'y'))
ay
select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
drop table t1;
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
concat(a, if(b>10, _utf8 0x78, _utf8 0x79))
ay
select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
drop table t1;
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
concat(a, if(b>10, 'x' 'x', 'y' 'y'))
ayy
select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
drop table t1;
CREATE TABLE t1 (
colA int(11) NOT NULL,
colB varchar(255) character set utf8 NOT NULL,

View file

@ -1246,3 +1246,19 @@ SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SE
TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s")
838:59:58
838:59:59
set names latin1;
create table t1 (a varchar(15) character set ascii not null);
insert into t1 values ('070514-000000');
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull'))
#
set names swe7;
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (swe7_swedish_ci,COERCIBLE) for operation 'concat'
set names latin1;
set lc_time_names=fr_FR;
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation 'concat'
set lc_time_names=en_US;
drop table t1;
End of 5.0 tests

View file

@ -622,4 +622,33 @@ select hex(s2) from t1;
select hex(convert(s1 using latin1)) from t1;
drop table t1;
#
# Conversion from UCS2 to ASCII is possible
# if the UCS2 string consists of only ASCII characters
#
create table t1 (a varchar(15) character set ascii not null, b int);
insert into t1 values ('a',1);
select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
#
# Conversion from UCS2 to ASCII is not possible if
# the UCS2 string has non-ASCII characters
#
--error 1267
select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
--error 1267
select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
--error 1267
select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
--error 1267
select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
--error 1267
select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
--error 1267
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
drop table t1;
--echo End of 5.0 tests

View file

@ -1314,6 +1314,46 @@ select coercibility(col1), collation(col1) from v2;
drop view v1, v2;
drop table t1;
#
# Check conversion of NCHAR strings to subset (e.g. latin1).
# Conversion is possible if string repertoire is ASCII.
# Conversion is not possible if the string have extended characters
#
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, N'x', N'y')) from t1;
--error 1267
select concat(a, if(b>10, N'æ', N'ß')) from t1;
drop table t1;
# Conversion tests for character set introducers
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
--error 1267
select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
drop table t1;
# Conversion tests for introducer + HEX string
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
--error 1267
select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
drop table t1;
# Conversion tests for "text_literal TEXT_STRING_literal" syntax structure
set names utf8;
create table t1 (a varchar(10) character set latin1, b int);
insert into t1 values ('a',1);
select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
--error 1267
select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
drop table t1;
#
# Bug#19960: Inconsistent results when joining

View file

@ -752,3 +752,29 @@ DROP TABLE t1;
# Check if using GROUP BY with TIME_FORMAT() produces correct results
SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SELECT 3020398 ) x GROUP BY 1;
#
# Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
#
set names latin1;
create table t1 (a varchar(15) character set ascii not null);
insert into t1 values ('070514-000000');
# Conversion of date_format() result to ASCII
# is safe with the default locale en_US
--replace_column 1 #
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
# Error for swe7: it is not ASCII compatible
set names swe7;
--error 1267
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
set names latin1;
# Conversion of date_format() result to ASCII
# is not safe with the non-default locale fr_FR
# because month and day names can have accented characters
set lc_time_names=fr_FR;
--error 1267
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
set lc_time_names=en_US;
drop table t1;
--echo End of 5.0 tests

View file

@ -277,6 +277,9 @@ static int add_collation(CHARSET_INFO *cs)
if (sort_order && sort_order['A'] < sort_order['a'] &&
sort_order['a'] < sort_order['B'])
all_charsets[cs->number]->state|= MY_CS_CSSORT;
if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
all_charsets[cs->number]->state|= MY_CS_PUREASCII;
}
}
else

View file

@ -1327,6 +1327,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
}
static bool
left_is_superset(DTCollation *left, DTCollation *right)
{
/* Allow convert to Unicode */
if (left->collation->state & MY_CS_UNICODE &&
(left->derivation < right->derivation ||
(left->derivation == right->derivation &&
!(right->collation->state & MY_CS_UNICODE))))
return TRUE;
/* Allow convert from ASCII */
if (right->repertoire == MY_REPERTOIRE_ASCII &&
(left->derivation < right->derivation ||
(left->derivation == right->derivation &&
!(left->repertoire == MY_REPERTOIRE_ASCII))))
return TRUE;
/* Disallow conversion otherwise */
return FALSE;
}
/*
Aggregate two collations together taking
into account their coercibility (aka derivation):
@ -1391,18 +1410,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
; // Do nothing
}
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
collation->state & MY_CS_UNICODE &&
(derivation < dt.derivation ||
(derivation == dt.derivation &&
!(dt.collation->state & MY_CS_UNICODE))))
left_is_superset(this, &dt))
{
// Do nothing
}
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
dt.collation->state & MY_CS_UNICODE &&
(dt.derivation < derivation ||
(dt.derivation == derivation &&
!(collation->state & MY_CS_UNICODE))))
left_is_superset(&dt, this))
{
set(dt);
}
@ -1421,7 +1434,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
else
{
// Cannot apply conversion
set(0, DERIVATION_NONE);
set(0, DERIVATION_NONE, 0);
return 1;
}
}
@ -1443,8 +1456,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
{
if (derivation == DERIVATION_EXPLICIT)
{
set(0, DERIVATION_NONE);
return 1;
set(0, DERIVATION_NONE, 0);
return 1;
}
if (collation->state & MY_CS_BINSORT)
return 0;
@ -1458,6 +1471,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
set(bin, DERIVATION_NONE);
}
}
repertoire|= dt.repertoire;
return 0;
}
@ -1597,12 +1611,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname,
{
Item* conv;
uint32 dummy_offset;
if (!String::needs_conversion(0, coll.collation,
(*arg)->collation.collation,
if (!String::needs_conversion(0, (*arg)->collation.collation,
coll.collation,
&dummy_offset))
continue;
if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
conv= new Item_func_conv_charset(*arg, coll.collation, 1);
if (!conv)
{
if (nargs >=2 && nargs <= 3)
{

View file

@ -49,29 +49,50 @@ class DTCollation {
public:
CHARSET_INFO *collation;
enum Derivation derivation;
uint repertoire;
void set_repertoire_from_charset(CHARSET_INFO *cs)
{
repertoire= cs->state & MY_CS_PUREASCII ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
}
DTCollation()
{
collation= &my_charset_bin;
derivation= DERIVATION_NONE;
repertoire= MY_REPERTOIRE_UNICODE30;
}
DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
{
collation= collation_arg;
derivation= derivation_arg;
set_repertoire_from_charset(collation_arg);
}
void set(DTCollation &dt)
{
collation= dt.collation;
derivation= dt.derivation;
repertoire= dt.repertoire;
}
void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
{
collation= collation_arg;
derivation= derivation_arg;
set_repertoire_from_charset(collation_arg);
}
void set(CHARSET_INFO *collation_arg,
Derivation derivation_arg,
uint repertoire_arg)
{
collation= collation_arg;
derivation= derivation_arg;
repertoire= repertoire_arg;
}
void set(CHARSET_INFO *collation_arg)
{ collation= collation_arg; }
{
collation= collation_arg;
set_repertoire_from_charset(collation_arg);
}
void set(Derivation derivation_arg)
{ derivation= derivation_arg; }
bool aggregate(DTCollation &dt, uint flags= 0);
@ -1672,10 +1693,11 @@ class Item_string :public Item
{
public:
Item_string(const char *str,uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
uint repertoire= MY_REPERTOIRE_UNICODE30)
{
collation.set(cs, dv);
str_value.set_or_copy_aligned(str,length,cs);
str_value.set_or_copy_aligned(str, length, cs);
collation.set(cs, dv, repertoire);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
@ -1699,10 +1721,11 @@ public:
fixed= 1;
}
Item_string(const char *name_par, const char *str, uint length,
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
uint repertoire= MY_REPERTOIRE_UNICODE30)
{
collation.set(cs, dv);
str_value.set_or_copy_aligned(str,length,cs);
str_value.set_or_copy_aligned(str, length, cs);
collation.set(cs, dv, repertoire);
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par, 0, cs);
decimals=NOT_FIXED_DEC;
@ -1718,6 +1741,12 @@ public:
str_value.copy(str_arg, length_arg, collation.collation);
max_length= str_value.numchars() * collation.collation->mbmaxlen;
}
void set_repertoire_from_value()
{
collation.repertoire= my_string_repertoire(str_value.charset(),
str_value.ptr(),
str_value.length());
}
enum Type type() const { return STRING_ITEM; }
double val_real();
longlong val_int();

View file

@ -3767,7 +3767,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name,
entry->value=0;
entry->length=0;
entry->update_query_id=0;
entry->collation.set(NULL, DERIVATION_IMPLICIT);
entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
entry->unsigned_flag= 0;
/*
If we are here, we were called from a SET or a query which sets a

View file

@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec()
colname, args[0]->collation.collation->csname);
return;
}
collation.set(set_collation, DERIVATION_EXPLICIT);
collation.set(set_collation, DERIVATION_EXPLICIT,
args[0]->collation.repertoire);
max_length= args[0]->max_length;
}

View file

@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec()
Item *arg1= args[1]->this_item();
decimals=0;
collation.set(thd->variables.collation_connection);
CHARSET_INFO *cs= thd->variables.collation_connection;
uint32 repertoire= arg1->collation.repertoire;
if (!thd->variables.lc_time_names->is_ascii)
repertoire|= MY_REPERTOIRE_EXTENDED;
collation.set(cs, arg1->collation.derivation, repertoire);
if (arg1->type() == STRING_ITEM)
{ // Optimize the normal case
fixed_length=1;

View file

@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip)
uint found_escape=0;
CHARSET_INFO *cs= lip->m_thd->charset();
lip->tok_bitmap= 0;
sep= yyGetLast(); // String should end with this
while (lip->ptr != lip->end_of_query)
{
c = yyGet();
c= yyGet();
lip->tok_bitmap|= c;
#ifdef USE_MB
{
int l;
@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd)
break;
}
yylval->lex_str.length= lip->yytoklen;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING);
case MY_LEX_IDENT_OR_HEX:
@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd)
break;
}
yylval->lex_str.length=lip->yytoklen;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(TEXT_STRING);
case MY_LEX_COMMENT: // Comment

View file

@ -957,6 +957,9 @@ public:
/** Position of ';' in the stream, to delimit multiple queries. */
const char* found_semicolon;
/** Token character bitmaps, to detect 7bit strings. */
uchar tok_bitmap;
/** SQL_MODE = IGNORE_SPACE. */
bool ignore_space;
@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list
gptr yacc_yyss,yacc_yyvs;
THD *thd;
CHARSET_INFO *charset, *underscore_charset;
bool text_string_is_7bit;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
/* Position (first character index) of SELECT of CREATE VIEW statement */

View file

@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length,
(to_cs == &my_charset_bin) ||
(to_cs == from_cs) ||
my_charset_same(from_cs, to_cs) ||
(my_charset_is_ascii_based(to_cs) &&
my_charset_is_8bit_pure_ascii(from_cs)) ||
((from_cs == &my_charset_bin) &&
(!(*offset=(arg_length % to_cs->mbminlen)))))
return FALSE;

View file

@ -7523,18 +7523,54 @@ opt_load_data_set_spec:
/* Common definitions */
text_literal:
TEXT_STRING_literal
{
THD *thd= YYTHD;
$$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
}
| NCHAR_STRING
{ $$= new Item_string($1.str,$1.length,national_charset_info); }
| UNDERSCORE_CHARSET TEXT_STRING
{ $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
| text_literal TEXT_STRING_literal
{ ((Item_string*) $1)->append($2.str,$2.length); }
;
TEXT_STRING
{
LEX_STRING tmp;
THD *thd= YYTHD;
CHARSET_INFO *cs_con= thd->variables.collation_connection;
CHARSET_INFO *cs_cli= thd->variables.character_set_client;
uint repertoire= thd->lex->text_string_is_7bit &&
my_charset_is_ascii_based(cs_cli) ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
if (thd->charset_is_collation_connection ||
(repertoire == MY_REPERTOIRE_ASCII &&
my_charset_is_ascii_based(cs_con)))
tmp= $1;
else
thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
$$= new Item_string(tmp.str, tmp.length, cs_con,
DERIVATION_COERCIBLE, repertoire);
}
| NCHAR_STRING
{
uint repertoire= Lex->text_string_is_7bit ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
$$= new Item_string($1.str, $1.length, national_charset_info,
DERIVATION_COERCIBLE, repertoire);
}
| UNDERSCORE_CHARSET TEXT_STRING
{
$$= new Item_string($2.str, $2.length, Lex->underscore_charset);
((Item_string*) $$)->set_repertoire_from_value();
}
| text_literal TEXT_STRING_literal
{
Item_string* item= (Item_string*) $1;
item->append($2.str, $2.length);
if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
{
/*
If the string has been pure ASCII so far,
check the new part.
*/
CHARSET_INFO *cs= YYTHD->variables.collation_connection;
item->collation.repertoire|= my_string_repertoire(cs,
$2.str,
$2.length);
}
}
;
text_string:
TEXT_STRING_literal
@ -7606,20 +7642,22 @@ literal:
| TRUE_SYM { $$= new Item_int((char*) "TRUE",1,1); }
| HEX_NUM { $$ = new Item_hex_string($1.str, $1.length);}
| BIN_NUM { $$= new Item_bin_string($1.str, $1.length); }
| UNDERSCORE_CHARSET HEX_NUM
{
Item *tmp= new Item_hex_string($2.str, $2.length);
/*
it is OK only emulate fix_fieds, because we need only
| UNDERSCORE_CHARSET HEX_NUM
{
Item *tmp= new Item_hex_string($2.str, $2.length);
/*
it is OK only emulate fix_fieds, because we need only
value of constant
*/
String *str= tmp ?
tmp->quick_fix_field(), tmp->val_str((String*) 0) :
(String*) 0;
$$= new Item_string(str ? str->ptr() : "",
str ? str->length() : 0,
Lex->underscore_charset);
}
*/
String *str= tmp ?
tmp->quick_fix_field(), tmp->val_str((String*) 0) :
(String*) 0;
$$= new Item_string(str ? str->ptr() : "",
str ? str->length() : 0,
Lex->underscore_charset);
if ($$)
((Item_string *) $$)->set_repertoire_from_value();
}
| UNDERSCORE_CHARSET BIN_NUM
{
Item *tmp= new Item_bin_string($2.str, $2.length);

View file

@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
}
void dispcset(FILE *f,CHARSET_INFO *cs)
{
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
fprintf(f," MY_CS_COMPILED%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
if (cs->name)
{

View file

@ -6722,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
11,0,0,
MY_CS_COMPILED|MY_CS_PRIMARY,
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_general_ci", /* coll name */
"", /* comment */
@ -7811,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
65,0,0,
MY_CS_COMPILED|MY_CS_BINSORT,
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_bin", /* coll name */
"", /* comment */

View file

@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
my_xml_parser_free(&p);
return rc;
}
/*
Check repertoire: detect pure ascii strings
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
const char *strend= str + length;
if (cs->mbminlen == 1)
{
for ( ; str < strend; str++)
{
if (((uchar) *str) > 0x7F)
return MY_REPERTOIRE_UNICODE30;
}
}
else
{
my_wc_t wc;
int chlen;
for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
{
if (wc > 0x7F)
return MY_REPERTOIRE_UNICODE30;
}
}
return MY_REPERTOIRE_ASCII;
}
/*
Detect whether a character set is ASCII compatible.
Returns TRUE for:
- all 8bit character sets whose Unicode mapping of 0x7B is '{'
(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
- all multi-byte character sets having mbminlen == 1
(ignores ucs2 whose mbminlen is 2)
TODO:
When merging to 5.2, this function should be changed
to check a new flag MY_CS_NONASCII,
return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
This flag was previously added into 5.2 under terms
of WL#3759 "Optimize identifier conversion in client-server protocol"
especially to mark character sets not compatible with ASCII.
We won't backport this flag to 5.0 or 5.1.
This function is Ok for 5.0 and 5.1, because we're not going
to introduce new tricky character sets between 5.0 and 5.2.
*/
my_bool
my_charset_is_ascii_based(CHARSET_INFO *cs)
{
return
(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
(cs->mbminlen == 1 && cs->mbmaxlen > 1);
}
/*
Detect if a character set is 8bit,
and it is pure ascii, i.e. doesn't have
characters outside U+0000..U+007F
This functions is shared between "conf_to_src"
and dynamic charsets loader in "mysqld".
*/
my_bool
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
{
size_t code;
if (!cs->tab_to_uni)
return 0;
for (code= 0; code < 256; code++)
{
if (cs->tab_to_uni[code] > 0x7F)
return 0;
}
return 1;
}