mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
Merge pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0
into pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0-build
This commit is contained in:
commit
fe87e88f52
20 changed files with 431 additions and 56 deletions
|
@ -78,8 +78,14 @@ extern MY_UNICASE_INFO *my_unicase_turkish[256];
|
|||
#define MY_CS_READY 256 /* if a charset is initialized */
|
||||
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
|
||||
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
|
||||
#define MY_CS_PUREASCII 2048 /* if a charset is pure ascii */
|
||||
#define MY_CHARSET_UNDEFINED 0
|
||||
|
||||
/* Character repertoire flags */
|
||||
#define MY_REPERTOIRE_ASCII 1 /* Pure ASCII U+0000..U+007F */
|
||||
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
|
||||
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
|
||||
|
||||
|
||||
typedef struct my_uni_idx_st
|
||||
{
|
||||
|
@ -436,6 +442,11 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, uint len);
|
|||
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, uint len);
|
||||
|
||||
|
||||
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
|
||||
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
|
||||
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
|
||||
|
||||
|
||||
#define _MY_U 01 /* Upper case */
|
||||
#define _MY_L 02 /* Lower case */
|
||||
#define _MY_NMR 04 /* Numeral (digit) */
|
||||
|
|
|
@ -896,4 +896,30 @@ select hex(convert(s1 using latin1)) from t1;
|
|||
hex(convert(s1 using latin1))
|
||||
7F
|
||||
drop table t1;
|
||||
create table t1 (a varchar(15) character set ascii not null, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
|
||||
concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062))
|
||||
aa
|
||||
select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
|
||||
concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062))
|
||||
ab
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
|
||||
a b
|
||||
a 1
|
||||
select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
|
||||
a b
|
||||
select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
|
||||
select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
|
||||
select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
|
||||
select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
|
||||
select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
|
||||
drop table t1;
|
||||
End of 5.0 tests
|
||||
|
|
|
@ -1639,6 +1639,42 @@ coercibility(col1) collation(col1)
|
|||
0 utf8_swedish_ci
|
||||
drop view v1, v2;
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, N'x', N'y')) from t1;
|
||||
concat(a, if(b>10, N'x', N'y'))
|
||||
ay
|
||||
select concat(a, if(b>10, N'æ', N'ß')) from t1;
|
||||
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
|
||||
concat(a, if(b>10, _utf8'x', _utf8'y'))
|
||||
ay
|
||||
select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
|
||||
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
|
||||
concat(a, if(b>10, _utf8 0x78, _utf8 0x79))
|
||||
ay
|
||||
select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
|
||||
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
|
||||
drop table t1;
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
|
||||
concat(a, if(b>10, 'x' 'x', 'y' 'y'))
|
||||
ayy
|
||||
select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
|
||||
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
|
||||
drop table t1;
|
||||
CREATE TABLE t1 (
|
||||
colA int(11) NOT NULL,
|
||||
colB varchar(255) character set utf8 NOT NULL,
|
||||
|
|
|
@ -1246,3 +1246,19 @@ SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SE
|
|||
TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s")
|
||||
838:59:58
|
||||
838:59:59
|
||||
set names latin1;
|
||||
create table t1 (a varchar(15) character set ascii not null);
|
||||
insert into t1 values ('070514-000000');
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull'))
|
||||
#
|
||||
set names swe7;
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (swe7_swedish_ci,COERCIBLE) for operation 'concat'
|
||||
set names latin1;
|
||||
set lc_time_names=fr_FR;
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation 'concat'
|
||||
set lc_time_names=en_US;
|
||||
drop table t1;
|
||||
End of 5.0 tests
|
||||
|
|
|
@ -622,4 +622,33 @@ select hex(s2) from t1;
|
|||
select hex(convert(s1 using latin1)) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Conversion from UCS2 to ASCII is possible
|
||||
# if the UCS2 string consists of only ASCII characters
|
||||
#
|
||||
create table t1 (a varchar(15) character set ascii not null, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
|
||||
select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
|
||||
select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
|
||||
|
||||
#
|
||||
# Conversion from UCS2 to ASCII is not possible if
|
||||
# the UCS2 string has non-ASCII characters
|
||||
#
|
||||
--error 1267
|
||||
select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
|
||||
--error 1267
|
||||
select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
|
||||
--error 1267
|
||||
select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
|
||||
--error 1267
|
||||
select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
|
||||
--error 1267
|
||||
select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
|
||||
--error 1267
|
||||
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
|
||||
drop table t1;
|
||||
|
||||
--echo End of 5.0 tests
|
||||
|
|
|
@ -1314,6 +1314,46 @@ select coercibility(col1), collation(col1) from v2;
|
|||
drop view v1, v2;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Check conversion of NCHAR strings to subset (e.g. latin1).
|
||||
# Conversion is possible if string repertoire is ASCII.
|
||||
# Conversion is not possible if the string has extended characters
|
||||
#
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, N'x', N'y')) from t1;
|
||||
--error 1267
|
||||
select concat(a, if(b>10, N'æ', N'ß')) from t1;
|
||||
drop table t1;
|
||||
|
||||
# Conversion tests for character set introducers
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
|
||||
--error 1267
|
||||
select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
|
||||
drop table t1;
|
||||
|
||||
# Conversion tests for introducer + HEX string
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
|
||||
--error 1267
|
||||
select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
|
||||
drop table t1;
|
||||
|
||||
# Conversion tests for "text_literal TEXT_STRING_literal" syntax structure
|
||||
set names utf8;
|
||||
create table t1 (a varchar(10) character set latin1, b int);
|
||||
insert into t1 values ('a',1);
|
||||
select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
|
||||
--error 1267
|
||||
select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
|
||||
drop table t1;
|
||||
|
||||
|
||||
#
|
||||
# Bug#19960: Inconsistent results when joining
|
||||
|
|
|
@ -752,3 +752,29 @@ DROP TABLE t1;
|
|||
# Check if using GROUP BY with TIME_FORMAT() produces correct results
|
||||
|
||||
SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SELECT 3020398 ) x GROUP BY 1;
|
||||
|
||||
#
|
||||
# Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
|
||||
#
|
||||
set names latin1;
|
||||
create table t1 (a varchar(15) character set ascii not null);
|
||||
insert into t1 values ('070514-000000');
|
||||
# Conversion of date_format() result to ASCII
|
||||
# is safe with the default locale en_US
|
||||
--replace_column 1 #
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
# Error for swe7: it is not ASCII compatible
|
||||
set names swe7;
|
||||
--error 1267
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
set names latin1;
|
||||
# Conversion of date_format() result to ASCII
|
||||
# is not safe with the non-default locale fr_FR
|
||||
# because month and day names can have accented characters
|
||||
set lc_time_names=fr_FR;
|
||||
--error 1267
|
||||
select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
|
||||
set lc_time_names=en_US;
|
||||
drop table t1;
|
||||
|
||||
--echo End of 5.0 tests
|
||||
|
|
|
@ -277,6 +277,9 @@ static int add_collation(CHARSET_INFO *cs)
|
|||
if (sort_order && sort_order['A'] < sort_order['a'] &&
|
||||
sort_order['a'] < sort_order['B'])
|
||||
all_charsets[cs->number]->state|= MY_CS_CSSORT;
|
||||
|
||||
if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
|
||||
all_charsets[cs->number]->state|= MY_CS_PUREASCII;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
46
sql/item.cc
46
sql/item.cc
|
@ -1327,6 +1327,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
|
|||
}
|
||||
|
||||
|
||||
static bool
|
||||
left_is_superset(DTCollation *left, DTCollation *right)
|
||||
{
|
||||
/* Allow convert to Unicode */
|
||||
if (left->collation->state & MY_CS_UNICODE &&
|
||||
(left->derivation < right->derivation ||
|
||||
(left->derivation == right->derivation &&
|
||||
!(right->collation->state & MY_CS_UNICODE))))
|
||||
return TRUE;
|
||||
/* Allow convert from ASCII */
|
||||
if (right->repertoire == MY_REPERTOIRE_ASCII &&
|
||||
(left->derivation < right->derivation ||
|
||||
(left->derivation == right->derivation &&
|
||||
!(left->repertoire == MY_REPERTOIRE_ASCII))))
|
||||
return TRUE;
|
||||
/* Disallow conversion otherwise */
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
Aggregate two collations together taking
|
||||
into account their coercibility (aka derivation):
|
||||
|
@ -1391,18 +1410,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
|
|||
; // Do nothing
|
||||
}
|
||||
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
|
||||
collation->state & MY_CS_UNICODE &&
|
||||
(derivation < dt.derivation ||
|
||||
(derivation == dt.derivation &&
|
||||
!(dt.collation->state & MY_CS_UNICODE))))
|
||||
left_is_superset(this, &dt))
|
||||
{
|
||||
// Do nothing
|
||||
}
|
||||
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
|
||||
dt.collation->state & MY_CS_UNICODE &&
|
||||
(dt.derivation < derivation ||
|
||||
(dt.derivation == derivation &&
|
||||
!(collation->state & MY_CS_UNICODE))))
|
||||
left_is_superset(&dt, this))
|
||||
{
|
||||
set(dt);
|
||||
}
|
||||
|
@ -1421,7 +1434,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
|
|||
else
|
||||
{
|
||||
// Cannot apply conversion
|
||||
set(0, DERIVATION_NONE);
|
||||
set(0, DERIVATION_NONE, 0);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -1443,8 +1456,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
|
|||
{
|
||||
if (derivation == DERIVATION_EXPLICIT)
|
||||
{
|
||||
set(0, DERIVATION_NONE);
|
||||
return 1;
|
||||
set(0, DERIVATION_NONE, 0);
|
||||
return 1;
|
||||
}
|
||||
if (collation->state & MY_CS_BINSORT)
|
||||
return 0;
|
||||
|
@ -1458,6 +1471,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
|
|||
set(bin, DERIVATION_NONE);
|
||||
}
|
||||
}
|
||||
repertoire|= dt.repertoire;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1597,12 +1611,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname,
|
|||
{
|
||||
Item* conv;
|
||||
uint32 dummy_offset;
|
||||
if (!String::needs_conversion(0, coll.collation,
|
||||
(*arg)->collation.collation,
|
||||
if (!String::needs_conversion(0, (*arg)->collation.collation,
|
||||
coll.collation,
|
||||
&dummy_offset))
|
||||
continue;
|
||||
|
||||
if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
|
||||
if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
|
||||
((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
|
||||
conv= new Item_func_conv_charset(*arg, coll.collation, 1);
|
||||
|
||||
if (!conv)
|
||||
{
|
||||
if (nargs >=2 && nargs <= 3)
|
||||
{
|
||||
|
|
43
sql/item.h
43
sql/item.h
|
@ -49,29 +49,50 @@ class DTCollation {
|
|||
public:
|
||||
CHARSET_INFO *collation;
|
||||
enum Derivation derivation;
|
||||
uint repertoire;
|
||||
|
||||
/*
  Derive the repertoire from the character set flags:
  MY_CS_PUREASCII gives MY_REPERTOIRE_ASCII,
  anything else gives MY_REPERTOIRE_UNICODE30.
*/
void set_repertoire_from_charset(CHARSET_INFO *cs)
{
  if (cs->state & MY_CS_PUREASCII)
    repertoire= MY_REPERTOIRE_ASCII;
  else
    repertoire= MY_REPERTOIRE_UNICODE30;
}
|
||||
DTCollation()
|
||||
{
|
||||
collation= &my_charset_bin;
|
||||
derivation= DERIVATION_NONE;
|
||||
repertoire= MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
/*
  Construct from a character set and a derivation;
  the repertoire is taken from the character set flags.
*/
DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
  :collation(collation_arg),
   derivation(derivation_arg)
{
  set_repertoire_from_charset(collation_arg);
}
|
||||
/* Copy collation, derivation and repertoire from another DTCollation */
void set(DTCollation &dt)
{
  this->collation= dt.collation;
  this->derivation= dt.derivation;
  this->repertoire= dt.repertoire;
}
|
||||
/*
  Assign character set and derivation;
  the repertoire is recomputed from the character set flags.
*/
void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
{
  this->collation= collation_arg;
  this->derivation= derivation_arg;
  /* Reads collation_arg->state to derive the repertoire */
  set_repertoire_from_charset(collation_arg);
}
|
||||
/*
  Assign character set, derivation and an explicitly given repertoire.
  The character set is stored without being dereferenced here.
*/
void set(CHARSET_INFO *collation_arg,
         Derivation derivation_arg,
         uint repertoire_arg)
{
  this->collation= collation_arg;
  this->derivation= derivation_arg;
  this->repertoire= repertoire_arg;
}
|
||||
void set(CHARSET_INFO *collation_arg)
|
||||
{ collation= collation_arg; }
|
||||
{
|
||||
collation= collation_arg;
|
||||
set_repertoire_from_charset(collation_arg);
|
||||
}
|
||||
/* Change only the derivation; collation and repertoire stay as they are */
void set(Derivation derivation_arg)
{
  this->derivation= derivation_arg;
}
|
||||
bool aggregate(DTCollation &dt, uint flags= 0);
|
||||
|
@ -1672,10 +1693,11 @@ class Item_string :public Item
|
|||
{
|
||||
public:
|
||||
Item_string(const char *str,uint length,
|
||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
|
||||
uint repertoire= MY_REPERTOIRE_UNICODE30)
|
||||
{
|
||||
collation.set(cs, dv);
|
||||
str_value.set_or_copy_aligned(str,length,cs);
|
||||
str_value.set_or_copy_aligned(str, length, cs);
|
||||
collation.set(cs, dv, repertoire);
|
||||
/*
|
||||
We have to have a different max_length than 'length' here to
|
||||
ensure that we get the right length if we do use the item
|
||||
|
@ -1699,10 +1721,11 @@ public:
|
|||
fixed= 1;
|
||||
}
|
||||
Item_string(const char *name_par, const char *str, uint length,
|
||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
|
||||
uint repertoire= MY_REPERTOIRE_UNICODE30)
|
||||
{
|
||||
collation.set(cs, dv);
|
||||
str_value.set_or_copy_aligned(str,length,cs);
|
||||
str_value.set_or_copy_aligned(str, length, cs);
|
||||
collation.set(cs, dv, repertoire);
|
||||
max_length= str_value.numchars()*cs->mbmaxlen;
|
||||
set_name(name_par, 0, cs);
|
||||
decimals=NOT_FIXED_DEC;
|
||||
|
@ -1718,6 +1741,12 @@ public:
|
|||
str_value.copy(str_arg, length_arg, collation.collation);
|
||||
max_length= str_value.numchars() * collation.collation->mbmaxlen;
|
||||
}
|
||||
void set_repertoire_from_value()
|
||||
{
|
||||
collation.repertoire= my_string_repertoire(str_value.charset(),
|
||||
str_value.ptr(),
|
||||
str_value.length());
|
||||
}
|
||||
enum Type type() const { return STRING_ITEM; }
|
||||
double val_real();
|
||||
longlong val_int();
|
||||
|
|
|
@ -3767,7 +3767,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name,
|
|||
entry->value=0;
|
||||
entry->length=0;
|
||||
entry->update_query_id=0;
|
||||
entry->collation.set(NULL, DERIVATION_IMPLICIT);
|
||||
entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
|
||||
entry->unsigned_flag= 0;
|
||||
/*
|
||||
If we are here, we were called from a SET or a query which sets a
|
||||
|
|
|
@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec()
|
|||
colname, args[0]->collation.collation->csname);
|
||||
return;
|
||||
}
|
||||
collation.set(set_collation, DERIVATION_EXPLICIT);
|
||||
collation.set(set_collation, DERIVATION_EXPLICIT,
|
||||
args[0]->collation.repertoire);
|
||||
max_length= args[0]->max_length;
|
||||
}
|
||||
|
||||
|
|
|
@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec()
|
|||
Item *arg1= args[1]->this_item();
|
||||
|
||||
decimals=0;
|
||||
collation.set(thd->variables.collation_connection);
|
||||
CHARSET_INFO *cs= thd->variables.collation_connection;
|
||||
uint32 repertoire= arg1->collation.repertoire;
|
||||
if (!thd->variables.lc_time_names->is_ascii)
|
||||
repertoire|= MY_REPERTOIRE_EXTENDED;
|
||||
collation.set(cs, arg1->collation.derivation, repertoire);
|
||||
if (arg1->type() == STRING_ITEM)
|
||||
{ // Optimize the normal case
|
||||
fixed_length=1;
|
||||
|
|
|
@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip)
|
|||
uint found_escape=0;
|
||||
CHARSET_INFO *cs= lip->m_thd->charset();
|
||||
|
||||
lip->tok_bitmap= 0;
|
||||
sep= yyGetLast(); // String should end with this
|
||||
while (lip->ptr != lip->end_of_query)
|
||||
{
|
||||
c = yyGet();
|
||||
c= yyGet();
|
||||
lip->tok_bitmap|= c;
|
||||
#ifdef USE_MB
|
||||
{
|
||||
int l;
|
||||
|
@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd)
|
|||
break;
|
||||
}
|
||||
yylval->lex_str.length= lip->yytoklen;
|
||||
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
||||
return(NCHAR_STRING);
|
||||
|
||||
case MY_LEX_IDENT_OR_HEX:
|
||||
|
@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd)
|
|||
break;
|
||||
}
|
||||
yylval->lex_str.length=lip->yytoklen;
|
||||
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
|
||||
return(TEXT_STRING);
|
||||
|
||||
case MY_LEX_COMMENT: // Comment
|
||||
|
|
|
@ -957,6 +957,9 @@ public:
|
|||
|
||||
/** Position of ';' in the stream, to delimit multiple queries. */
|
||||
const char* found_semicolon;
|
||||
|
||||
/** Token character bitmaps, to detect 7bit strings. */
|
||||
uchar tok_bitmap;
|
||||
|
||||
/** SQL_MODE = IGNORE_SPACE. */
|
||||
bool ignore_space;
|
||||
|
@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list
|
|||
gptr yacc_yyss,yacc_yyvs;
|
||||
THD *thd;
|
||||
CHARSET_INFO *charset, *underscore_charset;
|
||||
bool text_string_is_7bit;
|
||||
/* store original leaf_tables for INSERT SELECT and PS/SP */
|
||||
TABLE_LIST *leaf_tables_insert;
|
||||
/* Position (first character index) of SELECT of CREATE VIEW statement */
|
||||
|
|
|
@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length,
|
|||
(to_cs == &my_charset_bin) ||
|
||||
(to_cs == from_cs) ||
|
||||
my_charset_same(from_cs, to_cs) ||
|
||||
(my_charset_is_ascii_based(to_cs) &&
|
||||
my_charset_is_8bit_pure_ascii(from_cs)) ||
|
||||
((from_cs == &my_charset_bin) &&
|
||||
(!(*offset=(arg_length % to_cs->mbminlen)))))
|
||||
return FALSE;
|
||||
|
|
|
@ -7523,18 +7523,54 @@ opt_load_data_set_spec:
|
|||
/* Common definitions */
|
||||
|
||||
text_literal:
|
||||
TEXT_STRING_literal
|
||||
{
|
||||
THD *thd= YYTHD;
|
||||
$$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
|
||||
}
|
||||
| NCHAR_STRING
|
||||
{ $$= new Item_string($1.str,$1.length,national_charset_info); }
|
||||
| UNDERSCORE_CHARSET TEXT_STRING
|
||||
{ $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
|
||||
| text_literal TEXT_STRING_literal
|
||||
{ ((Item_string*) $1)->append($2.str,$2.length); }
|
||||
;
|
||||
TEXT_STRING
|
||||
{
|
||||
LEX_STRING tmp;
|
||||
THD *thd= YYTHD;
|
||||
CHARSET_INFO *cs_con= thd->variables.collation_connection;
|
||||
CHARSET_INFO *cs_cli= thd->variables.character_set_client;
|
||||
uint repertoire= thd->lex->text_string_is_7bit &&
|
||||
my_charset_is_ascii_based(cs_cli) ?
|
||||
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
|
||||
if (thd->charset_is_collation_connection ||
|
||||
(repertoire == MY_REPERTOIRE_ASCII &&
|
||||
my_charset_is_ascii_based(cs_con)))
|
||||
tmp= $1;
|
||||
else
|
||||
thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
|
||||
$$= new Item_string(tmp.str, tmp.length, cs_con,
|
||||
DERIVATION_COERCIBLE, repertoire);
|
||||
}
|
||||
| NCHAR_STRING
|
||||
{
|
||||
uint repertoire= Lex->text_string_is_7bit ?
|
||||
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
|
||||
DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
|
||||
$$= new Item_string($1.str, $1.length, national_charset_info,
|
||||
DERIVATION_COERCIBLE, repertoire);
|
||||
}
|
||||
| UNDERSCORE_CHARSET TEXT_STRING
|
||||
{
|
||||
$$= new Item_string($2.str, $2.length, Lex->underscore_charset);
|
||||
((Item_string*) $$)->set_repertoire_from_value();
|
||||
}
|
||||
| text_literal TEXT_STRING_literal
|
||||
{
|
||||
Item_string* item= (Item_string*) $1;
|
||||
item->append($2.str, $2.length);
|
||||
if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
|
||||
{
|
||||
/*
|
||||
If the string has been pure ASCII so far,
|
||||
check the new part.
|
||||
*/
|
||||
CHARSET_INFO *cs= YYTHD->variables.collation_connection;
|
||||
item->collation.repertoire|= my_string_repertoire(cs,
|
||||
$2.str,
|
||||
$2.length);
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
text_string:
|
||||
TEXT_STRING_literal
|
||||
|
@ -7606,20 +7642,22 @@ literal:
|
|||
| TRUE_SYM { $$= new Item_int((char*) "TRUE",1,1); }
|
||||
| HEX_NUM { $$ = new Item_hex_string($1.str, $1.length);}
|
||||
| BIN_NUM { $$= new Item_bin_string($1.str, $1.length); }
|
||||
| UNDERSCORE_CHARSET HEX_NUM
|
||||
{
|
||||
Item *tmp= new Item_hex_string($2.str, $2.length);
|
||||
/*
|
||||
it is OK only emulate fix_fieds, because we need only
|
||||
| UNDERSCORE_CHARSET HEX_NUM
|
||||
{
|
||||
Item *tmp= new Item_hex_string($2.str, $2.length);
|
||||
/*
|
||||
it is OK only emulate fix_fieds, because we need only
|
||||
value of constant
|
||||
*/
|
||||
String *str= tmp ?
|
||||
tmp->quick_fix_field(), tmp->val_str((String*) 0) :
|
||||
(String*) 0;
|
||||
$$= new Item_string(str ? str->ptr() : "",
|
||||
str ? str->length() : 0,
|
||||
Lex->underscore_charset);
|
||||
}
|
||||
*/
|
||||
String *str= tmp ?
|
||||
tmp->quick_fix_field(), tmp->val_str((String*) 0) :
|
||||
(String*) 0;
|
||||
$$= new Item_string(str ? str->ptr() : "",
|
||||
str ? str->length() : 0,
|
||||
Lex->underscore_charset);
|
||||
if ($$)
|
||||
((Item_string *) $$)->set_repertoire_from_value();
|
||||
}
|
||||
| UNDERSCORE_CHARSET BIN_NUM
|
||||
{
|
||||
Item *tmp= new Item_bin_string($2.str, $2.length);
|
||||
|
|
|
@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
|
|||
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
void dispcset(FILE *f,CHARSET_INFO *cs)
|
||||
{
|
||||
fprintf(f,"{\n");
|
||||
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
|
||||
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
|
||||
|
||||
if (cs->name)
|
||||
{
|
||||
|
|
|
@ -6722,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
#ifdef HAVE_CHARSET_ascii
|
||||
{
|
||||
11,0,0,
|
||||
MY_CS_COMPILED|MY_CS_PRIMARY,
|
||||
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
|
||||
"ascii", /* cset name */
|
||||
"ascii_general_ci", /* coll name */
|
||||
"", /* comment */
|
||||
|
@ -7811,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
#ifdef HAVE_CHARSET_ascii
|
||||
{
|
||||
65,0,0,
|
||||
MY_CS_COMPILED|MY_CS_BINSORT,
|
||||
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
|
||||
"ascii", /* cset name */
|
||||
"ascii_bin", /* coll name */
|
||||
"", /* comment */
|
||||
|
|
|
@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
|
|||
my_xml_parser_free(&p);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Check repertoire: detect pure ascii strings
|
||||
*/
|
||||
uint
|
||||
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
|
||||
{
|
||||
const char *strend= str + length;
|
||||
if (cs->mbminlen == 1)
|
||||
{
|
||||
for ( ; str < strend; str++)
|
||||
{
|
||||
if (((uchar) *str) > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
my_wc_t wc;
|
||||
int chlen;
|
||||
for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
|
||||
{
|
||||
if (wc > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
return MY_REPERTOIRE_ASCII;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect whether a character set is ASCII compatible.
|
||||
|
||||
Returns TRUE for:
|
||||
|
||||
- all 8bit character sets whose Unicode mapping of 0x7B is '{'
|
||||
(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
|
||||
|
||||
- all multi-byte character sets having mbminlen == 1
|
||||
(ignores ucs2 whose mbminlen is 2)
|
||||
|
||||
TODO:
|
||||
|
||||
When merging to 5.2, this function should be changed
|
||||
to check a new flag MY_CS_NONASCII,
|
||||
|
||||
return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
|
||||
|
||||
This flag was previously added into 5.2 under terms
|
||||
of WL#3759 "Optimize identifier conversion in client-server protocol"
|
||||
especially to mark character sets not compatible with ASCII.
|
||||
|
||||
We won't backport this flag to 5.0 or 5.1.
|
||||
This function is Ok for 5.0 and 5.1, because we're not going
|
||||
to introduce new tricky character sets between 5.0 and 5.2.
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_ascii_based(CHARSET_INFO *cs)
|
||||
{
|
||||
return
|
||||
(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
|
||||
(cs->mbminlen == 1 && cs->mbmaxlen > 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This function is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue