mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 02:51:44 +01:00
4941ac9192
(Variant#3: Allow cross-charset comparisons, use a special CHARSET_INFO to create lookup keys. Review input addressed.) Equalities that compare utf8mb{3,4}_general_ci strings, like: WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP) can now be used to construct ref[const] access and also participate in multiple-equalities. This means that utf8mb3_key_col can be used for key-lookups when compared with an utf8mb4 constant, field or expression using '=' or '<=>' comparison operators. This is controlled by optimizer_switch='cset_narrowing=on', which is OFF by default. IMPLEMENTATION Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci. This is valid as any utf8mb3 value is also an utf8mb4 value. When making index lookup value for utf8mb3_key_col, we do "Charset Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are copied as-is, as they can be represented in utf8mb3. Characters that are outside the BMP cannot be represented in utf8mb3 and are replaced with U+FFFD, the "Replacement Character". In utf8mb4_general_ci, the Replacement Character compares as equal to any character that's not in BMP. Because of this, the constructed lookup value will find all index records that would be considered equal by the original condition (MB3-4-CMP). Approved-by: Monty <monty@mariadb.org>
470 lines
14 KiB
Text
470 lines
14 KiB
Text
set
|
||
@tmp_csetn_os= @@optimizer_switch,
|
||
optimizer_switch='cset_narrowing=on';
|
||
set names utf8mb4;
|
||
create table t1 (
|
||
mb3name varchar(32),
|
||
mb3 varchar(32) collate utf8mb3_general_ci,
|
||
key(mb3)
|
||
);
|
||
insert into t1 select seq, seq from seq_1_to_10000;
|
||
insert into t1 values ('mb3-question-mark', '?');
|
||
insert into t1 values ('mb3-replacement-char', _utf8mb3 0xEFBFBD);
|
||
create table t10 (
|
||
pk int auto_increment primary key,
|
||
mb4name varchar(32),
|
||
mb4 varchar(32) character set utf8mb4 collate utf8mb4_general_ci
|
||
);
|
||
insert into t10 (mb4name, mb4) values
|
||
('mb4-question-mark','?'),
|
||
('mb4-replacement-char', _utf8mb4 0xEFBFBD),
|
||
('mb4-smiley', _utf8mb4 0xF09F988A),
|
||
('1', '1');
|
||
analyze table t1,t10 persistent for all;
|
||
Table Op Msg_type Msg_text
|
||
test.t1 analyze status Engine-independent statistics collected
|
||
test.t1 analyze status OK
|
||
test.t10 analyze status Engine-independent statistics collected
|
||
test.t10 analyze status OK
|
||
#
|
||
# Check that constants are already handled: the following should use
|
||
# ref/range, because constants are converted into utf8mb3.
|
||
#
|
||
select collation('abc');
|
||
collation('abc')
|
||
utf8mb4_general_ci
|
||
explain select * from t1 force index (mb3) where t1.mb3='abc';
|
||
id select_type table type possible_keys key key_len ref rows Extra
|
||
1 SIMPLE t1 ref mb3 mb3 99 const 1 Using index condition
|
||
explain select * from t1 force index (mb3) where t1.mb3 in ('abc','cde','xyz');
|
||
id select_type table type possible_keys key key_len ref rows Extra
|
||
1 SIMPLE t1 range mb3 mb3 99 NULL 3 Using index condition
|
||
explain select * from t1 force index (mb3) where t1.mb3 between 'abc' and 'acc';
|
||
id select_type table type possible_keys key key_len ref rows Extra
|
||
1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition
|
||
explain select * from t1 force index (mb3) where t1.mb3 <'000';
|
||
id select_type table type possible_keys key key_len ref rows Extra
|
||
1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition
|
||
# If a constant can't be represented in utf8mb3, an error is produced:
|
||
explain select * from t1 force index (mb3) where t1.mb3='😊';
|
||
ERROR HY000: Illegal mix of collations (utf8mb3_general_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation '='
|
||
#
|
||
# Check ref access on mb3_field=mb4_field
|
||
#
|
||
explain format=json
|
||
select * from t10,t1 where t10.mb4=t1.mb3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 100,
|
||
"attached_condition": "t10.mb4 is not null"
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"ref": ["test.t10.mb4"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t1.mb3 = t10.mb4"
|
||
}
|
||
}
|
||
}
|
||
select * from t10,t1 where t10.mb4=t1.mb3;
|
||
pk mb4name mb4 mb3name mb3
|
||
1 mb4-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
4 1 1 1 1
|
||
select * from t10,t1 use index() where t10.mb4=t1.mb3;
|
||
pk mb4name mb4 mb3name mb3
|
||
4 1 1 1 1
|
||
1 mb4-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
explain format=json
|
||
select * from t10,t1 where t10.mb4<=>t1.mb3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"ref": ["test.t10.mb4"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t10.mb4 <=> t1.mb3"
|
||
}
|
||
}
|
||
}
|
||
select * from t10,t1 where t10.mb4<=>t1.mb3;
|
||
pk mb4name mb4 mb3name mb3
|
||
1 mb4-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
4 1 1 1 1
|
||
set statement optimizer_switch='cset_narrowing=off', join_cache_level=0 for
|
||
explain format=json
|
||
select * from t10,t1 where t10.mb4=t1.mb3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ALL",
|
||
"rows": 10002,
|
||
"filtered": 100,
|
||
"attached_condition": "t10.mb4 = convert(t1.mb3 using utf8mb4)"
|
||
}
|
||
}
|
||
}
|
||
#
|
||
# Check ref access on mb3_field=mb4_expr
|
||
#
|
||
explain format=json
|
||
select * from t10,t1 where t1.mb3=concat('',t10.mb4);
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"ref": ["func"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t1.mb3 = concat('',t10.mb4)"
|
||
}
|
||
}
|
||
}
|
||
select * from t10,t1 where t1.mb3=concat('',t10.mb4);
|
||
pk mb4name mb4 mb3name mb3
|
||
1 mb4-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
4 1 1 1 1
|
||
select * from t10,t1 use index() where t1.mb3=concat('',t10.mb4);
|
||
pk mb4name mb4 mb3name mb3
|
||
4 1 1 1 1
|
||
1 mb4-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
# Check that ref optimizer gets the right constant.
|
||
# We need a const table for that, because key=const is handled by
|
||
# coercing the constant.
|
||
#
|
||
# So, we take the smiley:
|
||
select * from t10 where t10.pk=3;
|
||
pk mb4name mb4
|
||
3 mb4-smiley 😊
|
||
set optimizer_trace=1;
|
||
# And see that we've got the Replacement Character in the ranges:
|
||
explain
|
||
select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3;
|
||
id select_type table type possible_keys key key_len ref rows Extra
|
||
1 SIMPLE t10 const PRIMARY PRIMARY 4 const 1
|
||
1 SIMPLE t1 ref mb3 mb3 99 const 3 Using index condition
|
||
select
|
||
json_detailed(json_extract(trace, '$**.range_scan_alternatives')) as JS
|
||
from
|
||
information_schema.optimizer_trace;
|
||
JS
|
||
[
|
||
[
|
||
{
|
||
"index": "mb3",
|
||
"ranges":
|
||
["(<28>) <= (mb3) <= (<28>)"],
|
||
"rowid_ordered": true,
|
||
"using_mrr": false,
|
||
"index_only": false,
|
||
"rows": 3,
|
||
"cost": 3.760377105,
|
||
"chosen": true
|
||
}
|
||
]
|
||
]
|
||
select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3;
|
||
pk mb4name mb4 mb3name mb3
|
||
3 mb4-smiley 😊 mb3-replacement-char <09>
|
||
#
|
||
# Will range optimizer handle t1.mb3>t10.mb4? No...
|
||
#
|
||
explain format=json
|
||
select * from t10, t1 where (t1.mb3=t10.mb4 or t1.mb3='hello') and t10.pk=3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "const",
|
||
"possible_keys": ["PRIMARY"],
|
||
"key": "PRIMARY",
|
||
"key_length": "4",
|
||
"used_key_parts": ["pk"],
|
||
"ref": ["const"],
|
||
"rows": 1,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "range",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"rows": 4,
|
||
"filtered": 100,
|
||
"index_condition": "t1.mb3 = '????' or t1.mb3 = 'hello'"
|
||
}
|
||
}
|
||
}
|
||
explain format=json
|
||
select * from t10, t1 where t1.mb3>t10.mb4 and t10.pk=3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "const",
|
||
"possible_keys": ["PRIMARY"],
|
||
"key": "PRIMARY",
|
||
"key_length": "4",
|
||
"used_key_parts": ["pk"],
|
||
"ref": ["const"],
|
||
"rows": 1,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ALL",
|
||
"rows": 10002,
|
||
"filtered": 100,
|
||
"attached_condition": "convert(t1.mb3 using utf8mb4) > '????'"
|
||
}
|
||
}
|
||
}
|
||
# For comparison, it will handle it when collations match:
|
||
create table t2 (
|
||
mb4name varchar(32),
|
||
mb4 varchar(32) collate utf8mb4_general_ci,
|
||
key(mb4)
|
||
);
|
||
insert into t2 select * from t1;
|
||
explain format=json
|
||
select * from t10, t2 where t2.mb4>t10.mb4 and t10.pk=3;
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "const",
|
||
"possible_keys": ["PRIMARY"],
|
||
"key": "PRIMARY",
|
||
"key_length": "4",
|
||
"used_key_parts": ["pk"],
|
||
"ref": ["const"],
|
||
"rows": 1,
|
||
"filtered": 100
|
||
},
|
||
"table": {
|
||
"table_name": "t2",
|
||
"access_type": "range",
|
||
"possible_keys": ["mb4"],
|
||
"key": "mb4",
|
||
"key_length": "131",
|
||
"used_key_parts": ["mb4"],
|
||
"rows": 3,
|
||
"filtered": 100,
|
||
"index_condition": "t2.mb4 > '????'"
|
||
}
|
||
}
|
||
}
|
||
#
|
||
# Check multiple equalities
|
||
#
|
||
# - ref acccess lookup keys do use equality substitution,
|
||
# - concat() arguments don't
|
||
explain format=json
|
||
select straight_join * from t10,t1 force index(mb3),t2
|
||
where
|
||
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 100,
|
||
"attached_condition": "t10.mb4 is not null and t10.mb4 is not null"
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"ref": ["test.t10.mb4"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t1.mb3 = t10.mb4"
|
||
},
|
||
"table": {
|
||
"table_name": "t2",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb4"],
|
||
"key": "mb4",
|
||
"key_length": "131",
|
||
"used_key_parts": ["mb4"],
|
||
"ref": ["test.t10.mb4"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'"
|
||
}
|
||
}
|
||
}
|
||
select json_detailed(json_extract(trace, '$**.condition_processing')) as JS
|
||
from information_schema.optimizer_trace;
|
||
JS
|
||
[
|
||
{
|
||
"condition": "WHERE",
|
||
"original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'",
|
||
"steps":
|
||
[
|
||
{
|
||
"transformation": "equality_propagation",
|
||
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
},
|
||
{
|
||
"transformation": "constant_propagation",
|
||
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
},
|
||
{
|
||
"transformation": "trivial_condition_removal",
|
||
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
}
|
||
]
|
||
}
|
||
]
|
||
select straight_join * from t10,t1 force index(mb3),t2
|
||
where
|
||
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
|
||
pk mb4name mb4 mb3name mb3 mb4name mb4
|
||
1 mb4-question-mark ? mb3-question-mark ? mb3-question-mark ?
|
||
2 mb4-replacement-char <09> mb3-replacement-char <09> mb3-replacement-char <09>
|
||
3 mb4-smiley 😊 mb3-replacement-char <09> mb3-replacement-char <09>
|
||
4 1 1 1 1 1 1
|
||
# Equality substitution doesn't happen for constants, for both narrowing
|
||
# and non-narrowing comparisons:
|
||
explain format=json
|
||
select * from t10,t1,t2
|
||
where
|
||
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and t10.mb4='hello' and
|
||
concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
|
||
EXPLAIN
|
||
{
|
||
"query_block": {
|
||
"select_id": 1,
|
||
"table": {
|
||
"table_name": "t10",
|
||
"access_type": "ALL",
|
||
"rows": 4,
|
||
"filtered": 0.78125,
|
||
"attached_condition": "t10.mb4 = 'hello'"
|
||
},
|
||
"table": {
|
||
"table_name": "t1",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb3"],
|
||
"key": "mb3",
|
||
"key_length": "99",
|
||
"used_key_parts": ["mb3"],
|
||
"ref": ["const"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t1.mb3 = t10.mb4"
|
||
},
|
||
"table": {
|
||
"table_name": "t2",
|
||
"access_type": "ref",
|
||
"possible_keys": ["mb4"],
|
||
"key": "mb4",
|
||
"key_length": "131",
|
||
"used_key_parts": ["mb4"],
|
||
"ref": ["const"],
|
||
"rows": 1,
|
||
"filtered": 100,
|
||
"index_condition": "t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'"
|
||
}
|
||
}
|
||
}
|
||
select json_detailed(json_extract(trace, '$**.condition_processing')) as JS
|
||
from information_schema.optimizer_trace;
|
||
JS
|
||
[
|
||
{
|
||
"condition": "WHERE",
|
||
"original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'",
|
||
"steps":
|
||
[
|
||
{
|
||
"transformation": "equality_propagation",
|
||
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
},
|
||
{
|
||
"transformation": "constant_propagation",
|
||
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
},
|
||
{
|
||
"transformation": "trivial_condition_removal",
|
||
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
|
||
}
|
||
]
|
||
}
|
||
]
|
||
drop table t2;
|
||
drop table t1, t10;
|
||
set optimizer_switch=@tmp_csetn_os;
|