MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
set
@tmp_csetn_os= @@optimizer_switch,
optimizer_switch='cset_narrowing=on';
set names utf8mb4;
create table t1 (
mb3name varchar(32),
mb3 varchar(32) collate utf8mb3_general_ci,
key(mb3)
);
insert into t1 select seq, seq from seq_1_to_10000;
insert into t1 values ('mb3-question-mark', '?');
insert into t1 values ('mb3-replacement-char', _utf8mb3 0xEFBFBD);
create table t10 (
pk int auto_increment primary key,
mb4name varchar(32),
mb4 varchar(32) character set utf8mb4 collate utf8mb4_general_ci
);
insert into t10 (mb4name, mb4) values
('mb4-question-mark','?'),
('mb4-replacement-char', _utf8mb4 0xEFBFBD),
('mb4-smiley', _utf8mb4 0xF09F988A),
('1', '1');
analyze table t1,t10 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t10 analyze status Engine-independent statistics collected
test.t10 analyze status OK
#
# Check that constants are already handled: the following should use
# ref/range, because constants are converted into utf8mb3.
#
select collation('abc');
collation('abc')
utf8mb4_general_ci
explain select * from t1 force index (mb3) where t1.mb3='abc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref mb3 mb3 99 const 1 Using index condition
explain select * from t1 force index (mb3) where t1.mb3 in ('abc','cde','xyz');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range mb3 mb3 99 NULL 3 Using index condition
explain select * from t1 force index (mb3) where t1.mb3 between 'abc' and 'acc';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition
explain select * from t1 force index (mb3) where t1.mb3 <'000';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition
# If a constant can't be represented in utf8mb3, an error is produced:
explain select * from t1 force index (mb3) where t1.mb3='😊';
ERROR HY000: Illegal mix of collations (utf8mb3_general_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation '='
#
# Check ref access on mb3_field=mb4_field
#
explain format=json
select * from t10,t1 where t10.mb4=t1.mb3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 100,
"attached_condition": "t10.mb4 is not null"
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"ref": ["test.t10.mb4"],
"rows": 1,
"filtered": 100,
"index_condition": "t1.mb3 = t10.mb4"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
select * from t10,t1 where t10.mb4=t1.mb3;
pk mb4name mb4 mb3name mb3
1 mb4-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09>
4 1 1 1 1
select * from t10,t1 use index() where t10.mb4=t1.mb3;
pk mb4name mb4 mb3name mb3
4 1 1 1 1
1 mb4-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09>
explain format=json
select * from t10,t1 where t10.mb4<=>t1.mb3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 100
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"ref": ["test.t10.mb4"],
"rows": 1,
"filtered": 100,
"index_condition": "t10.mb4 <=> t1.mb3"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
select * from t10,t1 where t10.mb4<=>t1.mb3;
pk mb4name mb4 mb3name mb3
1 mb4-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09>
4 1 1 1 1
set statement optimizer_switch='cset_narrowing=off', join_cache_level=0 for
explain format=json
select * from t10,t1 where t10.mb4=t1.mb3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 100
}
},
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10002,
"filtered": 100,
"attached_condition": "t10.mb4 = convert(t1.mb3 using utf8mb4)"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
#
# Check ref access on mb3_field=mb4_expr
#
explain format=json
select * from t10,t1 where t1.mb3=concat('',t10.mb4);
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 100
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"ref": ["func"],
"rows": 1,
"filtered": 100,
"index_condition": "t1.mb3 = concat('',t10.mb4)"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
select * from t10,t1 where t1.mb3=concat('',t10.mb4);
pk mb4name mb4 mb3name mb3
1 mb4-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09>
4 1 1 1 1
select * from t10,t1 use index() where t1.mb3=concat('',t10.mb4);
pk mb4name mb4 mb3name mb3
4 1 1 1 1
1 mb4-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09>
# Check that ref optimizer gets the right constant.
# We need a const table for that, because key=const is handled by
# coercing the constant.
#
# So, we take the smiley:
select * from t10 where t10.pk=3;
pk mb4name mb4
3 mb4-smiley 😊
set optimizer_trace=1;
# And see that we've got the Replacement Character in the ranges:
explain
select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t10 const PRIMARY PRIMARY 4 const 1
1 SIMPLE t1 ref mb3 mb3 99 const 3 Using index condition
select
json_detailed(json_extract(trace, '$**.range_scan_alternatives')) as JS
from
information_schema.optimizer_trace;
JS
[
[
{
"index": "mb3",
"ranges":
["(<28> ) <= (mb3) <= (<28> )"],
"rowid_ordered": true,
"using_mrr": false,
"index_only": false,
"rows": 3,
"cost": 3.760377105,
"chosen": true
}
]
]
select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3;
pk mb4name mb4 mb3name mb3
3 mb4-smiley 😊 mb3-replacement-char <09>
#
# Will range optimizer handle t1.mb3>t10.mb4? No...
#
explain format=json
select * from t10, t1 where (t1.mb3=t10.mb4 or t1.mb3='hello') and t10.pk=3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "const",
"possible_keys": ["PRIMARY"],
"key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["pk"],
"ref": ["const"],
"rows": 1,
"filtered": 100
}
},
{
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"rows": 4,
"filtered": 100,
"index_condition": "t1.mb3 = '????' or t1.mb3 = 'hello'"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
explain format=json
select * from t10, t1 where t1.mb3>t10.mb4 and t10.pk=3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "const",
"possible_keys": ["PRIMARY"],
"key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["pk"],
"ref": ["const"],
"rows": 1,
"filtered": 100
}
},
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10002,
"filtered": 100,
"attached_condition": "convert(t1.mb3 using utf8mb4) > '????'"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
# For comparison, it will handle it when collations match:
create table t2 (
mb4name varchar(32),
mb4 varchar(32) collate utf8mb4_general_ci,
key(mb4)
);
insert into t2 select * from t1;
explain format=json
select * from t10, t2 where t2.mb4>t10.mb4 and t10.pk=3;
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "const",
"possible_keys": ["PRIMARY"],
"key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["pk"],
"ref": ["const"],
"rows": 1,
"filtered": 100
}
},
{
"table": {
"table_name": "t2",
"access_type": "range",
"possible_keys": ["mb4"],
"key": "mb4",
"key_length": "131",
"used_key_parts": ["mb4"],
"rows": 3,
"filtered": 100,
"index_condition": "t2.mb4 > '????'"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
#
# Check multiple equalities
#
# - ref acccess lookup keys do use equality substitution,
# - concat() arguments don't
explain format=json
select straight_join * from t10,t1 force index(mb3),t2
where
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 100,
"attached_condition": "t10.mb4 is not null and t10.mb4 is not null"
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"ref": ["test.t10.mb4"],
"rows": 1,
"filtered": 100,
"index_condition": "t1.mb3 = t10.mb4"
}
},
{
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["mb4"],
"key": "mb4",
"key_length": "131",
"used_key_parts": ["mb4"],
"ref": ["test.t10.mb4"],
"rows": 1,
"filtered": 100,
"index_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
select json_detailed(json_extract(trace, '$**.condition_processing')) as JS
from information_schema.optimizer_trace;
JS
[
{
"condition": "WHERE",
"original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'",
"steps":
[
{
"transformation": "equality_propagation",
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
},
{
"transformation": "constant_propagation",
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
}
]
}
]
select straight_join * from t10,t1 force index(mb3),t2
where
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
pk mb4name mb4 mb3name mb3 mb4name mb4
1 mb4-question-mark ? mb3-question-mark ? mb3-question-mark ?
2 mb4-replacement-char <09> mb3-replacement-char <09> mb3-replacement-char <09>
3 mb4-smiley 😊 mb3-replacement-char <09> mb3-replacement-char <09>
4 1 1 1 1 1 1
# Equality substitution doesn't happen for constants, for both narrowing
# and non-narrowing comparisons:
explain format=json
select * from t10,t1,t2
where
t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and t10.mb4='hello' and
concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe';
EXPLAIN
{
"query_block": {
"select_id": 1,
2023-10-23 17:43:36 +02:00
"nested_loop": [
{
"table": {
"table_name": "t10",
"access_type": "ALL",
"rows": 4,
"filtered": 25,
"attached_condition": "t10.mb4 = 'hello'"
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": ["mb3"],
"key": "mb3",
"key_length": "99",
"used_key_parts": ["mb3"],
"ref": ["const"],
"rows": 1,
"filtered": 100,
"index_condition": "t1.mb3 = t10.mb4"
}
},
{
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["mb4"],
"key": "mb4",
"key_length": "131",
"used_key_parts": ["mb4"],
"ref": ["const"],
"rows": 1,
"filtered": 100,
"index_condition": "t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'"
}
}
]
MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref
(Variant#3: Allow cross-charset comparisons, use a special
CHARSET_INFO to create lookup keys. Review input addressed.)
Equalities that compare utf8mb{3,4}_general_ci strings, like:
WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP)
can now be used to construct ref[const] access and also participate
in multiple-equalities.
This means that utf8mb3_key_col can be used for key-lookups when
compared with an utf8mb4 constant, field or expression using '=' or
'<=>' comparison operators.
This is controlled by optimizer_switch='cset_narrowing=on', which is
OFF by default.
IMPLEMENTATION
Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci.
This is valid as any utf8mb3 value is also an utf8mb4 value.
When making index lookup value for utf8mb3_key_col, we do "Charset
Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are
copied as-is, as they can be represented in utf8mb3. Characters that are
outside the BMP cannot be represented in utf8mb3 and are replaced
with U+FFFD, the "Replacement Character".
In utf8mb4_general_ci, the Replacement Character compares as equal to any
character that's not in BMP. Because of this, the constructed lookup value
will find all index records that would be considered equal by the original
condition (MB3-4-CMP).
Approved-by: Monty <monty@mariadb.org>
2023-09-19 17:22:49 +02:00
}
}
select json_detailed(json_extract(trace, '$**.condition_processing')) as JS
from information_schema.optimizer_trace;
JS
[
{
"condition": "WHERE",
"original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'",
"steps":
[
{
"transformation": "equality_propagation",
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
},
{
"transformation": "constant_propagation",
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)"
}
]
}
]
drop table t2;
drop table t1, t10;
set optimizer_switch=@tmp_csetn_os;