set @tmp_csetn_os= @@optimizer_switch, optimizer_switch='cset_narrowing=on'; set names utf8mb4; create table t1 ( mb3name varchar(32), mb3 varchar(32) collate utf8mb3_general_ci, key(mb3) ); insert into t1 select seq, seq from seq_1_to_10000; insert into t1 values ('mb3-question-mark', '?'); insert into t1 values ('mb3-replacement-char', _utf8mb3 0xEFBFBD); create table t10 ( pk int auto_increment primary key, mb4name varchar(32), mb4 varchar(32) character set utf8mb4 collate utf8mb4_general_ci ); insert into t10 (mb4name, mb4) values ('mb4-question-mark','?'), ('mb4-replacement-char', _utf8mb4 0xEFBFBD), ('mb4-smiley', _utf8mb4 0xF09F988A), ('1', '1'); analyze table t1,t10 persistent for all; Table Op Msg_type Msg_text test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK test.t10 analyze status Engine-independent statistics collected test.t10 analyze status OK # # Check that constants are already handled: the following should use # ref/range, because constants are converted into utf8mb3. # select collation('abc'); collation('abc') utf8mb4_uca1400_ai_ci explain select * from t1 force index (mb3) where t1.mb3='abc'; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ref mb3 mb3 99 const 1 Using index condition explain select * from t1 force index (mb3) where t1.mb3 in ('abc','cde','xyz'); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range mb3 mb3 99 NULL 3 Using index condition explain select * from t1 force index (mb3) where t1.mb3 between 'abc' and 'acc'; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition explain select * from t1 force index (mb3) where t1.mb3 <'000'; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition # If a constant can't be represented in utf8mb3, an error is produced: explain select * from t1 force index (mb3) where t1.mb3='😊'; ERROR HY000: Illegal mix of collations (utf8mb3_general_ci,IMPLICIT) and (utf8mb4_uca1400_ai_ci,COERCIBLE) for operation '=' # # Check ref access on mb3_field=mb4_field # explain format=json select * from t10,t1 where t10.mb4=t1.mb3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "t10.mb4 is not null" } }, { "table": { "table_name": "t1", "access_type": "ref", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "ref": ["test.t10.mb4"], "loops": 4, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t1.mb3 = t10.mb4" } } ] } } select * from t10,t1 where t10.mb4=t1.mb3; pk mb4name mb4 mb3name mb3 1 mb4-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � 4 1 1 1 1 select * from t10,t1 use index() where t10.mb4=t1.mb3; pk mb4name mb4 mb3name mb3 4 1 1 1 1 1 mb4-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � explain format=json select * from t10,t1 where t10.mb4<=>t1.mb3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100 } }, { "table": { "table_name": "t1", "access_type": "ref", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "ref": ["test.t10.mb4"], "loops": 4, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t10.mb4 <=> t1.mb3" } } ] } } select * from t10,t1 where t10.mb4<=>t1.mb3; pk mb4name mb4 mb3name mb3 1 mb4-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � 4 1 1 1 1 set statement optimizer_switch='cset_narrowing=off', join_cache_level=0 for explain format=json select * from t10,t1 where t10.mb4=t1.mb3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100 } }, { "table": { "table_name": "t1", "access_type": "ALL", "loops": 4, "rows": 10002, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "t10.mb4 = convert(t1.mb3 using utf8mb4)" } } ] } } # # Check ref access on mb3_field=mb4_expr # explain format=json select * from t10,t1 where t1.mb3=concat('',t10.mb4); EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100 } }, { "table": { "table_name": "t1", "access_type": "ref", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "ref": ["func"], "loops": 4, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t1.mb3 = concat('',t10.mb4)" } } ] } } select * from t10,t1 where t1.mb3=concat('',t10.mb4); pk mb4name mb4 mb3name mb3 1 mb4-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � 4 1 1 1 1 select * from t10,t1 use index() where t1.mb3=concat('',t10.mb4); pk mb4name mb4 mb3name mb3 4 1 1 1 1 1 mb4-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � # Check that ref optimizer gets the right constant. # We need a const table for that, because key=const is handled by # coercing the constant. # # So, we take the smiley: select * from t10 where t10.pk=3; pk mb4name mb4 3 mb4-smiley 😊 set optimizer_trace=1; # And see that we've got the Replacement Character in the ranges: explain select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t10 const PRIMARY PRIMARY 4 const 1 1 SIMPLE t1 ref mb3 mb3 99 const 3 Using index condition select json_detailed(json_extract(trace, '$**.range_scan_alternatives')) as JS from information_schema.optimizer_trace; JS [ [ { "index": "mb3", "ranges": ["(�) <= (mb3) <= (�)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, "rows": 3, "cost": "COST_REPLACED", "chosen": true } ] ] select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; pk mb4name mb4 mb3name mb3 3 mb4-smiley 😊 mb3-replacement-char � # # Will range optimizer handle t1.mb3>t10.mb4? No... # explain format=json select * from t10, t1 where (t1.mb3=t10.mb4 or t1.mb3='hello') and t10.pk=3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "const", "possible_keys": ["PRIMARY"], "key": "PRIMARY", "key_length": "4", "used_key_parts": ["pk"], "ref": ["const"], "rows": 1, "filtered": 100 } }, { "table": { "table_name": "t1", "access_type": "range", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t1.mb3 = '????' or t1.mb3 = 'hello'" } } ] } } explain format=json select * from t10, t1 where t1.mb3>t10.mb4 and t10.pk=3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "const", "possible_keys": ["PRIMARY"], "key": "PRIMARY", "key_length": "4", "used_key_parts": ["pk"], "ref": ["const"], "rows": 1, "filtered": 100 } }, { "table": { "table_name": "t1", "access_type": "ALL", "loops": 1, "rows": 10002, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "convert(t1.mb3 using utf8mb4) > '????'" } } ] } } # For comparison, it will handle it when collations match: create table t2 ( mb4name varchar(32), mb4 varchar(32) collate utf8mb4_general_ci, key(mb4) ); insert into t2 select * from t1; explain format=json select * from t10, t2 where t2.mb4>t10.mb4 and t10.pk=3; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "const", "possible_keys": ["PRIMARY"], "key": "PRIMARY", "key_length": "4", "used_key_parts": ["pk"], "ref": ["const"], "rows": 1, "filtered": 100 } }, { "table": { "table_name": "t2", "access_type": "range", "possible_keys": ["mb4"], "key": "mb4", "key_length": "131", "used_key_parts": ["mb4"], "loops": 1, "rows": 3, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t2.mb4 > '????'" } } ] } } # # Check multiple equalities # # - ref acccess lookup keys do use equality substitution, # - concat() arguments don't explain format=json select straight_join * from t10,t1 force index(mb3),t2 where t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 100, "attached_condition": "t10.mb4 is not null and t10.mb4 is not null" } }, { "table": { "table_name": "t1", "access_type": "ref", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "ref": ["test.t10.mb4"], "loops": 4, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t1.mb3 = t10.mb4" } }, { "table": { "table_name": "t2", "access_type": "ref", "possible_keys": ["mb4"], "key": "mb4", "key_length": "131", "used_key_parts": ["mb4"], "ref": ["test.t10.mb4"], "loops": 4, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'" } } ] } } select json_detailed(json_extract(trace, '$**.condition_processing')) as JS from information_schema.optimizer_trace; JS [ { "condition": "WHERE", "original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" }, { "transformation": "constant_propagation", "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" }, { "transformation": "trivial_condition_removal", "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" } ] } ] select straight_join * from t10,t1 force index(mb3),t2 where t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; pk mb4name mb4 mb3name mb3 mb4name mb4 1 mb4-question-mark ? mb3-question-mark ? mb3-question-mark ? 2 mb4-replacement-char � mb3-replacement-char � mb3-replacement-char � 3 mb4-smiley 😊 mb3-replacement-char � mb3-replacement-char � 4 1 1 1 1 1 1 # Equality substitution doesn't happen for constants, for both narrowing # and non-narrowing comparisons: explain format=json select * from t10,t1,t2 where t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and t10.mb4='hello' and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; EXPLAIN { "query_block": { "select_id": 1, "cost": "COST_REPLACED", "nested_loop": [ { "table": { "table_name": "t10", "access_type": "ALL", "loops": 1, "rows": 4, "cost": "COST_REPLACED", "filtered": 25, "attached_condition": "t10.mb4 = 'hello'" } }, { "table": { "table_name": "t1", "access_type": "ref", "possible_keys": ["mb3"], "key": "mb3", "key_length": "99", "used_key_parts": ["mb3"], "ref": ["const"], "loops": 1, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t1.mb3 = t10.mb4" } }, { "table": { "table_name": "t2", "access_type": "ref", "possible_keys": ["mb4"], "key": "mb4", "key_length": "131", "used_key_parts": ["mb4"], "ref": ["const"], "loops": 1, "rows": 1, "cost": "COST_REPLACED", "filtered": 100, "index_condition": "t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'" } } ] } } select json_detailed(json_extract(trace, '$**.condition_processing')) as JS from information_schema.optimizer_trace; JS [ { "condition": "WHERE", "original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" }, { "transformation": "constant_propagation", "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" }, { "transformation": "trivial_condition_removal", "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" } ] } ] drop table t2; drop table t1, t10; set optimizer_switch=@tmp_csetn_os;