MDEV-36765: followup 4: Fixups to previous fixes

- Add a testcase showing JSON_HB histograms handle multi-byte characters
  correctly.
- Make Item_func_json_unquote::val_str() handle the situation where
  it is reading non-UTF-8 "JSON" and transcoding it into UTF-8.
  (The JSON spec only allows UTF-8, but MariaDB's implementation
   supports non-UTF-8 as well.)
- Make Item_func_json_search::compare_json_value_wild() handle
  json_unescape()'s return values in the same way it's done in other
  places.
- Coding style fixes.
This commit is contained in:
Sergei Petrunia 2025-06-09 16:40:27 +03:00
commit cce76e7225
3 changed files with 284 additions and 5 deletions

View file

@ -7812,5 +7812,218 @@ min_value max_value hist_type
部門 部門
部門 JSON_HB
DROP TABLE t1;
DELETE FROM mysql.column_stats;
create table t1 (
col1 varchar(10) charset utf8
);
set names utf8;
select hex('б'), collation('б');
hex('б') collation('б')
D0B1 utf8mb3_general_ci
insert into t1 values
('а'),('б'),('в'),('г'),('д'),('е'),('ж'),('з'),('и'),('й');
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select hex(col1) from t1;
hex(col1)
D0B0
D0B1
D0B2
D0B3
D0B4
D0B5
D0B6
D0B7
D0B8
D0B9
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
from mysql.column_stats where db_name=database() and table_name='t1';
json_detailed(json_extract(histogram, '$**.histogram_hb'))
[
[
{
"start": "а",
"size": 0.1,
"ndv": 1
},
{
"start": "б",
"size": 0.1,
"ndv": 1
},
{
"start": "в",
"size": 0.1,
"ndv": 1
},
{
"start": "г",
"size": 0.1,
"ndv": 1
},
{
"start": "д",
"size": 0.1,
"ndv": 1
},
{
"start": "е",
"size": 0.1,
"ndv": 1
},
{
"start": "ж",
"size": 0.1,
"ndv": 1
},
{
"start": "з",
"size": 0.1,
"ndv": 1
},
{
"start": "и",
"size": 0.1,
"ndv": 1
},
{
"start": "й",
"end": "й",
"size": 0.1,
"ndv": 1
}
]
]
explain extended select * from t1 where col1 < 'а';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'а'
explain extended select * from t1 where col1 < 'в';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 20.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'в'
explain extended select * from t1 where col1 < 'д';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'д'
explain extended select * from t1 where col1 < 'ж';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'ж'
explain extended select * from t1 where col1 < 'й';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 90.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < 'й'
delete from t1;
insert into t1 values
('"а'),('"б'),('"в'),('"г'),('"д'),('"е'),('"ж'),('"з'),('"и'),('"й');
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
from mysql.column_stats where db_name=database() and table_name='t1';
json_detailed(json_extract(histogram, '$**.histogram_hb'))
[
[
{
"start": "\"а",
"size": 0.1,
"ndv": 1
},
{
"start": "\"б",
"size": 0.1,
"ndv": 1
},
{
"start": "\"в",
"size": 0.1,
"ndv": 1
},
{
"start": "\"г",
"size": 0.1,
"ndv": 1
},
{
"start": "\"д",
"size": 0.1,
"ndv": 1
},
{
"start": "\"е",
"size": 0.1,
"ndv": 1
},
{
"start": "\"ж",
"size": 0.1,
"ndv": 1
},
{
"start": "\"з",
"size": 0.1,
"ndv": 1
},
{
"start": "\"и",
"size": 0.1,
"ndv": 1
},
{
"start": "\"й",
"end": "\"й",
"size": 0.1,
"ndv": 1
}
]
]
select hex(col1) from t1;
hex(col1)
22D0B9
22D0B8
22D0B7
22D0B6
22D0B5
22D0B4
22D0B3
22D0B2
22D0B1
22D0B0
explain extended select * from t1 where col1 < '"а';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"а'
explain extended select * from t1 where col1 < '"в';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 20.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"в'
explain extended select * from t1 where col1 < '"д';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"д'
explain extended select * from t1 where col1 < '"ж';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"ж'
explain extended select * from t1 where col1 < '"й';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 90.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` < '"й'
drop table t1;
select JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251));
JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251))
ФФ
# End of 10.11 tests

View file

@ -492,6 +492,43 @@ SELECT min_value, max_value, hist_type
FROM mysql.column_stats WHERE db_name = 'test' AND table_name = 't1';
DROP TABLE t1;
DELETE FROM mysql.column_stats;
create table t1 (
col1 varchar(10) charset utf8
);
set names utf8;
select hex('б'), collation('б');
insert into t1 values
('а'),('б'),('в'),('г'),('д'),('е'),('ж'),('з'),('и'),('й');
analyze table t1 persistent for all;
select hex(col1) from t1;
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
from mysql.column_stats where db_name=database() and table_name='t1';
explain extended select * from t1 where col1 < 'а';
explain extended select * from t1 where col1 < 'в';
explain extended select * from t1 where col1 < 'д';
explain extended select * from t1 where col1 < 'ж';
explain extended select * from t1 where col1 < 'й';
delete from t1;
insert into t1 values
('"а'),('"б'),('"в'),('"г'),('"д'),('"е'),('"ж'),('"з'),('"и'),('"й');
analyze table t1 persistent for all;
select json_detailed(json_extract(histogram, '$**.histogram_hb'))
from mysql.column_stats where db_name=database() and table_name='t1';
select hex(col1) from t1;
explain extended select * from t1 where col1 < '"а';
explain extended select * from t1 where col1 < '"в';
explain extended select * from t1 where col1 < '"д';
explain extended select * from t1 where col1 < '"ж';
explain extended select * from t1 where col1 < '"й';
drop table t1;
# JSON_UNQUOTE was touched by this patch also
select JSON_UNQUOTE(CONVERT('"ФФ"' using cp1251));
--echo # End of 10.11 tests

View file

@ -916,14 +916,25 @@ String *Item_func_json_unquote::val_str(String *str)
if (unlikely(je.s.error) || je.value_type != JSON_VALUE_STRING)
return js;
int buf_len= je.value_len;
if (js->charset()->cset != my_charset_utf8mb4_bin.cset)
{
/*
json_unescape() will be transcoding between charsets. We don't know
how much buffer space we'll need. Assume that each byte in the source
will require mbmaxlen bytes in the output.
*/
buf_len *= my_charset_utf8mb4_bin.mbmaxlen;
}
str->length(0);
str->set_charset(&my_charset_utf8mb4_bin);
if (str->realloc_with_extra_if_needed(je.value_len) ||
if (str->realloc_with_extra_if_needed(buf_len) ||
(c_len= json_unescape(js->charset(),
je.value, je.value + je.value_len,
&my_charset_utf8mb4_bin,
(uchar *) str->ptr(), (uchar *) (str->ptr() + je.value_len))) < 0)
(uchar *) str->ptr(), (uchar *) (str->ptr() + buf_len))) < 0)
goto error;
str->length(c_len);
@ -933,7 +944,7 @@ error:
if (current_thd)
{
if (c_len == JSON_ERROR_OUT_OF_SPACE)
my_error(ER_OUTOFMEMORY, MYF(0), je.value_len);
my_error(ER_OUTOFMEMORY, MYF(0), buf_len);
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
@ -3937,7 +3948,21 @@ int Item_func_json_search::compare_json_value_wild(json_engine_t *je,
(uchar *) (esc_value.ptr() +
esc_value.alloced_length()));
if (esc_len <= 0)
{
if (current_thd)
{
if (esc_len == JSON_ERROR_OUT_OF_SPACE)
my_error(ER_OUTOFMEMORY, MYF(0), je->value_len);
else if (esc_len == JSON_ERROR_ILLEGAL_SYMBOL)
{
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
0, "comparison",
(int)(je->s.c_str - je->value));
}
}
return 0;
}
return collation.collation->wildcmp(
esc_value.ptr(), esc_value.ptr() + esc_len,
@ -4207,9 +4232,11 @@ int Arg_comparator::compare_json_str_basic(Item *j, Item *s)
if (c_len == JSON_ERROR_OUT_OF_SPACE)
my_error(ER_OUTOFMEMORY, MYF(0), je.value_len);
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
{
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
0, "comparison", (int)((const char *) je.s.c_str - js->ptr()));
}
}
goto error;
}
@ -4271,9 +4298,11 @@ int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s)
if (c_len == JSON_ERROR_OUT_OF_SPACE)
my_error(ER_OUTOFMEMORY, MYF(0), value_len);
else if (c_len == JSON_ERROR_ILLEGAL_SYMBOL)
{
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
0, "equality comparison", 0);
}
}
return 1;
}