mariadb/mysql-test/main/natural_sort_key.result
Vladislav Vaintroub b3cedf63a3 MDEV-4742 - address review comments.
- Remove the optional second parameter of natural_sort_key(), and all fraction
handling.

- Rename natsort_num2str() to natsort_encode_length() to make it clear that it
encodes string *lengths*, and does not encode whitespace or anything else.

It handles lengths for which log10(len) >= 10, even though they do not occur for
MariaDB Strings (where the length is limited to 32 bits, so log10(len) is <= 9).

- Do not let the natural sort key grow past max_allowed_packet.


- Split Item_func_natural_sort_key::val_str() further and add
natsort_encode_numeric_string(), which contains a comment on how
whitespace is handled.

- Simplify and speed up to_natsort_key() in the common case by removing the
handling of the unusual charsets utf16/32, which encode numbers in several bytes.
In the rare cases where utf16/32 is used, we convert to utf8 prior to
creating keys, and back to the original charset afterwards.
2021-10-14 12:13:04 +02:00

108 lines
3.9 KiB
Text

SET NAMES utf8mb4;
SELECT NATURAL_SORT_KEY(NULL);
NATURAL_SORT_KEY(NULL)
NULL
SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c);
c
a0
a9
a10
a1000
b
b0
SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1));
NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1))
NULL
Warnings:
Warning 1301 Result of natural_sort_key() was larger than max_allowed_packet (16777216) - truncated
CREATE TABLE t1(
c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin,
k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE,
KEY(k,c)) ENGINE=InnoDB;
INSERT INTO t1 values
('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'),
('äb'),('B1'),('B100'),('B9'),('C'),('100');
EXPLAIN SELECT c FROM t1 ORDER BY k,c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL k 216 NULL 14 Using index
#Natural sort order.
SELECT c FROM t1 ORDER BY k,c;
c
100
A1
a1
A2
a2
ä2
A99
A100
a100
äb
B1
B9
B100
C
#Unnatural but unicode aware) sort order
SELECT c FROM t1 ORDER BY CONVERT(c USING utf8mb4) COLLATE utf8mb4_unicode_ci,c;
c
100
A1
a1
A100
a100
A2
a2
ä2
A99
äb
B1
B100
B9
C
CREATE TABLE t2 AS SELECT c, NATURAL_SORT_KEY(c) FROM t1 WHERE 0;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`c` varchar(30) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`NATURAL_SORT_KEY(c)` varchar(60) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1,t2;
SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
FROM
(
SELECT 0 val
UNION VALUES ('1'),('01'),('0001')
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
) AS numbers ORDER BY sortkey;
value sortkey encoding_overhead
0 000 2
1 010 2
01 011 1
0001 013 -1
10 1100 2
100 21000 2
1000 310000 2
10000 4100000 2
100000 51000000 2
1000000 610000000 2
10000000 7100000000 2
100000000 81000000000 2
1000000000 9010000000000 3
10000000000 91100000000000 3
100000000000 921000000000000 3
1000000000000 9310000000000000 3
10000000000000 94100000000000000 3
100000000000000 951000000000000000 3
1000000000000000 9610000000000000000 3
10000000000000000 97100000000000000000 3
100000000000000000 981000000000000000000 3
1000000000000000000 99010000000000000000000 4
10000000000000000000 991100000000000000000000 4
100000000000000000000 9921000000000000000000000 4
1000000000000000000000 99310000000000000000000000 4
10000000000000000000000 994100000000000000000000000 4
100000000000000000000000 9951000000000000000000000000 4
1000000000000000000000000 99610000000000000000000000000 4
10000000000000000000000000 997100000000000000000000000000 4
100000000000000000000000000 9981000000000000000000000000000 4
1000000000000000000000000000 99901271000000000000000000000000000 8