mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
b3cedf63a3
- Remove second optional parameter to natural_sort_key(), and all fraction handling. - Rename natsort_num2str() to natsort_encode_length() to show the intention that it encodes string *lengths*, and not encode whitespaces and what not. Handles lengths for which log10(len) >= 10, even if they do not happen for MariaDB Strings (where length is limited by 32bit, and log10(len) is <= 9) - Do not let natural sort key grow past max_packet_length. - Split Item_func_natural_sort_key::val_str() further and add natsort_encode_numeric_string(), which contains comment on how whitespaces are handled. - Simplify, and speedup to_natsort_key() in common case, by removing handling of weird charsets utf16/32, that encode numbers in several bytes. In rare cases utf16/32 is used, we'll convert to utf8 prior to creating keys, and back to original charset afterwards.
108 lines
3.9 KiB
Text
108 lines
3.9 KiB
Text
SET NAMES utf8mb4;
|
|
SELECT NATURAL_SORT_KEY(NULL);
|
|
NATURAL_SORT_KEY(NULL)
|
|
NULL
|
|
SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c);
|
|
c
|
|
a0
|
|
a9
|
|
a10
|
|
a1000
|
|
b
|
|
b0
|
|
SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1));
|
|
NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1))
|
|
NULL
|
|
Warnings:
|
|
Warning 1301 Result of natural_sort_key() was larger than max_allowed_packet (16777216) - truncated
|
|
CREATE TABLE t1(
|
|
c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin,
|
|
k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE,
|
|
KEY(k,c)) ENGINE=InnoDB;
|
|
INSERT INTO t1 values
|
|
('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'),
|
|
('äb'),('B1'),('B100'),('B9'),('C'),('100');
|
|
EXPLAIN SELECT c FROM t1 ORDER BY k,c;
|
|
id select_type table type possible_keys key key_len ref rows Extra
|
|
1 SIMPLE t1 index NULL k 216 NULL 14 Using index
|
|
#Natural sort order.
|
|
SELECT c FROM t1 ORDER BY k,c;
|
|
c
|
|
100
|
|
A1
|
|
a1
|
|
A2
|
|
a2
|
|
ä2
|
|
A99
|
|
A100
|
|
a100
|
|
äb
|
|
B1
|
|
B9
|
|
B100
|
|
C
|
|
#Unnatural but unicode aware) sort order
|
|
SELECT c FROM t1 ORDER BY CONVERT(c USING utf8mb4) COLLATE utf8mb4_unicode_ci,c;
|
|
c
|
|
100
|
|
A1
|
|
a1
|
|
A100
|
|
a100
|
|
A2
|
|
a2
|
|
ä2
|
|
A99
|
|
äb
|
|
B1
|
|
B100
|
|
B9
|
|
C
|
|
CREATE TABLE t2 AS SELECT c, NATURAL_SORT_KEY(c) FROM t1 WHERE 0;
|
|
SHOW CREATE TABLE t2;
|
|
Table Create Table
|
|
t2 CREATE TABLE `t2` (
|
|
`c` varchar(30) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
|
|
`NATURAL_SORT_KEY(c)` varchar(60) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
|
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
|
DROP TABLE t1,t2;
|
|
SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
|
|
FROM
|
|
(
|
|
SELECT 0 val
|
|
UNION VALUES ('1'),('01'),('0001')
|
|
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
|
|
) AS numbers ORDER BY sortkey;
|
|
value sortkey encoding_overhead
|
|
0 000 2
|
|
1 010 2
|
|
01 011 1
|
|
0001 013 -1
|
|
10 1100 2
|
|
100 21000 2
|
|
1000 310000 2
|
|
10000 4100000 2
|
|
100000 51000000 2
|
|
1000000 610000000 2
|
|
10000000 7100000000 2
|
|
100000000 81000000000 2
|
|
1000000000 9010000000000 3
|
|
10000000000 91100000000000 3
|
|
100000000000 921000000000000 3
|
|
1000000000000 9310000000000000 3
|
|
10000000000000 94100000000000000 3
|
|
100000000000000 951000000000000000 3
|
|
1000000000000000 9610000000000000000 3
|
|
10000000000000000 97100000000000000000 3
|
|
100000000000000000 981000000000000000000 3
|
|
1000000000000000000 99010000000000000000000 4
|
|
10000000000000000000 991100000000000000000000 4
|
|
100000000000000000000 9921000000000000000000000 4
|
|
1000000000000000000000 99310000000000000000000000 4
|
|
10000000000000000000000 994100000000000000000000000 4
|
|
100000000000000000000000 9951000000000000000000000000 4
|
|
1000000000000000000000000 99610000000000000000000000000 4
|
|
10000000000000000000000000 997100000000000000000000000000 4
|
|
100000000000000000000000000 9981000000000000000000000000000 4
|
|
1000000000000000000000000000 99901271000000000000000000000000000 8
|