mirror of
https://github.com/MariaDB/server.git
synced 2025-01-25 00:04:33 +01:00
1bcd2bebc6
Do not pass the PCRE_UCP flag for binary data. As a result, bytes 0x80..0xFF no longer belong to the generic character classes \d (digit) and \w (word character): SELECT 0xFF RLIKE '\\w'; -> 0. Note that this change does not affect non-binary data, which is still examined with the PCRE_UCP flag by default.
351 lines
10 KiB
Text
351 lines
10 KiB
Text
|
||
# Use utf8 as the connection character set so that the non-ASCII
# literals in this file are sent and interpreted as utf8 strings.
SET NAMES utf8;
|
||
|
||
--echo #
|
||
--echo # MDEV-4425 REGEXP enhancements
|
||
--echo #
|
||
|
||
--echo #
|
||
--echo # Checking RLIKE
|
||
--echo #
|
||
|
||
# Checking that à is a single character:
# the pattern '^.$' only matches a subject consisting of exactly one character.
|
||
SELECT 'à' RLIKE '^.$';
|
||
|
||
# Checking \x{FFFF} syntax and case sensitivity.
# U+00E0 is 'à' and U+00C0 is 'À'. The first four queries use the default
# collation of the connection; the last four repeat the same combinations
# with COLLATE utf8_bin applied to the pattern.
|
||
SELECT 'à' RLIKE '\\x{00E0}';
|
||
SELECT 'À' RLIKE '\\x{00E0}';
|
||
SELECT 'à' RLIKE '\\x{00C0}';
|
||
SELECT 'À' RLIKE '\\x{00C0}';
|
||
SELECT 'à' RLIKE '\\x{00E0}' COLLATE utf8_bin;
|
||
SELECT 'À' RLIKE '\\x{00E0}' COLLATE utf8_bin;
|
||
SELECT 'à' RLIKE '\\x{00C0}' COLLATE utf8_bin;
|
||
SELECT 'À' RLIKE '\\x{00C0}' COLLATE utf8_bin;
|
||
|
||
# Checking how (?i) and (?-i) affect case sensitivity.
# t1 holds the subjects and t2 holds the patterns; the cross join below
# evaluates every subject against every pattern.
|
||
CREATE TABLE t1 (s VARCHAR(10) CHARACTER SET utf8);
|
||
INSERT INTO t1 VALUES ('a'),('A');
|
||
CREATE TABLE t2 (p VARCHAR(10) CHARACTER SET utf8);
|
||
INSERT INTO t2 VALUES ('a'),('(?i)a'),('(?-i)a'),('A'),('(?i)A'),('(?-i)A');
|
||
# Each pair is matched twice: once with the column's own collation and once
# with the subject forced to utf8_bin. ORDER BY BINARY keeps the row order
# deterministic and independent of the collation.
SELECT s,p,s RLIKE p, s COLLATE utf8_bin RLIKE p FROM t1,t2 ORDER BY BINARY s, BINARY p;
|
||
DROP TABLE t1,t2;
|
||
|
||
|
||
# Checking Unicode character classes.
# t1 holds single-character subjects, t2 holds the character-class patterns;
# the final SELECT matches every character against every class.
|
||
CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8;
|
||
CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8;
|
||
# Upper-case Cyrillic, Greek and Latin letters
INSERT INTO t1 VALUES ('Я'),('Σ'),('A'),('À');
|
||
# The corresponding lower-case letters
INSERT INTO t1 VALUES ('я'),('σ'),('a'),('à');
|
||
# Presumably one sample each for \p{Han}, \p{Hangul} and \p{Sinhala}
INSERT INTO t1 VALUES ('㐗'),('갷'),('ප');
|
||
# An ASCII digit and a non-ASCII digit (presumably the sample for \p{Tamil})
INSERT INTO t1 VALUES ('1'),('௨');
|
||
# Script properties
INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}');
|
||
INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}');
|
||
INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}');
|
||
# General category properties for letters
INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}');
|
||
# POSIX bracket classes
INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]');
|
||
# BINARY ch gives a deterministic, collation-independent order within a class
SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch;
|
||
DROP TABLE t1, t2;
|
||
|
||
# Checking that UCP is disabled by default for binary data.
# 0xFF is a binary (hex) literal; the first query relies on the default flags.
|
||
SELECT 0xFF RLIKE '\\w';
|
||
# Same subject, but the pattern explicitly requests UCP with the (*UCP) prefix
SELECT 0xFF RLIKE '(*UCP)\\w';
|
||
|
||
# Newline conventions: the pattern prefixes (*CR), (*LF), (*CRLF), (*ANYCRLF)
# and (*ANY) select which sequence the regex library treats as a line break.
# First, each prefix is used on its own against a subject holding only '\n'.
|
||
SELECT '\n' RLIKE '(*CR)';
|
||
SELECT '\n' RLIKE '(*LF)';
|
||
SELECT '\n' RLIKE '(*CRLF)';
|
||
SELECT '\n' RLIKE '(*ANYCRLF)';
|
||
SELECT '\n' RLIKE '(*ANY)';
|
||
|
||
# Multi-line mode (?m): '^a$' must match a whole line, so the result depends
# on whether the separator after 'a' counts as a newline under the selected
# convention. Subject with an LF separator:
SELECT 'a\nb' RLIKE '(*LF)(?m)^a$';
|
||
SELECT 'a\nb' RLIKE '(*CR)(?m)^a$';
|
||
SELECT 'a\nb' RLIKE '(*CRLF)(?m)^a$';
|
||
SELECT 'a\nb' RLIKE '(*ANYCRLF)(?m)^a$';
|
||
|
||
# Subject with a CR separator
SELECT 'a\rb' RLIKE '(*LF)(?m)^a$';
|
||
SELECT 'a\rb' RLIKE '(*CR)(?m)^a$';
|
||
SELECT 'a\rb' RLIKE '(*CRLF)(?m)^a$';
|
||
SELECT 'a\rb' RLIKE '(*ANYCRLF)(?m)^a$';
|
||
|
||
# Subject with a CRLF separator
SELECT 'a\r\nb' RLIKE '(*LF)(?m)^a$';
|
||
SELECT 'a\r\nb' RLIKE '(*CR)(?m)^a$';
|
||
SELECT 'a\r\nb' RLIKE '(*CRLF)(?m)^a$';
|
||
SELECT 'a\r\nb' RLIKE '(*ANYCRLF)(?m)^a$';
|
||
|
||
# Backreferences: \g1 and \g2 refer to the first and second capturing groups
|
||
SELECT 'aa' RLIKE '(a)\\g1';
|
||
SELECT 'aa bb' RLIKE '(a)\\g1 (b)\\g2';
|
||
|
||
# Repetition: bounded {min,max} and open-ended {min,} quantifiers
|
||
SELECT 'aaaaa' RLIKE 'a{0,5}';
|
||
SELECT 'aaaaa' RLIKE 'a{1,3}';
|
||
SELECT 'aaaaa' RLIKE 'a{0,}';
|
||
SELECT 'aaaaa' RLIKE 'a{10,20}';
|
||
|
||
# Recursion: (?R) re-enters the whole pattern
|
||
SELECT 'aabb' RLIKE 'a(?R)?b';
|
||
SELECT 'aabb' RLIKE 'aa(?R)?bb';
|
||
|
||
# Subroutine calls: (?1) calls capturing group 1.
# NOTE(review): both queries are disabled; the reason is not stated in this file.
|
||
#SELECT 'abbbc' RLIKE '(a(b|(?1))*c)';
|
||
#SELECT 'abca' RLIKE '([abc])(?1){3}';
|
||
|
||
# Atomic grouping: (?>...) does not give back what it matched when backtracking
|
||
SELECT 'abcc' RLIKE 'a(?>bc|b)c';
|
||
SELECT 'abc' RLIKE 'a(?>bc|b)c';
|
||
|
||
# Negative lookahead: 'a' not followed by 'b'
|
||
SELECT 'ab' RLIKE 'a(?!b)';
|
||
SELECT 'ac' RLIKE 'a(?!b)';
|
||
|
||
# Positive lookahead: 'a' followed by 'b'
|
||
SELECT 'ab' RLIKE 'a(?=b)';
|
||
SELECT 'ac' RLIKE 'a(?=b)';
|
||
|
||
# Negative lookbehind: 'b' not preceded by 'a'
|
||
SELECT 'ab' RLIKE '(?<!a)b';
|
||
SELECT 'cb' RLIKE '(?<!a)b';
|
||
|
||
# Positive lookbehind: 'b' preceded by 'a'
|
||
SELECT 'ab' RLIKE '(?<=a)b';
|
||
SELECT 'cb' RLIKE '(?<=a)b';
|
||
|
||
# Named subpatterns: (?P<name>...) defines a group, (?P=name) refers back to it
|
||
SELECT 'aa' RLIKE '(?P<pattern>a)(?P=pattern)';
|
||
SELECT 'aba' RLIKE '(?P<pattern>a)b(?P=pattern)';
|
||
|
||
# Inline comments: (?#...) is part of the pattern text but matches nothing.
# The last query puts a 'b' inside the comment while the subject has a real 'b'.
|
||
SELECT 'a' RLIKE 'a(?#comment)';
|
||
SELECT 'aa' RLIKE 'a(?#comment)a';
|
||
SELECT 'aba' RLIKE 'a(?#b)a';
|
||
|
||
# Ungreedy matching.
# NOTE(review): this query is disabled; the reason is not stated in this file.
|
||
#SELECT 'ddd <ab>cc</ab> eee' RLIKE '<.+?>';
|
||
|
||
# Extended character classes.
# \W: non-word character
|
||
SELECT 'aaa' RLIKE '\\W\\W\\W';
|
||
SELECT '%' RLIKE '\\W';
|
||
SELECT '%a$' RLIKE '\\W.\\W';
|
||
|
||
# \d: digit
SELECT '123' RLIKE '\\d\\d\\d';
|
||
SELECT 'aaa' RLIKE '\\d\\d\\d';
|
||
SELECT '1a3' RLIKE '\\d.\\d';
|
||
SELECT 'a1b' RLIKE '\\d.\\d';
|
||
|
||
# \D: non-digit
SELECT '8' RLIKE '\\D';
|
||
SELECT 'a' RLIKE '\\D';
|
||
SELECT '%' RLIKE '\\D';
|
||
SELECT 'a1' RLIKE '\\D\\d';
|
||
SELECT 'a1' RLIKE '\\d\\D';
|
||
|
||
# \s: whitespace
SELECT '\t' RLIKE '\\s';
|
||
SELECT '\r' RLIKE '\\s';
|
||
SELECT '\n' RLIKE '\\s';
|
||
# NOTE(review): SQL string literals have no \v escape, so '\v' is presumably
# the plain letter 'v' here rather than a vertical tab - confirm.
SELECT '\v' RLIKE '\\s';
|
||
|
||
# \S: non-whitespace
SELECT 'a' RLIKE '\\S';
|
||
SELECT '1' RLIKE '\\S';
|
||
SELECT '!' RLIKE '\\S';
|
||
SELECT '.' RLIKE '\\S';
|
||
|
||
# Checking 0x00 bytes: the subject contains an embedded NUL ('\0')
|
||
# Bug#70470 REGEXP fails to find matches after NUL character
|
||
# The pattern must be found in the part of the subject after the NUL
SELECT 'abc\0def' REGEXP 'def';
|
||
# The NUL itself is matched by \x{00} in the pattern
SELECT 'abc\0def' REGEXP 'abc\\x{00}def';
|
||
# HEX() makes the (unprintable) NUL byte visible in the recorded result
SELECT HEX(REGEXP_SUBSTR('abc\0def','abc\\x{00}def'));
|
||
|
||
|
||
--echo #
|
||
--echo # Checking REGEXP_REPLACE
|
||
--echo #
|
||
|
||
# Check data type: materialize the function result into a table and
# display the column definition derived for it.
|
||
CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x');
|
||
SHOW CREATE TABLE t1;
|
||
DROP TABLE t1;
|
||
|
||
# Check print(): EXPLAIN EXTENDED records how the server prints the
# expression back (presumably exercising the item's print() method).
|
||
EXPLAIN EXTENDED SELECT REGEXP_REPLACE('abc','b','x');
|
||
|
||
# Check decimals: '+0' puts the string result into a numeric context;
# the column definition derived for the sum is then displayed.
|
||
CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x')+0;
|
||
SHOW CREATE TABLE t1;
|
||
DROP TABLE t1;
|
||
|
||
# Return NULL if any of the arguments are NULL
# (subject, pattern and replacement are each set to NULL in turn)
|
||
SELECT REGEXP_REPLACE(NULL,'b','c');
|
||
SELECT REGEXP_REPLACE('a',NULL,'c');
|
||
SELECT REGEXP_REPLACE('a','b',NULL);
|
||
|
||
# Return the original string if no match
|
||
SELECT REGEXP_REPLACE('a','x','b');
|
||
|
||
# Return the original string for an empty pattern
|
||
SELECT REGEXP_REPLACE('a','','b');
|
||
|
||
# Check that replace stops on the first empty match.
|
||
# 'a5b' matches the pattern and '5' is replaced with 'x',
|
||
# then 'ab' matches the pattern, but the match '5*' is empty,
|
||
# so replacing stops here.
|
||
SELECT REGEXP_REPLACE('a5b ab a5b','(?<=a)5*(?=b)','x');
|
||
|
||
# A modified version of the previous example,
|
||
# to check that all matches are replaced if no empty match is met.
|
||
SELECT REGEXP_REPLACE('a5b a5b a5b','(?<=a)5*(?=b)','x');
|
||
|
||
|
||
# Check that case sensitivity respects the collation:
# the first two queries use the default collation, the last two force
# utf8_bin on the subject.
|
||
SELECT REGEXP_REPLACE('A','a','b');
|
||
SELECT REGEXP_REPLACE('a','A','b');
|
||
SELECT REGEXP_REPLACE('A' COLLATE utf8_bin,'a','b');
|
||
SELECT REGEXP_REPLACE('a' COLLATE utf8_bin,'A','b');
|
||
|
||
# Pattern references in the "replace" string:
# \1 and \2 refer to the first and second capturing groups of the pattern.
|
||
SELECT REGEXP_REPLACE('James Bond', '(.*) (.*)', '\\2, \\1 \\2');
|
||
|
||
# Checking with UTF8 (multi-byte subject, pattern and replacement)
|
||
SELECT REGEXP_REPLACE('абвгд','в','ц');
|
||
|
||
# Check that it does not treat binary strings as UTF8.
# Pattern and replacement are single-byte hex literals; in utf8 the subject
# 'г' is the two bytes 0xD0 0xB3, so the pattern equals its second byte.
|
||
SELECT REGEXP_REPLACE('г',0xB3,0xB4);
|
||
|
||
# Check that it replaces all matches by default
|
||
SELECT REGEXP_REPLACE('aaaa','a','b');
|
||
|
||
# Replace all matches except the first letter
# (the lookbehind requires a preceding character)
|
||
SELECT REGEXP_REPLACE('aaaa','(?<=.)a','b');
|
||
|
||
# Replace all matches except the last letter
# (the lookahead requires a following character)
|
||
SELECT REGEXP_REPLACE('aaaa','a(?=.)','b');
|
||
|
||
# Replace all matches except the first and the last letter
|
||
SELECT REGEXP_REPLACE('aaaa','(?<=.)a(?=.)','b');
|
||
|
||
# Newline conventions with REGEXP_REPLACE: in multi-line mode (?m) the line
# 'a' is replaced by 'c' only when the separator after it counts as a newline
# under the convention selected by the pattern prefix.
# Subject with an LF separator:
|
||
SELECT REGEXP_REPLACE('a\nb','(*LF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\nb','(*CR)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\nb','(*CRLF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\nb','(*ANYCRLF)(?m)^a$','c');
|
||
|
||
# Subject with a CR separator
SELECT REGEXP_REPLACE('a\rb','(*LF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\rb','(*CR)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\rb','(*CRLF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\rb','(*ANYCRLF)(?m)^a$','c');
|
||
|
||
# Subject with a CRLF separator
SELECT REGEXP_REPLACE('a\r\nb','(*LF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\r\nb','(*CR)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\r\nb','(*CRLF)(?m)^a$','c');
|
||
SELECT REGEXP_REPLACE('a\r\nb','(*ANYCRLF)(?m)^a$','c');
|
||
|
||
# Backreferences inside the pattern: \g1 and \g2 refer to capturing groups
|
||
SELECT REGEXP_REPLACE('aa','(a)\\g1','b');
|
||
SELECT REGEXP_REPLACE('aa bb','(a)\\g1 (b)\\g2','c');
|
||
|
||
# Repetition: bounded {min,max} quantifiers
|
||
SELECT REGEXP_REPLACE('aaaaa','a{1,3}','b');
|
||
SELECT REGEXP_REPLACE('aaaaa','a{10,20}','b');
|
||
|
||
# Recursion: (?R) re-enters the whole pattern; the subject has extra 'd'
# characters around the part the pattern is aimed at.
|
||
SELECT REGEXP_REPLACE('daabbd','a(?R)?b','c');
|
||
SELECT REGEXP_REPLACE('daabbd','aa(?R)?bb','c');
|
||
|
||
# Atomic grouping: (?>...) does not give back what it matched when backtracking
|
||
SELECT REGEXP_REPLACE('dabccd','a(?>bc|b)c','e');
|
||
SELECT REGEXP_REPLACE('dabcd','a(?>bc|b)c','e');
|
||
|
||
# Negative lookahead: 'a' not followed by 'b'
|
||
SELECT REGEXP_REPLACE('ab','a(?!b)','e');
|
||
SELECT REGEXP_REPLACE('ac','a(?!b)','e');
|
||
|
||
# Positive lookahead: 'a' followed by 'b'
|
||
SELECT REGEXP_REPLACE('ab','a(?=b)','e');
|
||
SELECT REGEXP_REPLACE('ac','a(?=b)','e');
|
||
|
||
# Negative lookbehind: 'b' not preceded by 'a'
|
||
SELECT REGEXP_REPLACE('ab','(?<!a)b','e');
|
||
SELECT REGEXP_REPLACE('cb','(?<!a)b','e');
|
||
|
||
# Positive lookbehind: 'b' preceded by 'a'
|
||
SELECT REGEXP_REPLACE('ab','(?<=a)b','e');
|
||
SELECT REGEXP_REPLACE('cb','(?<=a)b','e');
|
||
|
||
# Named subpatterns: (?P<name>...) defines a group, (?P=name) refers back to it
|
||
SELECT REGEXP_REPLACE('aa','(?P<pattern>a)(?P=pattern)','b');
|
||
SELECT REGEXP_REPLACE('aba','(?P<pattern>a)b(?P=pattern)','c');
|
||
|
||
# Inline comments: (?#...) is part of the pattern text but matches nothing
|
||
SELECT REGEXP_REPLACE('a','a(?#comment)','e');
|
||
SELECT REGEXP_REPLACE('aa','a(?#comment)a','e');
|
||
SELECT REGEXP_REPLACE('aba','a(?#b)a','e');
|
||
|
||
# Ungreedy matching: '.+?' takes as few characters as possible, so each
# '<...>' tag is a separate match.
|
||
SELECT REGEXP_REPLACE('ddd<ab>cc</ab>eee','<.+?>','*');
|
||
|
||
# Extended character classes.
# \W (non-word character) and \w (word character)
|
||
SELECT REGEXP_REPLACE('aaa','\\W\\W\\W','e');
|
||
SELECT REGEXP_REPLACE('aaa','\\w\\w\\w','e');
|
||
SELECT REGEXP_REPLACE('%','\\W','e');
|
||
SELECT REGEXP_REPLACE('%a$','\\W.\\W','e');
|
||
SELECT REGEXP_REPLACE('%a$','\\W\\w\\W','e');
|
||
|
||
# \d (digit); the first and fourth patterns are longer than the subject
SELECT REGEXP_REPLACE('123','\\d\\d\\d\\d\\d\\d','e');
|
||
SELECT REGEXP_REPLACE('123','\\d\\d\\d','e');
|
||
SELECT REGEXP_REPLACE('aaa','\\d\\d\\d','e');
|
||
SELECT REGEXP_REPLACE('1a3','\\d.\\d\\d.\\d','e');
|
||
SELECT REGEXP_REPLACE('1a3','\\d.\\d','e');
|
||
SELECT REGEXP_REPLACE('a1b','\\d.\\d','e');
|
||
|
||
# \D (non-digit)
SELECT REGEXP_REPLACE('8','\\D','e');
|
||
SELECT REGEXP_REPLACE('a','\\D','e');
|
||
SELECT REGEXP_REPLACE('%','\\D','e');
|
||
SELECT REGEXP_REPLACE('a1','\\D\\d','e');
|
||
SELECT REGEXP_REPLACE('a1','\\d\\D','e');
|
||
|
||
# \s (whitespace)
SELECT REGEXP_REPLACE('\t','\\s','e');
|
||
SELECT REGEXP_REPLACE('\r','\\s','e');
|
||
SELECT REGEXP_REPLACE('\n','\\s','e');
|
||
|
||
# \S (non-whitespace)
SELECT REGEXP_REPLACE('a','\\S','e');
|
||
SELECT REGEXP_REPLACE('1','\\S','e');
|
||
SELECT REGEXP_REPLACE('!','\\S','e');
|
||
SELECT REGEXP_REPLACE('.','\\S','e');
|
||
|
||
--echo #
|
||
--echo # Checking REGEXP_INSTR
|
||
--echo #
|
||
# A pattern that does not occur in the subject, followed by one query
# for each character position of 'abcd'.
SELECT REGEXP_INSTR('abcd','X');
|
||
SELECT REGEXP_INSTR('abcd','a');
|
||
SELECT REGEXP_INSTR('abcd','b');
|
||
SELECT REGEXP_INSTR('abcd','c');
|
||
SELECT REGEXP_INSTR('abcd','d');
|
||
# The lookbehind requires a preceding 'a', so the first 'a' cannot match
SELECT REGEXP_INSTR('aaaa','(?<=a)a');
|
||
|
||
# Multi-byte (utf8) subject: one query per character of the subject
SELECT REGEXP_INSTR('вася','в');
|
||
SELECT REGEXP_INSTR('вася','а');
|
||
SELECT REGEXP_INSTR('вася','с');
|
||
SELECT REGEXP_INSTR('вася','я');
|
||
# The same four checks with both arguments converted to koi8r.
# NOTE(review): presumably verifies that the reported position does not
# depend on the character set - confirm against the recorded result.
SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('в' USING koi8r));
|
||
SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('а' USING koi8r));
|
||
SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('с' USING koi8r));
|
||
SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('я' USING koi8r));
|
||
|
||
|
||
--echo #
|
||
--echo # Checking REGEXP_SUBSTR
|
||
--echo #
|
||
|
||
# Check data type: materialize the function result into a table and
# display the column definition derived for it.
|
||
CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b');
|
||
SHOW CREATE TABLE t1;
|
||
DROP TABLE t1;
|
||
|
||
# Check print(): EXPLAIN EXTENDED records how the server prints the
# expression back (presumably exercising the item's print() method).
|
||
EXPLAIN EXTENDED SELECT REGEXP_SUBSTR('abc','b');
|
||
|
||
# Check decimals: '+0' puts the string result into a numeric context;
# the column definition derived for the sum is then displayed.
|
||
CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b')+0;
|
||
SHOW CREATE TABLE t1;
|
||
DROP TABLE t1;
|
||
|
||
|
||
# Practical example: extract the scheme and host part of a URL embedded in
# text ('[^/]*' stops at the first '/' after '://').
SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
|