mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
MDEV-4425 Regexp enhancements
Do not pass the PCRE_UCP flag for binary data. As a result, bytes 0x80..0xFF no longer belong to the generic character classes \d (digit) and \w (word character) when binary data is matched. Example: SELECT 0xFF RLIKE '\\w'; -> 0. Note that this change does not affect non-binary data, which is still examined with the PCRE_UCP flag by default.
This commit is contained in:
parent
43c09c15ff
commit
1bcd2bebc6
3 changed files with 13 additions and 2 deletions
|
@ -234,6 +234,12 @@ class ch ch RLIKE class
|
|||
\p{Tamil} 㐗 0
|
||||
\p{Tamil} 갷 0
|
||||
DROP TABLE t1, t2;
|
||||
SELECT 0xFF RLIKE '\\w';
|
||||
0xFF RLIKE '\\w'
|
||||
0
|
||||
SELECT 0xFF RLIKE '(*UCP)\\w';
|
||||
0xFF RLIKE '(*UCP)\\w'
|
||||
1
|
||||
SELECT '\n' RLIKE '(*CR)';
|
||||
'\n' RLIKE '(*CR)'
|
||||
1
|
||||
|
|
|
@ -46,6 +46,10 @@ INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]');
|
|||
SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch;
|
||||
DROP TABLE t1, t2;
|
||||
|
||||
# Checking that UCP is disabled by default for binary data
|
||||
SELECT 0xFF RLIKE '\\w';
|
||||
SELECT 0xFF RLIKE '(*UCP)\\w';
|
||||
|
||||
# newline character
|
||||
SELECT '\n' RLIKE '(*CR)';
|
||||
SELECT '\n' RLIKE '(*LF)';
|
||||
|
|
|
@ -1511,8 +1511,9 @@ public:
|
|||
{}
|
||||
void init(CHARSET_INFO *data_charset, int extra_flags, uint nsubpatterns)
|
||||
{
|
||||
m_library_flags= PCRE_UCP | extra_flags |
|
||||
(data_charset != &my_charset_bin ? PCRE_UTF8 : 0) |
|
||||
m_library_flags= extra_flags |
|
||||
(data_charset != &my_charset_bin ?
|
||||
(PCRE_UTF8 | PCRE_UCP) : 0) |
|
||||
((data_charset->state &
|
||||
(MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE_CASELESS);
|
||||
|
||||
|
|
Loading…
Reference in a new issue