MDEV-14024 PCRE2.

Related changes in the server code.
This commit is contained in:
Alexey Botchkov 2019-12-17 01:37:59 +04:00 committed by Sergei Golubchik
parent ce70573f62
commit 9dadfdcde5
29 changed files with 122 additions and 288 deletions

View file

@ -350,7 +350,7 @@ IF(NOT HAVE_CXX_NEW)
ENDIF()
# Find header files from the bundled libraries
# (wolfssl, readline, pcre, etc)
# (wolfssl, readline, pcre2, etc)
# before the ones installed in the system
SET(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON)

View file

@ -46,7 +46,7 @@ ENDIF(UNIX)
MYSQL_ADD_EXECUTABLE(mysqltest mysqltest.cc COMPONENT Test)
SET_SOURCE_FILES_PROPERTIES(mysqltest.cc PROPERTIES COMPILE_FLAGS "-DTHREADS")
TARGET_LINK_LIBRARIES(mysqltest ${CLIENT_LIB} pcreposix pcre)
TARGET_LINK_LIBRARIES(mysqltest ${CLIENT_LIB} pcre2-posix pcre2-8)
SET_TARGET_PROPERTIES(mysqltest PROPERTIES ENABLE_EXPORTS TRUE)

View file

@ -44,8 +44,8 @@
#include <hash.h>
#include <stdarg.h>
#include <violite.h>
#define PCRE_STATIC 1 /* Important on Windows */
#include "pcreposix.h" /* pcreposix regex library */
#define PCRE2_STATIC 1 /* Important on Windows */
#include "pcre2posix.h" /* pcre2posix regex library */
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif

View file

@ -5,24 +5,17 @@ SET(WITH_PCRE "auto" CACHE STRING
# CHECK_PCRE: decide whether the build uses a PCRE library found on the
# system or the copy bundled in the source tree. The decision is driven by
# WITH_PCRE, which is compared against "system", "auto" and "bundled" below.
# NOTE(review): this span is a diff listing, so the PCRE1 probes that the
# commit removes appear next to the PCRE2 lines that replace them.
MACRO (CHECK_PCRE)
# Only probe the system when a system library is allowed ("system"/"auto").
IF(WITH_PCRE STREQUAL "system" OR WITH_PCRE STREQUAL "auto")
# PCRE1 probe: look for the pcre_stack_guard symbol in libpcre.
CHECK_LIBRARY_EXISTS(pcre pcre_stack_guard "" HAVE_PCRE_STACK_GUARD)
# PCRE1 probe: run a small program against libpcre; it cannot be executed
# when cross-compiling, hence the guard.
IF(NOT CMAKE_CROSSCOMPILING)
SET(CMAKE_REQUIRED_LIBRARIES "pcre")
CHECK_C_SOURCE_RUNS("
#include <pcre.h>
int main() {
return -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0) < 256;
}" PCRE_STACK_SIZE_OK)
# Clear the variable again so later checks do not link against pcre.
SET(CMAKE_REQUIRED_LIBRARIES)
ENDIF()
# PCRE2 probe: look for pcre2_match_8 in libpcre2-8; result in HAVE_PCRE2.
CHECK_LIBRARY_EXISTS(pcre2-8 pcre2_match_8 "" HAVE_PCRE2)
ENDIF()
IF(NOT HAVE_PCRE_STACK_GUARD OR NOT PCRE_STACK_SIZE_OK OR
WITH_PCRE STREQUAL "bundled")
# Use the bundled copy when no usable system library was detected or when
# "bundled" was requested explicitly.
IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled")
# An explicit "system" request must not silently fall back to the bundle.
IF (WITH_PCRE STREQUAL "system")
MESSAGE(FATAL_ERROR "system pcre is not found or unusable")
MESSAGE(FATAL_ERROR "system pcre2-8 library is not found or unusable")
ENDIF()
SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre ${CMAKE_SOURCE_DIR}/pcre)
ADD_SUBDIRECTORY(pcre)
# Header search path for the bundled copy: both the source and the build
# tree, at the top level and under src/.
SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2
${CMAKE_BINARY_DIR}/pcre2/src ${CMAKE_SOURCE_DIR}/pcre2/src)
# Set before ADD_SUBDIRECTORY; presumably read by the bundled pcre2
# CMakeLists to skip its tests and the pcre2grep tool - TODO confirm.
SET(PCRE2_BUILD_TESTS OFF CACHE BOOL "Disable tests.")
SET(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "Disable pcre2grep")
ADD_SUBDIRECTORY(pcre2)
ENDIF()
ENDMACRO()

View file

@ -549,6 +549,7 @@
#define PACKAGE_VERSION "@VERSION@"
#define VERSION "@VERSION@"
#define PROTOCOL_VERSION 10
#define PCRE2_CODE_UNIT_WIDTH 8
#define MALLOC_LIBRARY "@MALLOC_LIBRARY@"

View file

@ -37,7 +37,7 @@ INCLUDE_DIRECTORIES(
)
IF(NOT HAVE_SYSTEM_REGEX)
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre)
INCLUDE_DIRECTORIES(${PCRE_INCLUDES})
ENDIF()
@ -91,7 +91,7 @@ ADD_SUBDIRECTORY(crc)
TARGET_LINK_LIBRARIES(mariabackup sql sql_builtins crc)
IF(NOT HAVE_SYSTEM_REGEX)
TARGET_LINK_LIBRARIES(mariabackup pcreposix)
TARGET_LINK_LIBRARIES(mariabackup pcre2-posix)
ENDIF()

View file

@ -25,7 +25,8 @@ my_regex is used on Windows and native calls are used on POSIX platforms. */
#ifdef HAVE_SYSTEM_REGEX
#include <regex.h>
#else
#include <pcreposix.h>
#define PCRE2_STATIC 1 /* Important on Windows */
#include <pcre2posix.h>
#endif
typedef regex_t* xb_regex_t;

View file

@ -154,7 +154,7 @@ ENDIF()
SET(LIBS
dbug strings mysys mysys_ssl pcre vio
dbug strings mysys mysys_ssl pcre2-8 vio
${ZLIB_LIBRARY} ${SSL_LIBRARIES}
${LIBWRAP} ${LIBCRYPT} ${LIBDL}
${EMBEDDED_PLUGIN_LIBS}

View file

@ -34,7 +34,7 @@ ENDIF(UNIX)
MYSQL_ADD_EXECUTABLE(mysqltest_embedded ../../client/mysqltest.cc
COMPONENT Test)
TARGET_LINK_LIBRARIES(mysqltest_embedded mysqlserver pcreposix pcre)
# pcre2-posix is the POSIX wrapper on top of pcre2-8, so it is listed first:
# with order-sensitive static linking a library must precede the libraries
# it depends on. This is also the order used for the mysqltest client.
TARGET_LINK_LIBRARIES(mysqltest_embedded mysqlserver pcre2-posix pcre2-8)
IF(CMAKE_GENERATOR MATCHES "Xcode")
# It does not seem possible to tell Xcode the resulting target might need

View file

@ -793,7 +793,7 @@ SELECT 'a\nb' RLIKE '(?-s)a.b';
0
SET default_regex_flags=DEFAULT;
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp
ERROR 42000: Got error 'two named subpatterns have the same name (PCRE2_DUPNAMES not set' from regexp
SET default_regex_flags='DUPNAMES';
SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$');
REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$')
@ -817,8 +817,7 @@ SELECT 'AB' RLIKE 'A# this is a comment\nB';
1
SET default_regex_flags=DEFAULT;
SELECT 'Aq' RLIKE 'A\\q';
'Aq' RLIKE 'A\\q'
1
ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp
SET default_regex_flags='EXTRA';
SELECT 'Aq' RLIKE 'A\\q';
ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp
@ -861,7 +860,7 @@ SELECT 0xE001 REGEXP @regCheck;
0xE001 REGEXP @regCheck
0
Warnings:
Warning 1139 Got error 'pcre_exec: Invalid utf8 byte sequence in the subject string' from regexp
Warning 1139 Got error 'UTF-8 error: 1 byte missing at end' from regexp
# Testing workaround N1: This makes the pattern to be a binary string:
SET NAMES latin1;
SET @regCheck= X'E001';
@ -883,40 +882,31 @@ CAST(0xE001 AS BINARY) REGEXP @regCheck
# MDEV-12420: Testing recursion overflow
SELECT 1 FROM dual WHERE ('Alpha,Bravo,Charlie,Delta,Echo,Foxtrot,StrataCentral,Golf,Hotel,India,Juliet,Kilo,Lima,Mike,StrataL3,November,Oscar,StrataL2,Sand,P3,P4SwitchTest,Arsys,Poppa,ExtensionMgr,Arp,Quebec,Romeo,StrataApiV2,PtReyes,Sierra,SandAcl,Arrow,Artools,BridgeTest,Tango,SandT,PAlaska,Namespace,Agent,Qos,PatchPanel,ProjectReport,Ark,Gimp,Agent,SliceAgent,Arnet,Bgp,Ale,Tommy,Central,AsicPktTestLib,Hsc,SandL3,Abuild,Pca9555,Standby,ControllerDut,CalSys,SandLib,Sb820,PointV2,BfnLib,Evpn,BfnSdk,Sflow,ManagementActive,AutoTest,GatedTest,Bgp,Sand,xinetd,BfnAgentLib,bf-utils,Hello,BfnState,Eos,Artest,Qos,Scd,ThermoMgr,Uniform,EosUtils,Eb,FanController,Central,BfnL3,BfnL2,tcp_wrappers,Victor,Environment,Route,Failover,Whiskey,Xray,Gimp,BfnFixed,Strata,SoCal,XApi,Msrp,XpProfile,tcpdump,PatchPanel,ArosTest,FhTest,Arbus,XpAcl,MacConc,XpApi,telnet,QosTest,Alpha2,BfnVlan,Stp,VxlanControllerTest,MplsAgent,Bravo2,Lanz,BfnMbb,Intf,XCtrl,Unicast,SandTunnel,L3Unicast,Ipsec,MplsTest,Rsvp,EthIntf,StageMgr,Sol,MplsUtils,Nat,Ira,P4NamespaceDut,Counters,Charlie2,Aqlc,Mlag,Power,OpenFlow,Lag,RestApi,BfdTest,strongs,Sfa,CEosUtils,Adt746,MaintenanceMode,MlagDut,EosImage,IpEth,MultiProtocol,Launcher,Max3179,Snmp,Acl,IpEthTest,PhyEee,bf-syslibs,tacc,XpL2,p4-ar-switch,p4-bf-switch,LdpTest,BfnPhy,Mirroring,Phy6,Ptp' REGEXP '^((?!\b(Strata|StrataApi|StrataApiV2)\b).)*$');
1
Warnings:
Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp
1
SELECT CONCAT(REPEAT('100,',60),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$';
CONCAT(REPEAT('100,',60),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$'
1
SELECT CONCAT(REPEAT('100,',200),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$';
CONCAT(REPEAT('100,',200),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$'
0
Warnings:
Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp
1
SELECT REGEXP_INSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$');
REGEXP_INSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')
1
SELECT REGEXP_INSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$');
REGEXP_INSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')
0
Warnings:
Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp
1
SELECT LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'));
LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'))
243
SELECT LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'));
LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'))
0
Warnings:
Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp
803
SELECT LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', ''));
LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', ''))
0
SELECT LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', ''));
LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', ''))
803
Warnings:
Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp
0
SELECT REGEXP_INSTR('a_kollision', 'oll');
REGEXP_INSTR('a_kollision', 'oll')
4

View file

@ -382,6 +382,7 @@ SELECT 'AB' RLIKE 'A B';
SELECT 'AB' RLIKE 'A# this is a comment\nB';
SET default_regex_flags=DEFAULT;
--error ER_REGEXP_ERROR
SELECT 'Aq' RLIKE 'A\\q';
SET default_regex_flags='EXTRA';
--error ER_REGEXP_ERROR

View file

@ -202,7 +202,8 @@ The following specify which files/extra groups are read (specified before remain
using the password expiration options in ALTER USER.
--default-regex-flags=name
Default flags for the regex library. Any combination of:
DOTALL, DUPNAMES, EXTENDED, EXTRA, MULTILINE, UNGREEDY
DOTALL, DUPNAMES, EXTENDED, EXTENDED_MORE, EXTRA,
MULTILINE, UNGREEDY
--default-storage-engine=name
The default storage engine for new tables
--default-time-zone=name

View file

@ -13,8 +13,8 @@ SELECT @@default_regex_flags;
SET default_regex_flags='UNKNOWN';
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN'
SET default_regex_flags=123;
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123'
SET default_regex_flags=325;
ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '325'
SET default_regex_flags=123.0;
ERROR 42000: Incorrect argument type to variable 'default_regex_flags'
SET default_regex_flags=123e0;
@ -31,7 +31,7 @@ DOTALL
SET @@default_regex_flags=63;
SELECT @@default_regex_flags;
@@default_regex_flags
DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE
SET @@default_regex_flags='DOTALL';
SELECT @@default_regex_flags;
@@default_regex_flags

View file

@ -740,7 +740,7 @@ VARIABLE_COMMENT Default flags for the regex library
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE,UNGREEDY
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME DEFAULT_STORAGE_ENGINE

View file

@ -750,7 +750,7 @@ VARIABLE_COMMENT Default flags for the regex library
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY
ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE,UNGREEDY
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME DEFAULT_STORAGE_ENGINE

View file

@ -9,7 +9,7 @@ SELECT @@default_regex_flags;
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags='UNKNOWN';
--error ER_WRONG_VALUE_FOR_VAR
SET default_regex_flags=123;
SET default_regex_flags=325;
--error ER_WRONG_TYPE_FOR_VAR
SET default_regex_flags=123.0;
--error ER_WRONG_TYPE_FOR_VAR

View file

@ -193,7 +193,7 @@ RECOMPILE_FOR_EMBEDDED)
ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql
mysys mysys_ssl dbug strings vio pcre
mysys mysys_ssl dbug strings vio pcre2-8
${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT}
${SSL_LIBRARIES}
${LIBSYSTEMD})

View file

@ -5827,15 +5827,6 @@ int Regexp_processor_pcre::default_regex_flags()
return default_regex_flags_pcre(current_thd);
}
void Regexp_processor_pcre::set_recursion_limit(THD *thd)
{
long stack_used;
DBUG_ASSERT(thd == current_thd);
stack_used= available_stack_size(thd->thread_stack, &stack_used);
m_pcre_extra.match_limit_recursion=
(ulong)((my_thread_stack_size - STACK_MIN_SIZE - stack_used)/my_pcre_frame_size);
}
/**
Convert string to lib_charset, if needed.
@ -5869,8 +5860,8 @@ String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter)
bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
{
const char *pcreErrorStr;
int pcreErrorOffset;
int pcreErrorNumber;
PCRE2_SIZE pcreErrorOffset;
if (is_compiled())
{
@ -5883,19 +5874,30 @@ bool Regexp_processor_pcre::compile(String *pattern, bool send_error)
if (!(pattern= convert_if_needed(pattern, &pattern_converter)))
return true;
m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags,
&pcreErrorStr, &pcreErrorOffset, NULL);
m_pcre= pcre2_compile((PCRE2_SPTR8) pattern->ptr(), pattern->length(),
m_library_flags,
&pcreErrorNumber, &pcreErrorOffset, NULL);
if (unlikely(m_pcre == NULL))
{
if (send_error)
{
char buff[MAX_FIELD_WIDTH];
my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset);
/*
  Start the message with the library's own description of the compile
  error. pcre2_get_error_message() returns the length of the text it
  wrote, or a negative code if it could not produce the full message.
*/
int lmsg= pcre2_get_error_message(pcreErrorNumber,
                                  (PCRE2_UCHAR8 *)buff, sizeof(buff));
/*
  pcreErrorOffset is a PCRE2_SIZE, not an int. Passing it unconverted to
  a "%d" conversion is a varargs type mismatch where PCRE2_SIZE is wider
  than int, so narrow it explicitly to match the format.
*/
if (lmsg >= 0)
  my_snprintf(buff+lmsg, sizeof(buff)-lmsg,
              " at offset %d", (int) pcreErrorOffset);
my_error(ER_REGEXP_ERROR, MYF(0), buff);
}
return true;
}
m_pcre_match_data= pcre2_match_data_create_from_pattern(m_pcre, NULL);
if (m_pcre_match_data == NULL)
{
my_error(ER_OUT_OF_RESOURCES, MYF(0));
return true;
}
return false;
}
@ -5916,124 +5918,46 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
*/
void Regexp_processor_pcre::pcre_exec_warn(int rc) const
{
char buf[64];
const char *errmsg= NULL;
PCRE2_UCHAR8 buf[128];
THD *thd= current_thd;
/*
Make a descriptive message only for those pcre_exec() error codes
that can actually happen in MariaDB.
*/
switch (rc)
int errlen= pcre2_get_error_message(rc, buf, sizeof(buf));
if (errlen <= 0)
{
case PCRE_ERROR_NULL:
errmsg= "pcre_exec: null argument passed";
break;
case PCRE_ERROR_BADOPTION:
errmsg= "pcre_exec: bad option";
break;
case PCRE_ERROR_BADMAGIC:
errmsg= "pcre_exec: bad magic - not a compiled regex";
break;
case PCRE_ERROR_UNKNOWN_OPCODE:
errmsg= "pcre_exec: error in compiled regex";
break;
case PCRE_ERROR_NOMEMORY:
errmsg= "pcre_exec: Out of memory";
break;
case PCRE_ERROR_NOSUBSTRING:
errmsg= "pcre_exec: no substring";
break;
case PCRE_ERROR_MATCHLIMIT:
errmsg= "pcre_exec: match limit exceeded";
break;
case PCRE_ERROR_CALLOUT:
errmsg= "pcre_exec: callout error";
break;
case PCRE_ERROR_BADUTF8:
errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string";
break;
case PCRE_ERROR_BADUTF8_OFFSET:
errmsg= "pcre_exec: Started at invalid location within utf8 byte sequence";
break;
case PCRE_ERROR_PARTIAL:
errmsg= "pcre_exec: partial match";
break;
case PCRE_ERROR_INTERNAL:
errmsg= "pcre_exec: internal error";
break;
case PCRE_ERROR_BADCOUNT:
errmsg= "pcre_exec: ovesize is negative";
break;
case PCRE_ERROR_RECURSIONLIMIT:
my_snprintf(buf, sizeof(buf), "pcre_exec: recursion limit of %ld exceeded",
m_pcre_extra.match_limit_recursion);
errmsg= buf;
break;
case PCRE_ERROR_BADNEWLINE:
errmsg= "pcre_exec: bad newline options";
break;
case PCRE_ERROR_BADOFFSET:
errmsg= "pcre_exec: start offset negative or greater than string length";
break;
case PCRE_ERROR_SHORTUTF8:
errmsg= "pcre_exec: ended in middle of utf8 sequence";
break;
case PCRE_ERROR_JIT_STACKLIMIT:
errmsg= "pcre_exec: insufficient stack memory for JIT compile";
break;
case PCRE_ERROR_RECURSELOOP:
errmsg= "pcre_exec: Recursion loop detected";
break;
case PCRE_ERROR_BADMODE:
errmsg= "pcre_exec: compiled pattern passed to wrong bit library function";
break;
case PCRE_ERROR_BADENDIANNESS:
errmsg= "pcre_exec: compiled pattern passed to wrong endianness processor";
break;
case PCRE_ERROR_JIT_BADOPTION:
errmsg= "pcre_exec: bad jit option";
break;
case PCRE_ERROR_BADLENGTH:
errmsg= "pcre_exec: negative length";
break;
default:
/*
As other error codes should normally not happen,
we just report the error code without textual description
of the code.
*/
my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
errmsg= buf;
my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc);
}
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg);
ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf);
}
/**
Call pcre2_match() and send a warning if pcre2_match() returned with an error.
*/
int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code,
const pcre_extra *extra,
int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code,
pcre2_match_data *data,
const char *subject,
int length, int startoffset,
int options, int *ovector,
int ovecsize)
int options)
{
int rc= pcre_exec(code, extra, subject, length,
startoffset, options, ovector, ovecsize);
int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length,
(PCRE2_SIZE) startoffset, options, data, NULL);
DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;);
if (unlikely(rc < PCRE_ERROR_NOMATCH))
if (unlikely(rc < PCRE2_ERROR_NOMATCH))
{
m_SubStrVec= NULL;
pcre_exec_warn(rc);
}
else
m_SubStrVec= pcre2_get_ovector_pointer(data);
return rc;
}
bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset)
{
m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, str, (int)length, (int)offset, 0,
m_SubStrVec, array_elements(m_SubStrVec));
m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
str, (int)length, (int)offset, 0);
return false;
}
@ -6043,10 +5967,8 @@ bool Regexp_processor_pcre::exec(String *str, int offset,
{
if (!(str= convert_if_needed(str, &subject_converter)))
return true;
m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra,
str->c_ptr_safe(), str->length(),
offset, 0,
m_SubStrVec, array_elements(m_SubStrVec));
m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data,
str->ptr(), str->length(), offset, 0);
if (m_pcre_exec_rc > 0)
{
uint i;
@ -6096,12 +6018,6 @@ void Regexp_processor_pcre::fix_owner(Item_func *owner,
}
bool Item_func_regex::fix_fields(THD *thd, Item **ref)
{
re.set_recursion_limit(thd);
return Item_bool_func::fix_fields(thd, ref);
}
bool
Item_func_regex::fix_length_and_dec()
{
@ -6128,13 +6044,6 @@ longlong Item_func_regex::val_int()
}
bool Item_func_regexp_instr::fix_fields(THD *thd, Item **ref)
{
re.set_recursion_limit(thd);
return Item_int_func::fix_fields(thd, ref);
}
bool
Item_func_regexp_instr::fix_length_and_dec()
{
@ -6157,7 +6066,7 @@ longlong Item_func_regexp_instr::val_int()
if ((null_value= re.exec(args[0], 0, 1)))
return 0;
return re.match() ? re.subpattern_start(0) + 1 : 0;
return re.match() ? (longlong) (re.subpattern_start(0) + 1) : 0;
}

View file

@ -24,8 +24,8 @@
#endif
#include "item_func.h" /* Item_int_func, Item_bool_func */
#define PCRE_STATIC 1 /* Important on Windows */
#include "pcre.h" /* pcre header file */
#define PCRE2_STATIC 1 /* Important on Windows */
#include "pcre2.h" /* pcre2 header file */
#include "item.h"
extern Item_result item_cmp_type(Item_result a,Item_result b);
@ -2804,41 +2804,39 @@ public:
class Regexp_processor_pcre
{
pcre *m_pcre;
pcre_extra m_pcre_extra;
pcre2_code *m_pcre;
pcre2_match_data *m_pcre_match_data;
bool m_conversion_is_needed;
bool m_is_const;
int m_library_flags;
CHARSET_INFO *m_library_charset;
String m_prev_pattern;
int m_pcre_exec_rc;
int m_SubStrVec[30];
PCRE2_SIZE *m_SubStrVec;
void pcre_exec_warn(int rc) const;
int pcre_exec_with_warn(const pcre *code, const pcre_extra *extra,
int pcre_exec_with_warn(const pcre2_code *code,
pcre2_match_data *data,
const char *subject, int length, int startoffset,
int options, int *ovector, int ovecsize);
int options);
public:
String *convert_if_needed(String *src, String *converter);
String subject_converter;
String pattern_converter;
String replace_converter;
Regexp_processor_pcre() :
m_pcre(NULL), m_conversion_is_needed(true), m_is_const(0),
m_pcre(NULL), m_pcre_match_data(NULL),
m_conversion_is_needed(true), m_is_const(0),
m_library_flags(0),
m_library_charset(&my_charset_utf8mb3_general_ci)
{
m_pcre_extra.flags= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
m_pcre_extra.match_limit_recursion= 100L;
}
{}
int default_regex_flags();
void set_recursion_limit(THD *);
void init(CHARSET_INFO *data_charset, int extra_flags)
{
m_library_flags= default_regex_flags() | extra_flags |
(data_charset != &my_charset_bin ?
(PCRE_UTF8 | PCRE_UCP) : 0) |
(PCRE2_UTF | PCRE2_UCP) : 0) |
((data_charset->state &
(MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE_CASELESS);
(MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE2_CASELESS);
// Convert text data to utf-8.
m_library_charset= data_charset == &my_charset_bin ?
@ -2859,26 +2857,28 @@ public:
bool exec(Item *item, int offset, uint n_result_offsets_to_convert);
bool match() const { return m_pcre_exec_rc < 0 ? 0 : 1; }
int nsubpatterns() const { return m_pcre_exec_rc <= 0 ? 0 : m_pcre_exec_rc; }
int subpattern_start(int n) const
size_t subpattern_start(int n) const
{
return m_pcre_exec_rc <= 0 ? 0 : m_SubStrVec[n * 2];
}
int subpattern_end(int n) const
size_t subpattern_end(int n) const
{
return m_pcre_exec_rc <= 0 ? 0 : m_SubStrVec[n * 2 + 1];
}
int subpattern_length(int n) const
size_t subpattern_length(int n) const
{
return subpattern_end(n) - subpattern_start(n);
}
void reset()
{
m_pcre= NULL;
m_pcre_match_data= NULL;
m_prev_pattern.length(0);
}
void cleanup()
{
pcre_free(m_pcre);
pcre2_match_data_free(m_pcre_match_data);
pcre2_code_free(m_pcre);
reset();
}
bool is_compiled() const { return m_pcre != NULL; }
@ -2903,7 +2903,6 @@ public:
DBUG_VOID_RETURN;
}
longlong val_int();
bool fix_fields(THD *thd, Item **ref);
bool fix_length_and_dec();
const char *func_name() const { return "regexp"; }
enum precedence precedence() const { return CMP_PRECEDENCE; }
@ -2944,7 +2943,6 @@ public:
DBUG_VOID_RETURN;
}
longlong val_int();
bool fix_fields(THD *thd, Item **ref);
bool fix_length_and_dec();
const char *func_name() const { return "regexp_instr"; }
Item *get_copy(THD *thd) { return 0; }

View file

@ -1302,13 +1302,6 @@ bool Item_func_replace::fix_length_and_dec()
/*********************************************************************/
bool Item_func_regexp_replace::fix_fields(THD *thd, Item **ref)
{
re.set_recursion_limit(thd);
return Item_str_func::fix_fields(thd, ref);
}
bool Item_func_regexp_replace::fix_length_and_dec()
{
if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 3))
@ -1360,7 +1353,7 @@ bool Item_func_regexp_replace::append_replacement(String *str,
if (n < re.nsubpatterns())
{
/* A valid sub-pattern reference found */
int pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg;
size_t pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg;
if (str->append(source->str + pbeg, plength, cs))
return true;
}
@ -1389,7 +1382,7 @@ String *Item_func_regexp_replace::val_str(String *str)
String *source= args[0]->val_str(&tmp0);
String *replace= args[2]->val_str(&tmp2);
LEX_CSTRING src, rpl;
int startoffset= 0;
size_t startoffset= 0;
if ((null_value= (args[0]->null_value || args[2]->null_value ||
re.recompile(args[1]))))
@ -1418,7 +1411,8 @@ String *Item_func_regexp_replace::val_str(String *str)
Append the rest of the source string
starting from startoffset until the end of the source.
*/
if (str->append(src.str + startoffset, src.length - startoffset, re.library_charset()))
if (str->append(src.str + startoffset, src.length - startoffset,
re.library_charset()))
goto err;
return str;
}
@ -1427,7 +1421,8 @@ String *Item_func_regexp_replace::val_str(String *str)
Append prefix, the part before the matching pattern.
starting from startoffset until the next match
*/
if (str->append(src.str + startoffset, re.subpattern_start(0) - startoffset, re.library_charset()))
if (str->append(src.str + startoffset,
re.subpattern_start(0) - startoffset, re.library_charset()))
goto err;
// Append replacement
@ -1445,13 +1440,6 @@ err:
}
bool Item_func_regexp_substr::fix_fields(THD *thd, Item **ref)
{
re.set_recursion_limit(thd);
return Item_str_func::fix_fields(thd, ref);
}
bool Item_func_regexp_substr::fix_length_and_dec()
{
if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 2))
@ -1486,8 +1474,7 @@ String *Item_func_regexp_substr::val_str(String *str)
return str;
if (str->append(source->ptr() + re.subpattern_start(0),
re.subpattern_end(0) - re.subpattern_start(0),
re.library_charset()))
re.subpattern_length(0), re.library_charset()))
goto err;
return str;

View file

@ -374,7 +374,6 @@ public:
DBUG_VOID_RETURN;
}
String *val_str(String *str);
bool fix_fields(THD *thd, Item **ref);
bool fix_length_and_dec();
const char *func_name() const { return "regexp_replace"; }
Item *get_copy(THD *thd) { return 0;}
@ -396,7 +395,6 @@ public:
DBUG_VOID_RETURN;
}
String *val_str(String *str);
bool fix_fields(THD *thd, Item **ref);
bool fix_length_and_dec();
const char *func_name() const { return "regexp_substr"; }
Item *get_copy(THD *thd) { return 0; }

View file

@ -113,7 +113,6 @@
#include "sp_rcontext.h"
#include "sp_cache.h"
#include "sql_reload.h" // reload_acl_and_cache
#include "pcre.h"
#ifdef HAVE_POLL_H
#include <poll.h>
@ -3260,20 +3259,6 @@ static void init_libstrings()
#endif
}
ulonglong my_pcre_frame_size;
static void init_pcre()
{
pcre_malloc= pcre_stack_malloc= my_str_malloc_mysqld;
pcre_free= pcre_stack_free= my_free;
pcre_stack_guard= check_enough_stack_size_slow;
/* See http://pcre.org/original/doc/html/pcrestack.html */
my_pcre_frame_size= -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0);
// pcre can underestimate its stack usage. Use a safe value, as in the manual
set_if_bigger(my_pcre_frame_size, 500);
my_pcre_frame_size += 16; // Again, safety margin, see the manual
}
/**
Initialize one of the global date/time format variables.
@ -4130,7 +4115,6 @@ static int init_common_variables()
if (item_create_init())
return 1;
item_init();
init_pcre();
/*
Process a comma-separated character set list and choose
the first available character set. This is mostly for

View file

@ -575,8 +575,6 @@ extern pthread_t signal_thread;
extern struct st_VioSSLFd * ssl_acceptor_fd;
#endif /* HAVE_OPENSSL */
extern ulonglong my_pcre_frame_size;
/*
The following variables were under INNODB_COMPABILITY_HOOKS
*/

View file

@ -443,7 +443,7 @@ sql_mode_t expand_sql_mode(sql_mode_t sql_mode);
const char *sql_mode_string_representation(uint bit_number);
bool sql_mode_string_representation(THD *thd, sql_mode_t sql_mode,
LEX_CSTRING *ls);
int default_regex_flags_pcre(const THD *thd);
int default_regex_flags_pcre(THD *thd);
extern sys_var *Sys_autocommit_ptr, *Sys_last_gtid_ptr,
*Sys_character_set_client_ptr, *Sys_character_set_connection_ptr,

View file

@ -5994,29 +5994,40 @@ static const char *default_regex_flags_names[]=
"DOTALL", // (?s) . matches anything including NL
"DUPNAMES", // (?J) Allow duplicate names for subpatterns
"EXTENDED", // (?x) Ignore white space and # comments
"EXTRA", // (?X) extra features (e.g. error on unknown escape character)
"EXTENDED_MORE",//(?xx) EXTENDED, plus ignore space and tab in character classes
"EXTRA", // means nothing since PCRE2
"MULTILINE", // (?m) ^ and $ match newlines within data
"UNGREEDY", // (?U) Invert greediness of quantifiers
0
};
static const int default_regex_flags_to_pcre[]=
{
PCRE_DOTALL,
PCRE_DUPNAMES,
PCRE_EXTENDED,
PCRE_EXTRA,
PCRE_MULTILINE,
PCRE_UNGREEDY,
PCRE2_DOTALL,
PCRE2_DUPNAMES,
PCRE2_EXTENDED,
PCRE2_EXTENDED_MORE,
-1, /* EXTRA flag not available since PCRE2 */
PCRE2_MULTILINE,
PCRE2_UNGREEDY,
0
};
int default_regex_flags_pcre(const THD *thd)
/**
  Translate the session's default_regex_flags bit set into PCRE2 compile
  options.

  Bit N of the session variable corresponds to entry N of
  default_regex_flags_to_pcre[]. The table is terminated by a 0 entry; a
  negative entry marks a flag that has no PCRE2 equivalent.

  @param thd  session whose variables are read; also receives the warning
              raised for a flag that cannot be mapped

  @return bitwise OR of the PCRE2 options for every flag that is set and
          has a PCRE2 equivalent
*/
int default_regex_flags_pcre(THD *thd)
{
  ulonglong src= thd->variables.default_regex_flags;
  int i, res;
  for (i= res= 0; default_regex_flags_to_pcre[i]; i++)
  {
    if (src & (1ULL << i))
    {
      if (default_regex_flags_to_pcre[i] < 0)
      {
        /* Flag is set but unmapped: warn and leave it out of the result. */
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_UNKNOWN_ERROR,
                            "PCRE2 doesn't support the EXTRA flag. Ignored.");
        continue;
      }
      res|= default_regex_flags_to_pcre[i];
    }
  }
  return res;
}

View file

@ -189,11 +189,7 @@ else()
set(MYSQL_VARIANT "MySQL")
endif()
if(EXISTS "${MYSQL_SOURCE_DIR}/pcre")
set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre")
else()
set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex")
endif()
set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex")
if(EXISTS "${MYSQL_SOURCE_DIR}/extra/rapidjson")
set(MYSQL_RAPIDJSON_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/extra/rapidjson/include")

View file

@ -186,11 +186,7 @@ AC_DEFUN([CONFIG_OPTION_MYSQL],[
mysql_regex_include_dir="$ac_mysql_source_dir/extra/regex"
MYSQL_INCLUDES="$MYSQL_INCLUDES -I$mysql_regex_include_dir"
else
if test -d "$ac_mysql_source_dir/pcre"; then
mysql_regex_include_dir="$ac_mysql_source_dir/pcre"
else
mysql_regex_include_dir="$ac_mysql_source_dir/regex"
fi
mysql_regex_include_dir="$ac_mysql_source_dir/regex"
MYSQL_INCLUDES="$MYSQL_INCLUDES -I$mysql_regex_include_dir"
fi
if test -d "$ac_mysql_source_dir/libbinlogevents"; then

View file

@ -1613,30 +1613,6 @@ AC_SUBST(ONIGMO_CFLAGS)
AC_SUBST(ONIGMO_LIBS)
AM_CONDITIONAL(WITH_BUNDLED_ONIGMO, test "$with_onigmo" != "no" -a "x$have_onigmo" != "xyes")
# PCRE
GRN_WITH_PCRE=no
AC_ARG_WITH(pcre,
[AS_HELP_STRING([--without-pcre],
[Don't use PCRE for groonga-httpd. [default=auto-detect]])],
[with_pcre="$withval"],
[with_pcre="auto"])
if test "x$with_pcre" != "xno"; then
m4_ifdef([PKG_CHECK_MODULES], [
PKG_CHECK_MODULES([PCRE], [libpcre],
[_PKG_CONFIG(PCRE_LIBS_ONLY_L, [libs-only-L], [libpcre])
PCRE_LIBS_ONLY_L="$pkg_cv_PCRE_LIBS_ONLY_L"
GRN_WITH_PCRE=yes],
[GRN_WITH_PCRE=no])
],
[GRN_WITH_PCRE=no])
if test "x$with_pcre" = "xyes" -a "$GRN_WITH_PCRE" != "yes"; then
AC_MSG_ERROR("No PCRE found")
fi
fi
AC_SUBST(GRN_WITH_PCRE)
AC_SUBST(PCRE_CFLAGS)
AC_SUBST(PCRE_LIBS_ONLY_L)
# SSL
GRN_WITH_SSL=no
AC_ARG_WITH(ssl,
@ -1788,11 +1764,6 @@ echo "groonga-httpd:"
echo " enable: $enable_groonga_httpd"
if test "$enable_groonga_httpd" = "yes"; then
echo " default database path: $GROONGA_HTTPD_DEFAULT_DATABASE_PATH"
echo " PCRE: $GRN_WITH_PCRE"
if test "$GRN_WITH_PCRE" = "yes"; then
echo " CFLAGS: $PCRE_CFLAGS"
echo " LIBS only -L: $PCRE_LIBS_ONLY_L"
fi
echo " SSL: $GRN_WITH_SSL"
if test "$GRN_WITH_SSL" = "yes"; then
echo " CFLAGS: $SSL_CFLAGS"

View file

@ -23,7 +23,6 @@ case "${TRAVIS_OS_NAME}" in
brew outdated pkg-config || brew upgrade pkg-config
brew reinstall libtool
brew outdated libevent || brew upgrade libevent
brew outdated pcre || brew upgrade pcre
brew install \
autoconf-archive \
msgpack \