From eadd87880887e6ca73e6f292c9d856df7e56c3c0 Mon Sep 17 00:00:00 2001
From: Alexander Barkov <bar@mariadb.com>
Date: Fri, 8 Oct 2021 20:44:38 +0400
Subject: [PATCH] MDEV-23269 SIGSEGV in ft_boolean_check_syntax_string on
 setting ft_boolean_syntax

The crash happened because my_isalnum() does not support character
sets with mbminlen>1.

The value of "ft_boolean_syntax" is converted to utf8 in do_string_check().
So calling my_isalnum() in combination with "default_charset_info" was wrong.

Adding new parameters (size_t length, CHARSET_INFO *cs) to
ft_boolean_check_syntax_string() and passing self->charset(thd)
as the character set.
---
 include/ft_global.h                     |  3 ++-
 mysql-test/r/ctype_utf16_def.result     |  5 +++++
 mysql-test/r/ctype_utf32_def.result     |  6 ++++++
 mysql-test/t/ctype_utf16_def.test       |  7 +++++++
 mysql-test/t/ctype_utf32_def-master.opt |  1 +
 mysql-test/t/ctype_utf32_def.test       |  9 +++++++++
 sql/mysqld.cc                           |  4 +++-
 sql/sys_vars.cc                         |  4 +++-
 storage/myisam/ft_parser.c              | 13 ++++++++++---
 9 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 mysql-test/r/ctype_utf32_def.result
 create mode 100644 mysql-test/t/ctype_utf32_def-master.opt
 create mode 100644 mysql-test/t/ctype_utf32_def.test

diff --git a/include/ft_global.h b/include/ft_global.h
index 725363c3aa8..9f2d52610ba 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -90,7 +90,8 @@ void ft_free_stopwords(void);
 
 FT_INFO *ft_init_search(uint,void *, uint, uchar *, size_t,
                         CHARSET_INFO *, uchar *);
-my_bool ft_boolean_check_syntax_string(const uchar *);
+my_bool ft_boolean_check_syntax_string(const uchar *, size_t length,
+                                       CHARSET_INFO *cs);
 
 /* Internal symbols for fulltext between maria and MyISAM */
 
diff --git a/mysql-test/r/ctype_utf16_def.result b/mysql-test/r/ctype_utf16_def.result
index 98b6f7d913d..b5827d45619 100644
--- a/mysql-test/r/ctype_utf16_def.result
+++ b/mysql-test/r/ctype_utf16_def.result
@@ -8,3 +8,8 @@ character_set_server	utf16
 SHOW VARIABLES LIKE 'ft_stopword_file';
 Variable_name	Value
 ft_stopword_file	(built-in)
+#
+# MDEV-23269 SIGSEGV in ft_boolean_check_syntax_string on setting ft_boolean_syntax
+#
+SET GLOBAL ft_boolean_syntax='+ -><()~*:""&|';
+SET GLOBAL ft_boolean_syntax=DEFAULT;
diff --git a/mysql-test/r/ctype_utf32_def.result b/mysql-test/r/ctype_utf32_def.result
new file mode 100644
index 00000000000..611072eb75b
--- /dev/null
+++ b/mysql-test/r/ctype_utf32_def.result
@@ -0,0 +1,6 @@
+call mtr.add_suppression("'utf32' can not be used as client character set");
+#
+# MDEV-23269 SIGSEGV in ft_boolean_check_syntax_string on setting ft_boolean_syntax
+#
+SET GLOBAL ft_boolean_syntax='+ -><()~*:""&|';
+SET GLOBAL ft_boolean_syntax=DEFAULT;
diff --git a/mysql-test/t/ctype_utf16_def.test b/mysql-test/t/ctype_utf16_def.test
index 0829cd53285..c6de842f618 100644
--- a/mysql-test/t/ctype_utf16_def.test
+++ b/mysql-test/t/ctype_utf16_def.test
@@ -7,3 +7,10 @@ call mtr.add_suppression("'utf16' can not be used as client character set");
 SHOW VARIABLES LIKE 'collation_server';
 SHOW VARIABLES LIKE 'character_set_server';
 SHOW VARIABLES LIKE 'ft_stopword_file';
+
+--echo #
+--echo # MDEV-23269 SIGSEGV in ft_boolean_check_syntax_string on setting ft_boolean_syntax
+--echo #
+
+SET GLOBAL ft_boolean_syntax='+ -><()~*:""&|';
+SET GLOBAL ft_boolean_syntax=DEFAULT;
diff --git a/mysql-test/t/ctype_utf32_def-master.opt b/mysql-test/t/ctype_utf32_def-master.opt
new file mode 100644
index 00000000000..3b0880cbff3
--- /dev/null
+++ b/mysql-test/t/ctype_utf32_def-master.opt
@@ -0,0 +1 @@
+--character-set-server=utf32,latin1 --collation-server=utf32_general_ci
diff --git a/mysql-test/t/ctype_utf32_def.test b/mysql-test/t/ctype_utf32_def.test
new file mode 100644
index 00000000000..e23f96052d3
--- /dev/null
+++ b/mysql-test/t/ctype_utf32_def.test
@@ -0,0 +1,9 @@
+--source include/have_utf32.inc
+call mtr.add_suppression("'utf32' can not be used as client character set");
+
+--echo #
+--echo # MDEV-23269 SIGSEGV in ft_boolean_check_syntax_string on setting ft_boolean_syntax
+--echo #
+
+SET GLOBAL ft_boolean_syntax='+ -><()~*:""&|';
+SET GLOBAL ft_boolean_syntax=DEFAULT;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 9afc701c6ba..d723c36e4cb 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -9666,7 +9666,9 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
   if (global_system_variables.low_priority_updates)
     thr_upgraded_concurrent_insert_lock= TL_WRITE_LOW_PRIORITY;
 
-  if (ft_boolean_check_syntax_string((uchar*) ft_boolean_syntax))
+  if (ft_boolean_check_syntax_string((uchar*) ft_boolean_syntax,
+                                     strlen(ft_boolean_syntax),
+                                     system_charset_info))
   {
     sql_print_error("Invalid ft-boolean-syntax string: %s\n",
                     ft_boolean_syntax);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index e4de3d8d0aa..f231f49a667 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1022,7 +1022,9 @@ static Sys_var_ulong Sys_flush_time(
 static bool check_ftb_syntax(sys_var *self, THD *thd, set_var *var)
 {
   return ft_boolean_check_syntax_string((uchar*)
-                      (var->save_result.string_value.str));
+                      (var->save_result.string_value.str),
+                      var->save_result.string_value.length,
+                      self->charset(thd));
 }
 static bool query_cache_flush(sys_var *self, THD *thd, enum_var_type type)
 {
diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c
index f6930e91e6e..b2170a93cde 100644
--- a/storage/myisam/ft_parser.c
+++ b/storage/myisam/ft_parser.c
@@ -78,18 +78,25 @@ FT_WORD * ft_linearize(TREE *wtree, MEM_ROOT *mem_root)
   DBUG_RETURN(wlist);
 }
 
-my_bool ft_boolean_check_syntax_string(const uchar *str)
+my_bool ft_boolean_check_syntax_string(const uchar *str, size_t length,
+                                       CHARSET_INFO *cs)
 {
   uint i, j;
 
+  if (cs->mbminlen != 1)
+  {
+    DBUG_ASSERT(0);
+    return 1;
+  }
+
   if (!str ||
-      (strlen((char*) str)+1 != sizeof(DEFAULT_FTB_SYNTAX)) ||
+      (length + 1 != sizeof(DEFAULT_FTB_SYNTAX)) ||
       (str[0] != ' ' && str[1] != ' '))
     return 1;
   for (i=0; i<sizeof(DEFAULT_FTB_SYNTAX); i++)
   {
     /* limiting to 7-bit ascii only */
-    if ((unsigned char)(str[i]) > 127 || my_isalnum(default_charset_info, str[i]))
+    if ((unsigned char)(str[i]) > 127 || my_isalnum(cs, str[i]))
       return 1;
     for (j=0; j<i; j++)
       if (str[i] == str[j] && (i != 11 || j != 10))