Bug #6737: REGEXP gives wrong result with case sensitive collation:

- A new flag MY_CS_CSSORT was introduced for case sensitivity. - Item_func_regex no longer passes REG_ICASE to regcomp() for case sensitive collations, in addition to binary collations.
2025-01-17 20:42:30 +01:00 · 2004-11-22 11:58:40 +04:00 · 2004-11-22 11:58:40 +04:00 · e3b94d4ef5
commit e3b94d4ef5
parent 7f5661ae44
7 changed files with 51 additions and 22 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -63,7 +63,7 @@ typedef struct unicase_info_st
 #define MY_CS_UNICODE	128    /* is a charset is full unicode   */
 #define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
-
+#define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CHARSET_UNDEFINED 0


--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@ -296,3 +296,12 @@ FD	C3BD	FD	1
 FE	C3BE	FE	1
 FF	C3BF	FF	1
 DROP TABLE t1;
+select 'a' regexp 'A' collate latin1_general_ci;
+'a' regexp 'A' collate latin1_general_ci
+1
+select 'a' regexp 'A' collate latin1_general_cs;
+'a' regexp 'A' collate latin1_general_cs
+0
+select 'a' regexp 'A' collate latin1_bin;
+'a' regexp 'A' collate latin1_bin
+0
--- a/mysql-test/t/ctype_latin1.test
+++ b/mysql-test/t/ctype_latin1.test
@ -53,3 +53,10 @@ SELECT
  hex(@l:=convert(@u using latin1)),
  a=@l FROM t1;
 DROP TABLE t1;
+
+#
+# Bug #6737: REGEXP gives wrong result with case sensitive collation
+#
+select 'a' regexp 'A' collate latin1_general_ci;
+select 'a' regexp 'A' collate latin1_general_cs;
+select 'a' regexp 'A' collate latin1_bin;
--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
      }
      else
      {
+        uchar *sort_order= all_charsets[cs->number]->sort_order;
        simple_cs_init_functions(all_charsets[cs->number]);
        new->mbminlen= 1;
        new->mbmaxlen= 1;
@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
          all_charsets[cs->number]->state |= MY_CS_LOADED;
        }
        all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
+        
+        /*
+          Check if case sensitive sort order: A < a < B.
+          We need the MY_CS_CSSORT flag for the regex library, and as
+          the case sensitivity flag for the 5.0 client protocol,
+          to support the isCaseSensitive() method in the JDBC driver.
+        */
+        if (sort_order && sort_order['A'] < sort_order['a'] &&
+                          sort_order['a'] < sort_order['B'])
+          all_charsets[cs->number]->state|= MY_CS_CSSORT; 
      }
    }
    else
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
      return 0;
    }
    int error;
-    if ((error=regcomp(&preg,res->c_ptr(),
-		       (cmp_collation.collation->state & MY_CS_BINSORT) ?
-		       REG_EXTENDED | REG_NOSUB :
-		       REG_EXTENDED | REG_NOSUB | REG_ICASE,
-		       cmp_collation.collation)))
+    if ((error= regcomp(&preg,res->c_ptr(),
+                        ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+                         (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+                         REG_EXTENDED | REG_NOSUB :
+                         REG_EXTENDED | REG_NOSUB | REG_ICASE,
+                        cmp_collation.collation)))
    {
      (void) regerror(error,&preg,buff,sizeof(buff));
      my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
 	regex_compiled=0;
      }
      if (regcomp(&preg,res2->c_ptr(),
-		  (cmp_collation.collation->state & MY_CS_BINSORT) ?
-		  REG_EXTENDED | REG_NOSUB :
-		  REG_EXTENDED | REG_NOSUB | REG_ICASE,
-		  cmp_collation.collation))
+                  ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+                   (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+                   REG_EXTENDED | REG_NOSUB :
+                   REG_EXTENDED | REG_NOSUB | REG_ICASE,
+                   cmp_collation.collation))
      {
 	null_value=1;
 	return 0;
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =

 CHARSET_INFO my_charset_latin2_czech_ci =
 {
-    2,0,0,				/* number */
-    MY_CS_COMPILED|MY_CS_STRNXFRM,	/* state      */
-    "latin2",				/* cs name    */
-    "latin2_czech_cs",			/* name */
-    "",					/* comment    */
-    NULL,				/* tailoring */
+    2,0,0,                                      /* number    */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
+    "latin2",                                   /* cs name   */
+    "latin2_czech_cs",                          /* name      */
+    "",                                         /* comment   */
+    NULL,                                       /* tailoring */
    ctype_czech,
    to_lower_czech,
    to_upper_czech,
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =

 CHARSET_INFO my_charset_cp1250_czech_ci =
 {
-  34,0,0,			/* number    */
-  MY_CS_COMPILED|MY_CS_STRNXFRM,		/* state     */
-  "cp1250",			/* cs name    */
-  "cp1250_czech_cs",		/* name      */
-  "",				/* comment   */
-  NULL,				/* tailoring */
+  34,0,0,                                     /* number    */
+  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
+  "cp1250",                                   /* cs name   */
+  "cp1250_czech_cs",                          /* name      */
+  "",                                         /* comment   */
+  NULL,                                       /* tailoring */
  ctype_win1250ch,
  to_lower_win1250ch,
  to_upper_win1250ch,