From f552febe4315875143d952e41b2b5d17ca29b39c Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 17 Nov 2023 17:41:23 +0000 Subject: [PATCH] MDEV-30879 Add support for up to BASE 62 to CONV() BASE 62 uses 0-9, A-Z and then a-z to give the numbers 0-61. This patch increases the range of the string functions to cover this. Based on ideas and tests in PR #2589, but re-written into the charset functions. Includes fix by Sergei, UBSAN complained: ctype-simple.c:683:38: runtime error: negation of -9223372036854775808 cannot be represented in type 'long long int'; cast to an unsigned type to negate this value to itself Co-authored-by: Weijun Huang Co-authored-by: Sergei Golubchik --- include/m_string.h | 1 + mysql-test/main/func_str.result | 64 +++++++++++++++++++++++++++++++-- mysql-test/main/func_str.test | 39 +++++++++++++++++++- sql/item_strfunc.cc | 4 +-- strings/ctype-simple.c | 22 +++++++++++- strings/ctype-ucs2.c | 8 +++++ strings/int2str.c | 15 +++++--- strings/longlong2str.c | 15 +++++--- strings/str2int.c | 10 +++--- strings/string.doc | 4 +-- 10 files changed, 160 insertions(+), 22 deletions(-) diff --git a/include/m_string.h b/include/m_string.h index 0360cd3868e..fcbb3769192 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -74,6 +74,7 @@ extern "C" { #endif /* Declared in int2str() */ +extern const char _dig_vec_base62[]; extern const char _dig_vec_upper[]; extern const char _dig_vec_lower[]; diff --git a/mysql-test/main/func_str.result b/mysql-test/main/func_str.result index 16e47a8d95d..2e6e27136bf 100644 --- a/mysql-test/main/func_str.result +++ b/mysql-test/main/func_str.result @@ -1078,8 +1078,8 @@ lpad(12345, 5, "#") SELECT conv(71, 10, 36), conv('1Z', 36, 10); conv(71, 10, 36) conv('1Z', 36, 10) 1Z 71 -SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); -conv(71, 10, 37) conv('1Z', 37, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10) +SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); +conv(71, 10, 63) conv('1Z', 63, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10) NULL NULL NULL NULL NULL create table t1 (id int(1), str varchar(10)) DEFAULT CHARSET=utf8; insert into t1 values (1,'aaaaaaaaaa'), (2,'bbbbbbbbbb'); @@ -5535,3 +5535,63 @@ aes_encrypt(a,a) is null # # End of 11.2 tests # +# +# MDEV-30879 Add conversion to based 62 for CONV function +# +SELECT CONV('1z', 62, 10); +CONV('1z', 62, 10) +123 +SELECT CONV('1Z', 62, 10); +CONV('1Z', 62, 10) +97 +SELECT CONV('-1Z', 62, 10); +CONV('-1Z', 62, 10) +18446744073709551519 +SELECT CONV('-1Z', -62, 10); +CONV('-1Z', -62, 10) +18446744073709551519 +SELECT CONV('-1Z', 62, -10); +CONV('-1Z', 62, -10) +-97 +SELECT CONV('-1Z', -62, -10); +CONV('-1Z', -62, -10) +-97 +SELECT CONV('AzL8n0Y58m7', 62, 10); +CONV('AzL8n0Y58m7', 62, 10) +9223372036854775807 +SELECT CONV('LygHa16AHYE', 62, 10); +CONV('LygHa16AHYE', 62, 10) +18446744073709551614 +SELECT CONV('LygHa16AHYF', 62, 10); +CONV('LygHa16AHYF', 62, 10) +18446744073709551615 +SELECT CONV('LygHa16AHZ0', 62, 10); +CONV('LygHa16AHZ0', 62, 10) +18446744073709551615 +SELECT CONV('-AzL8n0Y58m7', -62, -10); +CONV('-AzL8n0Y58m7', -62, -10) +-9223372036854775807 +SELECT CONV('-AzL8n0Y58m8', -62, -10); +CONV('-AzL8n0Y58m8', -62, -10) +-9223372036854775808 +SELECT CONV('-AzL8n0Y58m9', -62, -10); +CONV('-AzL8n0Y58m9', -62, -10) +-9223372036854775808 +SELECT CONV('-LygHa16AHZ0', -62, -10); +CONV('-LygHa16AHZ0', -62, -10) +-9223372036854775808 +SELECT CONV('LygHa16AHYF', 63, 10); +CONV('LygHa16AHYF', 63, 10) +NULL +SELECT CONV(18446744073709551615, 10, 63); +CONV(18446744073709551615, 10, 63) +NULL +SELECT CONV(18446744073709551615, 10, 62); +CONV(18446744073709551615, 10, 62) +LygHa16AHYF +SELECT CONV(-9223372036854775808, -10, -62); +CONV(-9223372036854775808, -10, -62) +-AzL8n0Y58m8 +# +# End of 11.4 tests +# diff --git a/mysql-test/main/func_str.test b/mysql-test/main/func_str.test index 4c579b50b8c..91b57e354ae 100644 --- a/mysql-test/main/func_str.test +++ b/mysql-test/main/func_str.test @@ -570,7 +570,7 @@ SELECT lpad(12345, 5, "#"); # SELECT conv(71, 10, 36), conv('1Z', 36, 10); -SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); +SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); # # Bug in SUBSTRING when mixed with CONCAT and ORDER BY (Bug #3089) @@ -2481,3 +2481,40 @@ select aes_encrypt(a,a) is null from (values('a'),(NULL),('b')) x; --echo # --echo # End of 11.2 tests --echo # + +--echo # +--echo # MDEV-30879 Add conversion to based 62 for CONV function +--echo # + +SELECT CONV('1z', 62, 10); +SELECT CONV('1Z', 62, 10); + +SELECT CONV('-1Z', 62, 10); +SELECT CONV('-1Z', -62, 10); +SELECT CONV('-1Z', 62, -10); +SELECT CONV('-1Z', -62, -10); + +# Check limits +SELECT CONV('AzL8n0Y58m7', 62, 10); +SELECT CONV('LygHa16AHYE', 62, 10); +SELECT CONV('LygHa16AHYF', 62, 10); + +# Overflow doesn't appear to warn, but does overflow +SELECT CONV('LygHa16AHZ0', 62, 10); + +SELECT CONV('-AzL8n0Y58m7', -62, -10); +SELECT CONV('-AzL8n0Y58m8', -62, -10); +SELECT CONV('-AzL8n0Y58m9', -62, -10); +SELECT CONV('-LygHa16AHZ0', -62, -10); + +# Should NULL +SELECT CONV('LygHa16AHYF', 63, 10); +SELECT CONV(18446744073709551615, 10, 63); + +# Test 10 -> 62 +SELECT CONV(18446744073709551615, 10, 62); +SELECT CONV(-9223372036854775808, -10, -62); + +--echo # +--echo # End of 11.4 tests +--echo # diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 9bf98948487..216bafea299 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -3936,8 +3936,8 @@ String *Item_func_conv::val_str(String *str) // Note that abs(INT_MIN) is undefined. if (args[0]->null_value || args[1]->null_value || args[2]->null_value || from_base == INT_MIN || to_base == INT_MIN || - abs(to_base) > 36 || abs(to_base) < 2 || - abs(from_base) > 36 || abs(from_base) < 2 || !(res->length())) + abs(to_base) > 62 || abs(to_base) < 2 || + abs(from_base) > 62 || abs(from_base) < 2 || !(res->length())) { null_value= 1; return NULL; diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 94dc7228cf5..83a366028ca 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -451,7 +451,11 @@ long my_strntol_8bit(CHARSET_INFO *cs, else if (c>='A' && c<='Z') c = c - 'A' + 10; else if (c>='a' && c<='z') + { c = c - 'a' + 10; + if (base > 36) + c += 26; + } else break; if (c >= base) @@ -546,7 +550,11 @@ ulong my_strntoul_8bit(CHARSET_INFO *cs, else if (c>='A' && c<='Z') c = c - 'A' + 10; else if (c>='a' && c<='z') + { c = c - 'a' + 10; + if (base > 36) + c += 26; + } else break; if (c >= base) @@ -634,7 +642,11 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), else if (c>='A' && c<='Z') c = c - 'A' + 10; else if (c>='a' && c<='z') + { c = c - 'a' + 10; + if (base > 36) + c += 26; + } else break; if (c >= base) @@ -656,8 +668,12 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), if (negative) { - if (i > (ulonglong) LONGLONG_MIN) + if (i >= (ulonglong) LONGLONG_MIN) + { + if (i == (ulonglong) LONGLONG_MIN) + return LONGLONG_MIN; overflow = 1; + } } else if (i > (ulonglong) LONGLONG_MAX) overflow = 1; @@ -731,7 +747,11 @@ ulonglong my_strntoull_8bit(CHARSET_INFO *cs, else if (c>='A' && c<='Z') c = c - 'A' + 10; else if (c>='a' && c<='z') + { c = c - 'a' + 10; + if (base > 36) + c += 26; + } else break; if (c >= base) diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 5d67762ac2f..8637f6406e7 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -462,7 +462,11 @@ bs: else if ( wc>='A' && wc<='Z') wc = wc - 'A' + 10; else if ( wc>='a' && wc<='z') + { wc = wc - 'a' + 10; + if (base > 36) + wc += 26; + } else break; if ((int)wc >= base) @@ -575,7 +579,11 @@ bs: else if ( wc>='A' && wc<='Z') wc = wc - 'A' + 10; else if ( wc>='a' && wc<='z') + { wc = wc - 'a' + 10; + if (base > 36) + wc += 26; + } else break; if ((int)wc >= base) diff --git a/strings/int2str.c b/strings/int2str.c index 9d099d2e7d1..7fc608172fd 100644 --- a/strings/int2str.c +++ b/strings/int2str.c @@ -31,6 +31,8 @@ /* _dig_vec arrays are public because they are used in several outer places. */ +const char _dig_vec_base62[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; const char _dig_vec_upper[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const char _dig_vec_lower[] = @@ -50,7 +52,7 @@ const char _dig_vec_lower[] = DESCRIPTION Converts the (long) integer value to its character form and moves it to the destination buffer followed by a terminating NUL. - If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is + If radix is -2..-62, val is taken to be SIGNED, if radix is 2..62, val is taken to be UNSIGNED. That is, val is signed if and only if radix is. All other radixes treated as bad and nothing will be changed in this case. @@ -68,12 +70,17 @@ int2str(register long int val, register char *dst, register int radix, char buffer[65]; register char *p; long int new_val; - const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; + const char *dig_vec; ulong uval= (ulong) val; + if (radix < -36 || radix > 36) + dig_vec= _dig_vec_base62; + else + dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; + if (radix < 0) { - if (radix < -36 || radix > -2) + if (radix < -62 || radix > -2) return NullS; if (val < 0) { @@ -83,7 +90,7 @@ int2str(register long int val, register char *dst, register int radix, } radix = -radix; } - else if (radix > 36 || radix < 2) + else if (radix > 62 || radix < 2) return NullS; /* diff --git a/strings/longlong2str.c b/strings/longlong2str.c index ab46353d5e1..9477e01be77 100644 --- a/strings/longlong2str.c +++ b/strings/longlong2str.c @@ -35,8 +35,8 @@ result is normally a pointer to this NUL character, but if the radix is dud the result will be NullS and nothing will be changed. - If radix is -2..-36, val is taken to be SIGNED. - If radix is 2.. 36, val is taken to be UNSIGNED. + If radix is -2..-62, val is taken to be SIGNED. + If radix is 2.. 62, val is taken to be UNSIGNED. That is, val is signed if and only if radix is. You will normally use radix -10 only through itoa and ltoa, for radix 2, 8, or 16 unsigned is what you generally want. @@ -63,12 +63,17 @@ char *ll2str(longlong val,char *dst,int radix, int upcase) char buffer[65]; register char *p; long long_val; - const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; + const char *dig_vec; ulonglong uval= (ulonglong) val; + if (radix < -36 || radix > 36) + dig_vec= _dig_vec_base62; + else + dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; + if (radix < 0) { - if (radix < -36 || radix > -2) return (char*) 0; + if (radix < -62 || radix > -2) return (char*) 0; if (val < 0) { *dst++ = '-'; /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */ @@ -78,7 +83,7 @@ char *ll2str(longlong val,char *dst,int radix, int upcase) } else { - if (radix > 36 || radix < 2) return (char*) 0; + if (radix > 62 || radix < 2) return (char*) 0; } if (uval == 0) { diff --git a/strings/str2int.c b/strings/str2int.c index 439a50ddef6..ab91e862e8b 100644 --- a/strings/str2int.c +++ b/strings/str2int.c @@ -55,9 +55,9 @@ #include "my_sys.h" /* defines errno */ #include -#define char_val(X) (X >= '0' && X <= '9' ? X-'0' :\ +#define char_val(X, Y) (X >= '0' && X <= '9' ? X-'0' :\ X >= 'A' && X <= 'Z' ? X-'A'+10 :\ - X >= 'a' && X <= 'z' ? X-'a'+10 :\ + X >= 'a' && X <= 'z' ? (Y <= 36 ? X-'a'+10 : X-'a'+36) :\ '\177') char *str2int(register const char *src, register int radix, long int lower, @@ -76,10 +76,10 @@ char *str2int(register const char *src, register int radix, long int lower, *val = 0; - /* Check that the radix is in the range 2..36 */ + /* Check that the radix is in the range 2..62 */ #ifndef DBUG_OFF - if (radix < 2 || radix > 36) { + if (radix < 2 || radix > 62) { errno=EDOM; return NullS; } @@ -126,7 +126,7 @@ char *str2int(register const char *src, register int radix, long int lower, to left in order to avoid overflow. Answer is after last digit. */ - for (n = 0; (digits[n]=char_val(*src)) < radix && n < 20; n++,src++) ; + for (n = 0; (digits[n]=char_val(*src, radix)) < radix && n < 20; n++,src++) ; /* Check that there is at least one digit */ diff --git a/strings/string.doc b/strings/string.doc index 3e5b607347a..4050f6afb66 100644 --- a/strings/string.doc +++ b/strings/string.doc @@ -22,8 +22,8 @@ Speciella anv the destination string "dst" followed by a terminating NUL. The result is normally a pointer to this NUL character, but if the radix is dud the result will be NullS and nothing will be changed. - If radix is -2..-36, val is taken to be SIGNED. - If radix is 2.. 36, val is taken to be UNSIGNED. + If radix is -2..-62, val is taken to be SIGNED. + If radix is 2.. 62, val is taken to be UNSIGNED. That is, val is signed if and only if radix is. You will normally use radix -10 only through itoa and ltoa, for radix 2, 8, or 16 unsigned is what you generally want.