mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-30879 Add support for up to BASE 62 to CONV()
BASE 62 uses the digits 0-9, A-Z and then a-z to represent the values 0-61. This patch increases the range of the string functions to cover this. Based on ideas and tests in PR #2589, but re-written into the charset functions. Includes a fix by Sergei for a UBSAN complaint: ctype-simple.c:683:38: runtime error: negation of -9223372036854775808 cannot be represented in type 'long long int'; cast to an unsigned type to negate this value to itself. Co-authored-by: Weijun Huang <huangweijun1001@gmail.com> Co-authored-by: Sergei Golubchik <serg@mariadb.org>
This commit is contained in:
parent
be6d48fd53
commit
f552febe43
10 changed files with 160 additions and 22 deletions
|
@ -74,6 +74,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
/* Defined at file scope in the same source file as int2str() */
|
||||
extern const char _dig_vec_base62[];
|
||||
extern const char _dig_vec_upper[];
|
||||
extern const char _dig_vec_lower[];
|
||||
|
||||
|
|
|
@ -1078,8 +1078,8 @@ lpad(12345, 5, "#")
|
|||
SELECT conv(71, 10, 36), conv('1Z', 36, 10);
|
||||
conv(71, 10, 36) conv('1Z', 36, 10)
|
||||
1Z 71
|
||||
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
|
||||
conv(71, 10, 37) conv('1Z', 37, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10)
|
||||
SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
|
||||
conv(71, 10, 63) conv('1Z', 63, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10)
|
||||
NULL NULL NULL NULL NULL
|
||||
create table t1 (id int(1), str varchar(10)) DEFAULT CHARSET=utf8;
|
||||
insert into t1 values (1,'aaaaaaaaaa'), (2,'bbbbbbbbbb');
|
||||
|
@ -5535,3 +5535,63 @@ aes_encrypt(a,a) is null
|
|||
#
|
||||
# End of 11.2 tests
|
||||
#
|
||||
#
|
||||
# MDEV-30879 Add conversion to based 62 for CONV function
|
||||
#
|
||||
SELECT CONV('1z', 62, 10);
|
||||
CONV('1z', 62, 10)
|
||||
123
|
||||
SELECT CONV('1Z', 62, 10);
|
||||
CONV('1Z', 62, 10)
|
||||
97
|
||||
SELECT CONV('-1Z', 62, 10);
|
||||
CONV('-1Z', 62, 10)
|
||||
18446744073709551519
|
||||
SELECT CONV('-1Z', -62, 10);
|
||||
CONV('-1Z', -62, 10)
|
||||
18446744073709551519
|
||||
SELECT CONV('-1Z', 62, -10);
|
||||
CONV('-1Z', 62, -10)
|
||||
-97
|
||||
SELECT CONV('-1Z', -62, -10);
|
||||
CONV('-1Z', -62, -10)
|
||||
-97
|
||||
SELECT CONV('AzL8n0Y58m7', 62, 10);
|
||||
CONV('AzL8n0Y58m7', 62, 10)
|
||||
9223372036854775807
|
||||
SELECT CONV('LygHa16AHYE', 62, 10);
|
||||
CONV('LygHa16AHYE', 62, 10)
|
||||
18446744073709551614
|
||||
SELECT CONV('LygHa16AHYF', 62, 10);
|
||||
CONV('LygHa16AHYF', 62, 10)
|
||||
18446744073709551615
|
||||
SELECT CONV('LygHa16AHZ0', 62, 10);
|
||||
CONV('LygHa16AHZ0', 62, 10)
|
||||
18446744073709551615
|
||||
SELECT CONV('-AzL8n0Y58m7', -62, -10);
|
||||
CONV('-AzL8n0Y58m7', -62, -10)
|
||||
-9223372036854775807
|
||||
SELECT CONV('-AzL8n0Y58m8', -62, -10);
|
||||
CONV('-AzL8n0Y58m8', -62, -10)
|
||||
-9223372036854775808
|
||||
SELECT CONV('-AzL8n0Y58m9', -62, -10);
|
||||
CONV('-AzL8n0Y58m9', -62, -10)
|
||||
-9223372036854775808
|
||||
SELECT CONV('-LygHa16AHZ0', -62, -10);
|
||||
CONV('-LygHa16AHZ0', -62, -10)
|
||||
-9223372036854775808
|
||||
SELECT CONV('LygHa16AHYF', 63, 10);
|
||||
CONV('LygHa16AHYF', 63, 10)
|
||||
NULL
|
||||
SELECT CONV(18446744073709551615, 10, 63);
|
||||
CONV(18446744073709551615, 10, 63)
|
||||
NULL
|
||||
SELECT CONV(18446744073709551615, 10, 62);
|
||||
CONV(18446744073709551615, 10, 62)
|
||||
LygHa16AHYF
|
||||
SELECT CONV(-9223372036854775808, -10, -62);
|
||||
CONV(-9223372036854775808, -10, -62)
|
||||
-AzL8n0Y58m8
|
||||
#
|
||||
# End of 11.4 tests
|
||||
#
|
||||
|
|
|
@ -570,7 +570,7 @@ SELECT lpad(12345, 5, "#");
|
|||
#
|
||||
|
||||
SELECT conv(71, 10, 36), conv('1Z', 36, 10);
|
||||
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
|
||||
SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
|
||||
|
||||
#
|
||||
# Bug in SUBSTRING when mixed with CONCAT and ORDER BY (Bug #3089)
|
||||
|
@ -2481,3 +2481,40 @@ select aes_encrypt(a,a) is null from (values('a'),(NULL),('b')) x;
|
|||
--echo #
|
||||
--echo # End of 11.2 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-30879 Add conversion to based 62 for CONV function
|
||||
--echo #
|
||||
|
||||
SELECT CONV('1z', 62, 10);
|
||||
SELECT CONV('1Z', 62, 10);
|
||||
|
||||
SELECT CONV('-1Z', 62, 10);
|
||||
SELECT CONV('-1Z', -62, 10);
|
||||
SELECT CONV('-1Z', 62, -10);
|
||||
SELECT CONV('-1Z', -62, -10);
|
||||
|
||||
# Check limits
|
||||
SELECT CONV('AzL8n0Y58m7', 62, 10);
|
||||
SELECT CONV('LygHa16AHYE', 62, 10);
|
||||
SELECT CONV('LygHa16AHYF', 62, 10);
|
||||
|
||||
# Overflow doesn't appear to warn, but does overflow
|
||||
SELECT CONV('LygHa16AHZ0', 62, 10);
|
||||
|
||||
SELECT CONV('-AzL8n0Y58m7', -62, -10);
|
||||
SELECT CONV('-AzL8n0Y58m8', -62, -10);
|
||||
SELECT CONV('-AzL8n0Y58m9', -62, -10);
|
||||
SELECT CONV('-LygHa16AHZ0', -62, -10);
|
||||
|
||||
# Should return NULL
|
||||
SELECT CONV('LygHa16AHYF', 63, 10);
|
||||
SELECT CONV(18446744073709551615, 10, 63);
|
||||
|
||||
# Test 10 -> 62
|
||||
SELECT CONV(18446744073709551615, 10, 62);
|
||||
SELECT CONV(-9223372036854775808, -10, -62);
|
||||
|
||||
--echo #
|
||||
--echo # End of 11.4 tests
|
||||
--echo #
|
||||
|
|
|
@ -3936,8 +3936,8 @@ String *Item_func_conv::val_str(String *str)
|
|||
// Note that abs(INT_MIN) is undefined.
|
||||
if (args[0]->null_value || args[1]->null_value || args[2]->null_value ||
|
||||
from_base == INT_MIN || to_base == INT_MIN ||
|
||||
abs(to_base) > 36 || abs(to_base) < 2 ||
|
||||
abs(from_base) > 36 || abs(from_base) < 2 || !(res->length()))
|
||||
abs(to_base) > 62 || abs(to_base) < 2 ||
|
||||
abs(from_base) > 62 || abs(from_base) < 2 || !(res->length()))
|
||||
{
|
||||
null_value= 1;
|
||||
return NULL;
|
||||
|
|
|
@ -451,7 +451,11 @@ long my_strntol_8bit(CHARSET_INFO *cs,
|
|||
else if (c>='A' && c<='Z')
|
||||
c = c - 'A' + 10;
|
||||
else if (c>='a' && c<='z')
|
||||
{
|
||||
c = c - 'a' + 10;
|
||||
if (base > 36)
|
||||
c += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if (c >= base)
|
||||
|
@ -546,7 +550,11 @@ ulong my_strntoul_8bit(CHARSET_INFO *cs,
|
|||
else if (c>='A' && c<='Z')
|
||||
c = c - 'A' + 10;
|
||||
else if (c>='a' && c<='z')
|
||||
{
|
||||
c = c - 'a' + 10;
|
||||
if (base > 36)
|
||||
c += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if (c >= base)
|
||||
|
@ -634,7 +642,11 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
|||
else if (c>='A' && c<='Z')
|
||||
c = c - 'A' + 10;
|
||||
else if (c>='a' && c<='z')
|
||||
{
|
||||
c = c - 'a' + 10;
|
||||
if (base > 36)
|
||||
c += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if (c >= base)
|
||||
|
@ -656,8 +668,12 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
|||
|
||||
if (negative)
|
||||
{
|
||||
if (i > (ulonglong) LONGLONG_MIN)
|
||||
if (i >= (ulonglong) LONGLONG_MIN)
|
||||
{
|
||||
if (i == (ulonglong) LONGLONG_MIN)
|
||||
return LONGLONG_MIN;
|
||||
overflow = 1;
|
||||
}
|
||||
}
|
||||
else if (i > (ulonglong) LONGLONG_MAX)
|
||||
overflow = 1;
|
||||
|
@ -731,7 +747,11 @@ ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
|
|||
else if (c>='A' && c<='Z')
|
||||
c = c - 'A' + 10;
|
||||
else if (c>='a' && c<='z')
|
||||
{
|
||||
c = c - 'a' + 10;
|
||||
if (base > 36)
|
||||
c += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if (c >= base)
|
||||
|
|
|
@ -462,7 +462,11 @@ bs:
|
|||
else if ( wc>='A' && wc<='Z')
|
||||
wc = wc - 'A' + 10;
|
||||
else if ( wc>='a' && wc<='z')
|
||||
{
|
||||
wc = wc - 'a' + 10;
|
||||
if (base > 36)
|
||||
wc += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if ((int)wc >= base)
|
||||
|
@ -575,7 +579,11 @@ bs:
|
|||
else if ( wc>='A' && wc<='Z')
|
||||
wc = wc - 'A' + 10;
|
||||
else if ( wc>='a' && wc<='z')
|
||||
{
|
||||
wc = wc - 'a' + 10;
|
||||
if (base > 36)
|
||||
wc += 26;
|
||||
}
|
||||
else
|
||||
break;
|
||||
if ((int)wc >= base)
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
/*
|
||||
_dig_vec arrays are public because they are used in several outer places.
|
||||
*/
|
||||
const char _dig_vec_base62[] =
|
||||
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||
const char _dig_vec_upper[] =
|
||||
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const char _dig_vec_lower[] =
|
||||
|
@ -50,7 +52,7 @@ const char _dig_vec_lower[] =
|
|||
DESCRIPTION
|
||||
Converts the (long) integer value to its character form and moves it to
|
||||
the destination buffer followed by a terminating NUL.
|
||||
If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is
|
||||
If radix is -2..-62, val is taken to be SIGNED, if radix is 2..62, val is
|
||||
taken to be UNSIGNED. That is, val is signed if and only if radix is.
|
||||
All other radixes are treated as bad and nothing will be changed in this case.
|
||||
|
||||
|
@ -68,12 +70,17 @@ int2str(register long int val, register char *dst, register int radix,
|
|||
char buffer[65];
|
||||
register char *p;
|
||||
long int new_val;
|
||||
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
|
||||
const char *dig_vec;
|
||||
ulong uval= (ulong) val;
|
||||
|
||||
if (radix < -36 || radix > 36)
|
||||
dig_vec= _dig_vec_base62;
|
||||
else
|
||||
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
|
||||
|
||||
if (radix < 0)
|
||||
{
|
||||
if (radix < -36 || radix > -2)
|
||||
if (radix < -62 || radix > -2)
|
||||
return NullS;
|
||||
if (val < 0)
|
||||
{
|
||||
|
@ -83,7 +90,7 @@ int2str(register long int val, register char *dst, register int radix,
|
|||
}
|
||||
radix = -radix;
|
||||
}
|
||||
else if (radix > 36 || radix < 2)
|
||||
else if (radix > 62 || radix < 2)
|
||||
return NullS;
|
||||
|
||||
/*
|
||||
|
|
|
@ -35,8 +35,8 @@
|
|||
result is normally a pointer to this NUL character, but if the radix
|
||||
is dud the result will be NullS and nothing will be changed.
|
||||
|
||||
If radix is -2..-36, val is taken to be SIGNED.
|
||||
If radix is 2.. 36, val is taken to be UNSIGNED.
|
||||
If radix is -2..-62, val is taken to be SIGNED.
|
||||
If radix is 2.. 62, val is taken to be UNSIGNED.
|
||||
That is, val is signed if and only if radix is. You will normally
|
||||
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
|
||||
unsigned is what you generally want.
|
||||
|
@ -63,12 +63,17 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
|
|||
char buffer[65];
|
||||
register char *p;
|
||||
long long_val;
|
||||
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
|
||||
const char *dig_vec;
|
||||
ulonglong uval= (ulonglong) val;
|
||||
|
||||
if (radix < -36 || radix > 36)
|
||||
dig_vec= _dig_vec_base62;
|
||||
else
|
||||
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
|
||||
|
||||
if (radix < 0)
|
||||
{
|
||||
if (radix < -36 || radix > -2) return (char*) 0;
|
||||
if (radix < -62 || radix > -2) return (char*) 0;
|
||||
if (val < 0) {
|
||||
*dst++ = '-';
|
||||
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
|
||||
|
@ -78,7 +83,7 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (radix > 36 || radix < 2) return (char*) 0;
|
||||
if (radix > 62 || radix < 2) return (char*) 0;
|
||||
}
|
||||
if (uval == 0)
|
||||
{
|
||||
|
|
|
@ -55,9 +55,9 @@
|
|||
#include "my_sys.h" /* defines errno */
|
||||
#include <errno.h>
|
||||
|
||||
#define char_val(X) (X >= '0' && X <= '9' ? X-'0' :\
|
||||
#define char_val(X, Y) (X >= '0' && X <= '9' ? X-'0' :\
|
||||
X >= 'A' && X <= 'Z' ? X-'A'+10 :\
|
||||
X >= 'a' && X <= 'z' ? X-'a'+10 :\
|
||||
X >= 'a' && X <= 'z' ? (Y <= 36 ? X-'a'+10 : X-'a'+36) :\
|
||||
'\177')
|
||||
|
||||
char *str2int(register const char *src, register int radix, long int lower,
|
||||
|
@ -76,10 +76,10 @@ char *str2int(register const char *src, register int radix, long int lower,
|
|||
|
||||
*val = 0;
|
||||
|
||||
/* Check that the radix is in the range 2..36 */
|
||||
/* Check that the radix is in the range 2..62 */
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
if (radix < 2 || radix > 36) {
|
||||
if (radix < 2 || radix > 62) {
|
||||
errno=EDOM;
|
||||
return NullS;
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ char *str2int(register const char *src, register int radix, long int lower,
|
|||
to left in order to avoid overflow. Answer is after last digit.
|
||||
*/
|
||||
|
||||
for (n = 0; (digits[n]=char_val(*src)) < radix && n < 20; n++,src++) ;
|
||||
for (n = 0; (digits[n]=char_val(*src, radix)) < radix && n < 20; n++,src++) ;
|
||||
|
||||
/* Check that there is at least one digit */
|
||||
|
||||
|
|
|
@ -22,8 +22,8 @@ Speciella anv
|
|||
the destination string "dst" followed by a terminating NUL. The
|
||||
result is normally a pointer to this NUL character, but if the radix
|
||||
is dud the result will be NullS and nothing will be changed.
|
||||
If radix is -2..-36, val is taken to be SIGNED.
|
||||
If radix is 2.. 36, val is taken to be UNSIGNED.
|
||||
If radix is -2..-62, val is taken to be SIGNED.
|
||||
If radix is 2.. 62, val is taken to be UNSIGNED.
|
||||
That is, val is signed if and only if radix is. You will normally
|
||||
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
|
||||
unsigned is what you generally want.
|
||||
|
|
Loading…
Reference in a new issue