MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer

in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin

UBSAN detected nullptr-with-offset errors in a few places
when handling empty blobs.

Fix:
- Adding DBUG_ASSERT(source_string) into all hash_sort() implementations
  to catch this problem in non-UBSAN debug builds.
- Fixing mi_unique_hash(), mi_unique_comp(),
  _ma_unique_hash(), _ma_unique_comp() to replace a NULL pointer with
  a pointer to an empty string.

Note: we should also add DBUG_ASSERT(source_string != NULL) to
all implementations of strnncoll*(), but I'm afraid that would make
the patch too long and too dangerous for 10.5.
This commit is contained in:
Alexander Barkov 2025-02-03 15:00:35 +04:00
commit 583b39811c
13 changed files with 81 additions and 3 deletions

View file

@ -294,6 +294,7 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *end = key + len;
ulong tmp1= *nr1;
ulong tmp2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < end ; key++)
{
@ -314,6 +315,7 @@ void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
'A ' and 'A' as identical
*/
const uchar *end= skip_trailing_space(key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_bin(cs, key, end - key, nr1, nr2);
}

View file

@ -703,7 +703,8 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
/*
Remove end space. We have to do this to be able to compare
'AE' and 'Ä' as identical

View file

@ -618,6 +618,7 @@ my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
register ulong m1= *nr1, m2= *nr2;
const uchar *end= key + len;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < end ; key++)
{
MY_HASH_ADD(m1, m2, (uint)*key);
@ -636,6 +637,7 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
'A ' and 'A' as identical
*/
const uchar *end= skip_trailing_space(key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_mb_nopad_bin(cs, key, end - key, nr1, nr2);
}

View file

@ -347,6 +347,7 @@ void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
register const uchar *sort_order=cs->sort_order;
const uchar *end= key + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < (uchar*) end ; key++)
{
MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
@ -363,6 +364,7 @@ void my_hash_sort_simple(CHARSET_INFO *cs,
register const uchar *sort_order=cs->sort_order;
const uchar *end;
uint16 space_weight= sort_order[' '];
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
/*
Remove all trailing characters that are equal to space.

View file

@ -537,6 +537,7 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
my_uca_scanner scanner;
int space_weight= my_space_weight(&cs->uca->level[0]);
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
@ -590,6 +591,7 @@ MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
int s_res;
my_uca_scanner scanner;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);

View file

@ -1359,6 +1359,7 @@ my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{
@ -1376,6 +1377,7 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_ci_lengthsp(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -1486,6 +1488,7 @@ my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end= pos + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(pos); /* Avoid UBSAN nullptr-with-offset */
for ( ; pos < end ; pos++)
{
@ -1501,6 +1504,7 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs,
const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_ci_lengthsp(cs, (const char *) pos, len);
DBUG_ASSERT(pos); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2);
}
@ -2250,6 +2254,7 @@ my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
@ -2270,6 +2275,7 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -3139,6 +3145,7 @@ my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e=s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{
@ -3155,6 +3162,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_mb2(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_ucs2_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -3279,6 +3287,7 @@ my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end= key + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for ( ; key < end ; key++)
{
MY_HASH_ADD(m1, m2, (uint)*key);
@ -3293,6 +3302,7 @@ my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
const uchar *key, size_t len, ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
}

View file

@ -4977,6 +4977,7 @@ static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t
const uchar *e= s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
@ -4997,6 +4998,7 @@ static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen,
'A ' and 'A' as identical
*/
const uchar *e= skip_trailing_space(s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2);
}
@ -7414,6 +7416,7 @@ my_hash_sort_utf8mb4_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
@ -7446,6 +7449,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
'A ' and 'A' as identical
*/
const uchar *e= skip_trailing_space(s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2);
}

View file

@ -81,7 +81,7 @@
static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
{
const uchar *end= ptr + len;
DBUG_ASSERT(ptr); /* Avoid UBSAN nullptr-with-offset */
if (len > 20)
{
const uchar *end_words= (const uchar *)(intptr)