MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer

in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin

UBSAN detected the nullptr-with-offset in a few places
when handling empty blobs.

Fix:
- Adding DBUG_ASSERT(source_string) into all hash_sort() implementations
  to catch this problem in non-UBSAN debug builds.
- Fixing mi_unique_hash(), mi_unique_comp(),
  _ma_unique_hash(), _ma_unique_comp() to replace a NULL pointer with
  a pointer to an empty string.

Note: we should also add DBUG_ASSERT(source_string != NULL) to
all implementations of strnncoll*(), but I'm afraid that would make
the patch too long and too dangerous for 10.5.
This commit is contained in:
Alexander Barkov 2025-02-03 15:00:35 +04:00
parent 10fd2c207a
commit 583b39811c
13 changed files with 81 additions and 3 deletions

View File

@ -1451,4 +1451,26 @@ DROP TABLE t1, t2;
#
CREATE TABLE t1 (pk INT, a TEXT NOT NULL DEFAULT '', PRIMARY KEY (pk), b INT AUTO_INCREMENT, UNIQUE(b), UNIQUE (a,b)) ENGINE=myisam;
ERROR HY000: AUTO_INCREMENT column `b` cannot be used in the UNIQUE index `a`
#
# MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin
#
# Disable result log. The exact result is not important.
# We just need to make sure UBSAN nullptr-with-offset is not reported.
SELECT DISTINCT user,authentication_string FROM mysql.user;
SELECT DISTINCT USER,PASSWORD FROM mysql.user;
SELECT DISTINCT USER,plugin FROM mysql.user;
# Enabling result log again.
create or replace table t1 (t text) engine=aria;
insert into t1 values ('');
insert into t1 values (NULL);
select distinct t from t1;
t
NULL
alter table t1 ENGINE=MyISAM;
select distinct t from t1;
t
NULL
DROP TABLE t1;
# End of 10.5 tests

View File

@ -551,4 +551,26 @@ DROP TABLE t1, t2;
--error ER_NO_AUTOINCREMENT_WITH_UNIQUE
CREATE TABLE t1 (pk INT, a TEXT NOT NULL DEFAULT '', PRIMARY KEY (pk), b INT AUTO_INCREMENT, UNIQUE(b), UNIQUE (a,b)) ENGINE=myisam;
--echo #
--echo # MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin
--echo #
--echo # Disable result log. The exact result is not important.
--echo # We just need to make sure UBSAN nullptr-with-offset is not reported.
--disable_result_log
SELECT DISTINCT user,authentication_string FROM mysql.user;
SELECT DISTINCT USER,PASSWORD FROM mysql.user;
SELECT DISTINCT USER,plugin FROM mysql.user;
--enable_result_log
--echo # Enabling result log again.
create or replace table t1 (t text) engine=aria;
insert into t1 values ('');
insert into t1 values (NULL);
select distinct t from t1;
alter table t1 ENGINE=MyISAM;
select distinct t from t1;
DROP TABLE t1;
--echo # End of 10.5 tests

View File

@ -150,7 +150,6 @@ static const Native_func_registry func_array_vers[] =
{ { C_STRING_WITH_LEN("TRT_TRX_ID") }, BUILDER(Create_func_trt<TR_table::FLD_TRX_ID>)},
{ { C_STRING_WITH_LEN("TRT_TRX_SEES") }, BUILDER(Create_func_trt_trx_sees<Item_func_trt_trx_sees>)},
{ { C_STRING_WITH_LEN("TRT_TRX_SEES_EQ") }, BUILDER(Create_func_trt_trx_sees<Item_func_trt_trx_sees_eq>)},
{ {0, 0}, NULL}
};

View File

@ -139,6 +139,8 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record)
{
uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos);
memcpy((void*) &pos,pos+keyseg->bit_start,sizeof(char*));
if (!pos)
pos= (const uchar*) ""; /* hash_sort does not support NULL ptr */
if (!length || length > tmp_length)
length=tmp_length; /* The whole blob */
}
@ -236,6 +238,10 @@ my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
}
memcpy((void*) &pos_a, pos_a+keyseg->bit_start, sizeof(char*));
memcpy((void*) &pos_b, pos_b+keyseg->bit_start, sizeof(char*));
if (pos_a == 0)
pos_a= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */
if (pos_b == 0)
pos_b= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */
}
if (type == HA_KEYTYPE_TEXT/* the CHAR data type*/)
{

View File

@ -115,6 +115,8 @@ ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const uchar *record)
{
uint tmp_length=_mi_calc_blob_length(keyseg->bit_start,pos);
memcpy((char**) &pos, pos+keyseg->bit_start, sizeof(char*));
if (!pos)
pos= (const uchar*) ""; /* hash_sort does not support NULL ptr */
if (!length || length > tmp_length)
length=tmp_length; /* The whole blob */
}
@ -211,6 +213,10 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b,
}
memcpy((char**) &pos_a, pos_a+keyseg->bit_start, sizeof(char*));
memcpy((char**) &pos_b, pos_b+keyseg->bit_start, sizeof(char*));
if (pos_a == 0)
pos_a= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */
if (pos_b == 0)
pos_b= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */
}
if (type == HA_KEYTYPE_TEXT/*The CHAR data type*/)
{

View File

@ -294,6 +294,7 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *end = key + len;
ulong tmp1= *nr1;
ulong tmp2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < end ; key++)
{
@ -314,6 +315,7 @@ void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
'A ' and 'A' as identical
*/
const uchar *end= skip_trailing_space(key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_bin(cs, key, end - key, nr1, nr2);
}

View File

@ -703,7 +703,8 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
/*
Remove end space. We have to do this to be able to compare
'AE' and 'Ä' as identical

View File

@ -618,6 +618,7 @@ my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
register ulong m1= *nr1, m2= *nr2;
const uchar *end= key + len;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < end ; key++)
{
MY_HASH_ADD(m1, m2, (uint)*key);
@ -636,6 +637,7 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
'A ' and 'A' as identical
*/
const uchar *end= skip_trailing_space(key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_mb_nopad_bin(cs, key, end - key, nr1, nr2);
}

View File

@ -347,6 +347,7 @@ void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
register const uchar *sort_order=cs->sort_order;
const uchar *end= key + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for (; key < (uchar*) end ; key++)
{
MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
@ -363,6 +364,7 @@ void my_hash_sort_simple(CHARSET_INFO *cs,
register const uchar *sort_order=cs->sort_order;
const uchar *end;
uint16 space_weight= sort_order[' '];
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
/*
Remove all trailing characters that are equal to space.

View File

@ -537,6 +537,7 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
my_uca_scanner scanner;
int space_weight= my_space_weight(&cs->uca->level[0]);
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
@ -590,6 +591,7 @@ MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
int s_res;
my_uca_scanner scanner;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);

View File

@ -1359,6 +1359,7 @@ my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{
@ -1376,6 +1377,7 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_ci_lengthsp(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -1486,6 +1488,7 @@ my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end= pos + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(pos); /* Avoid UBSAN nullptr-with-offset */
for ( ; pos < end ; pos++)
{
@ -1501,6 +1504,7 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs,
const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_ci_lengthsp(cs, (const char *) pos, len);
DBUG_ASSERT(pos); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2);
}
@ -2250,6 +2254,7 @@ my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
@ -2270,6 +2275,7 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -3139,6 +3145,7 @@ my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e=s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{
@ -3155,6 +3162,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_mb2(cs, (const char *) s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_ucs2_nopad(cs, s, lengthsp, nr1, nr2);
}
@ -3279,6 +3287,7 @@ my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *end= key + len;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
for ( ; key < end ; key++)
{
MY_HASH_ADD(m1, m2, (uint)*key);
@ -3293,6 +3302,7 @@ my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
const uchar *key, size_t len, ulong *nr1, ulong *nr2)
{
size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
}

View File

@ -4977,6 +4977,7 @@ static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t
const uchar *e= s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
@ -4997,6 +4998,7 @@ static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen,
'A ' and 'A' as identical
*/
const uchar *e= skip_trailing_space(s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2);
}
@ -7414,6 +7416,7 @@ my_hash_sort_utf8mb4_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
@ -7446,6 +7449,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
'A ' and 'A' as identical
*/
const uchar *e= skip_trailing_space(s, slen);
DBUG_ASSERT(s); /* Avoid UBSAN nullptr-with-offset */
my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2);
}

View File

@ -81,7 +81,7 @@
static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
{
const uchar *end= ptr + len;
DBUG_ASSERT(ptr); /* Avoid UBSAN nullptr-with-offset */
if (len > 20)
{
const uchar *end_words= (const uchar *)(intptr)