MDEV-18734 ASAN heap-use-after-free upon sorting by blob column from partitioned table

ha_partition stores records in the m_ordered_rec_buffer array and uses
them for the priority queue in an ordered index scan. By the time a record
is restored from the array, its blob buffers may already have been freed or
overwritten.

The solution is to take temporary ownership of cached blob buffers via
String::swap(). When the record is restored from m_ordered_rec_buffer
the ownership is returned to table fields.

Cleanups:

init_record_priority_queue(): removed the needless !m_ordered_rec_buffer
check, as the same condition is already asserted a few lines earlier.

dbug_print_row(): new helper that prints a row from an arbitrary row pointer.
This commit is contained in:
Aleksey Midenkov 2021-08-05 23:48:02 +03:00
parent b8deb02859
commit 160d97a4aa
9 changed files with 382 additions and 60 deletions

View file

@ -46,6 +46,42 @@ connection slave;
drop table federated.t1_1;
drop table federated.t1_2;
End of 5.1 tests
#
# MDEV-18734 ASAN heap-use-after-free upon sorting by blob column from partitioned table
#
connection slave;
use federated;
create table t1_1 (x int, b text, key(x));
create table t1_2 (x int, b text, key(x));
connection master;
create table t1 (x int, b text, key(x)) engine=federated
partition by range columns (x) (
partition p1 values less than (40) connection='mysql://root@127.0.0.1:SLAVE_PORT/federated/t1_1',
partition pn values less than (maxvalue) connection='mysql://root@127.0.0.1:SLAVE_PORT/federated/t1_2'
);
insert t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8);
insert t1 select x + 8, x + 8 from t1;
insert t1 select x + 16, x + 16 from t1;
insert t1 select x + 49, repeat(x + 49, 100) from t1;
flush tables;
# This produces wrong result before MDEV-17573
select x, left(b, 10) from t1 where x > 30 and x < 60 order by b;
x left(b, 10)
31 31
32 32
50 5050505050
51 5151515151
52 5252525252
53 5353535353
54 5454545454
55 5555555555
56 5656565656
57 5757575757
58 5858585858
59 5959595959
drop table t1;
connection slave;
drop table t1_1, t1_2;
connection master;
DROP TABLE IF EXISTS federated.t1;
DROP DATABASE IF EXISTS federated;

View file

@ -50,4 +50,29 @@ drop table federated.t1_2;
--echo End of 5.1 tests
--echo #
--echo # MDEV-18734 ASAN heap-use-after-free upon sorting by blob column from partitioned table
--echo #
connection slave;
use federated;
create table t1_1 (x int, b text, key(x));
create table t1_2 (x int, b text, key(x));
connection master;
--replace_result $SLAVE_MYPORT SLAVE_PORT
eval create table t1 (x int, b text, key(x)) engine=federated
partition by range columns (x) (
partition p1 values less than (40) connection='mysql://root@127.0.0.1:$SLAVE_MYPORT/federated/t1_1',
partition pn values less than (maxvalue) connection='mysql://root@127.0.0.1:$SLAVE_MYPORT/federated/t1_2'
);
insert t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8);
insert t1 select x + 8, x + 8 from t1;
insert t1 select x + 16, x + 16 from t1;
insert t1 select x + 49, repeat(x + 49, 100) from t1;
flush tables;
--echo # This produces wrong result before MDEV-17573
select x, left(b, 10) from t1 where x > 30 and x < 60 order by b;
drop table t1;
connection slave;
drop table t1_1, t1_2;
source include/federated_cleanup.inc;

View file

@ -28,3 +28,76 @@ set statement sql_mode= '' for update t1 set i= 1, v= 2;
Warnings:
Warning 1906 The value specified for generated column 'v' in table 't1' has been ignored
drop table t1;
#
# MDEV-18734 ASAN heap-use-after-free in my_strnxfrm_simple_internal upon update on versioned partitioned table
#
# Cover queue_fix() in ha_partition::handle_ordered_index_scan()
create or replace table t1 (
x int auto_increment primary key,
b text, v mediumtext as (b) virtual,
index (v(10))
) partition by range columns (x) (
partition p1 values less than (3),
partition p2 values less than (6),
partition p3 values less than (9),
partition p4 values less than (12),
partition p5 values less than (15),
partition p6 values less than (17),
partition p7 values less than (19),
partition p8 values less than (21),
partition p9 values less than (23),
partition p10 values less than (25),
partition p11 values less than (27),
partition p12 values less than (29),
partition p13 values less than (31),
partition p14 values less than (33),
partition p15 values less than (35),
partition pn values less than (maxvalue));
insert into t1 (b) values
(repeat('q', 8192)), (repeat('z', 8192)), (repeat('a', 8192)), (repeat('b', 8192)),
(repeat('x', 8192)), (repeat('y', 8192));
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
select x, left(b, 10), left(v, 10) from t1 where x > 30 and x < 60 order by v;
x left(b, 10) left(v, 10)
33 aaaaaaaaaa aaaaaaaaaa
39 aaaaaaaaaa aaaaaaaaaa
45 aaaaaaaaaa aaaaaaaaaa
51 aaaaaaaaaa aaaaaaaaaa
57 aaaaaaaaaa aaaaaaaaaa
34 bbbbbbbbbb bbbbbbbbbb
40 bbbbbbbbbb bbbbbbbbbb
46 bbbbbbbbbb bbbbbbbbbb
52 bbbbbbbbbb bbbbbbbbbb
58 bbbbbbbbbb bbbbbbbbbb
31 qqqqqqqqqq qqqqqqqqqq
37 qqqqqqqqqq qqqqqqqqqq
43 qqqqqqqqqq qqqqqqqqqq
49 qqqqqqqqqq qqqqqqqqqq
55 qqqqqqqqqq qqqqqqqqqq
35 xxxxxxxxxx xxxxxxxxxx
41 xxxxxxxxxx xxxxxxxxxx
47 xxxxxxxxxx xxxxxxxxxx
53 xxxxxxxxxx xxxxxxxxxx
59 xxxxxxxxxx xxxxxxxxxx
36 yyyyyyyyyy yyyyyyyyyy
42 yyyyyyyyyy yyyyyyyyyy
48 yyyyyyyyyy yyyyyyyyyy
54 yyyyyyyyyy yyyyyyyyyy
32 zzzzzzzzzz zzzzzzzzzz
38 zzzzzzzzzz zzzzzzzzzz
44 zzzzzzzzzz zzzzzzzzzz
50 zzzzzzzzzz zzzzzzzzzz
56 zzzzzzzzzz zzzzzzzzzz
update t1 set b= 'bar' where v > 'a' limit 20;
drop table t1;
# Cover return_top_record() in ha_partition::handle_ordered_index_scan()
create table t1 (x int primary key, b tinytext, v text as (b) virtual)
partition by range columns (x) (
partition p1 values less than (4),
partition pn values less than (maxvalue));
insert into t1 (x, b) values (1, ''), (2, ''), (3, 'a'), (4, 'b');
update t1 set b= 'bar' where x > 0 order by v limit 2;
drop table t1;

View file

@ -30,3 +30,51 @@ subpartition by hash(v) subpartitions 3 (
insert t1 set i= 0;
set statement sql_mode= '' for update t1 set i= 1, v= 2;
drop table t1;
--echo #
--echo # MDEV-18734 ASAN heap-use-after-free in my_strnxfrm_simple_internal upon update on versioned partitioned table
--echo #
--echo # Cover queue_fix() in ha_partition::handle_ordered_index_scan()
create or replace table t1 (
x int auto_increment primary key,
b text, v mediumtext as (b) virtual,
index (v(10))
) partition by range columns (x) (
partition p1 values less than (3),
partition p2 values less than (6),
partition p3 values less than (9),
partition p4 values less than (12),
partition p5 values less than (15),
partition p6 values less than (17),
partition p7 values less than (19),
partition p8 values less than (21),
partition p9 values less than (23),
partition p10 values less than (25),
partition p11 values less than (27),
partition p12 values less than (29),
partition p13 values less than (31),
partition p14 values less than (33),
partition p15 values less than (35),
partition pn values less than (maxvalue));
insert into t1 (b) values
(repeat('q', 8192)), (repeat('z', 8192)), (repeat('a', 8192)), (repeat('b', 8192)),
(repeat('x', 8192)), (repeat('y', 8192));
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
insert t1 (b) select b from t1;
select x, left(b, 10), left(v, 10) from t1 where x > 30 and x < 60 order by v;
update t1 set b= 'bar' where v > 'a' limit 20;
drop table t1;
--echo # Cover return_top_record() in ha_partition::handle_ordered_index_scan()
create table t1 (x int primary key, b tinytext, v text as (b) virtual)
partition by range columns (x) (
partition p1 values less than (4),
partition pn values less than (maxvalue));
insert into t1 (x, b) values (1, ''), (2, ''), (3, 'a'), (4, 'b');
update t1 set b= 'bar' where x > 0 order by v limit 2;
drop table t1;

View file

@ -8318,6 +8318,7 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
copy_length= copier.well_formed_copy(field_charset,
(char*) value.ptr(), new_length,
cs, from, length);
value.length(copy_length);
Field_blob::store_length(copy_length);
bmove(ptr+packlength,(uchar*) &tmp,sizeof(char*));

View file

@ -3465,6 +3465,12 @@ public:
uchar *new_ptr, uint32 length,
uchar *new_null_ptr, uint new_null_bit);
void sql_type(String &str) const;
/**
Copy blob buffer into internal storage "value" and update record pointer.
@retval true Memory allocation error
@retval false Success
*/
inline bool copy()
{
uchar *tmp= get_ptr();
@ -3477,6 +3483,33 @@ public:
memcpy(ptr+packlength, &tmp, sizeof(char*));
return 0;
}
/**
  Exchange the contents of a caller-supplied String with one of this
  blob's own buffers.

  @param inout           String to exchange with the selected buffer
  @param set_read_value  true: exchange with read_value;
                         false: exchange with value
*/
void swap(String &inout, bool set_read_value)
{
  String &target= set_read_value ? read_value : value;
  target.swap(inout);
}
/**
  Tell which internal buffer, if any, the blob pointer obtained from
  get_ptr() currently refers to.

  @param[out] set_read_value  false when the match is "value",
                              true when the match is "read_value";
                              left untouched when NULL is returned
  @return pointer to the matching buffer, or NULL if neither buffer is
          non-empty and starts at the blob pointer
*/
String * cached(bool *set_read_value)
{
  const char *blob_ptr= (const char *) get_ptr();
  String *hit= NULL;

  /* "value" is checked first, exactly as the lookup order requires. */
  if (!value.is_empty() && blob_ptr == value.ptr())
  {
    hit= &value;
    *set_read_value= false;
  }
  else if (!read_value.is_empty() && blob_ptr == read_value.ptr())
  {
    hit= &read_value;
    *set_read_value= true;
  }
  return hit;
}
/* store value for the duration of the current read record */
inline void swap_value_and_read_value()
{

View file

@ -608,6 +608,15 @@ const char* dbug_print_table_row(TABLE *table)
}
/*
  Print the row stored at 'rec' using dbug_print_table_row().

  The table's fields are moved from record[0] to 'rec' for the duration of
  the call and moved back afterwards, so the table is left pointing at
  record[0] again on return.

  @param table  table whose fields describe the row layout
  @param rec    row buffer to print
  @return       whatever dbug_print_table_row() returned for that row
*/
const char* dbug_print_row(TABLE *table, uchar *rec)
{
  uchar *const home= table->record[0];

  table->move_fields(table->field, rec, home);
  const char *text= dbug_print_table_row(table);
  table->move_fields(table->field, home, rec);
  return text;
}
/*
Print a text, SQL-like record representation into dbug trace.

View file

@ -5103,59 +5103,69 @@ bool ha_partition::init_record_priority_queue()
/*
Initialize the ordered record buffer.
*/
if (!m_ordered_rec_buffer)
uint alloc_len;
uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
/* Allocate record buffer for each used partition. */
m_priority_queue_rec_len= m_rec_length + ORDERED_REC_OFFSET;
if (!m_using_extended_keys)
m_priority_queue_rec_len += m_file[0]->ref_length;
alloc_len= used_parts * m_priority_queue_rec_len;
/* Allocate a key for temporary use when setting up the scan. */
alloc_len+= table_share->max_key_length;
Ordered_blob_storage **blob_storage;
Ordered_blob_storage *objs;
const size_t n_all= used_parts * table->s->blob_fields;
if (!my_multi_malloc(MYF(MY_WME), &m_ordered_rec_buffer, alloc_len,
&blob_storage, n_all * sizeof(Ordered_blob_storage *),
&objs, n_all * sizeof(Ordered_blob_storage), NULL))
DBUG_RETURN(true);
/*
We set-up one record per partition and each record has 2 bytes in
front where the partition id is written. This is used by ordered
index_read.
We also set-up a reference to the first record for temporary use in
setting up the scan.
*/
char *ptr= (char*) m_ordered_rec_buffer;
uint i;
for (i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
{
uint alloc_len;
uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
/* Allocate record buffer for each used partition. */
m_priority_queue_rec_len= m_rec_length + PARTITION_BYTES_IN_POS;
if (!m_using_extended_keys)
m_priority_queue_rec_len += m_file[0]->ref_length;
alloc_len= used_parts * m_priority_queue_rec_len;
/* Allocate a key for temporary use when setting up the scan. */
alloc_len+= table_share->max_key_length;
DBUG_PRINT("info", ("init rec-buf for part %u", i));
if (table->s->blob_fields)
{
for (uint j= 0; j < table->s->blob_fields; ++j, ++objs)
blob_storage[j]= new (objs) Ordered_blob_storage;
*((Ordered_blob_storage ***) ptr)= blob_storage;
blob_storage+= table->s->blob_fields;
}
int2store(ptr + sizeof(String **), i);
ptr+= m_priority_queue_rec_len;
}
m_start_key.key= (const uchar*)ptr;
if (!(m_ordered_rec_buffer= (uchar*)my_malloc(alloc_len, MYF(MY_WME))))
DBUG_RETURN(true);
/*
We set-up one record per partition and each record has 2 bytes in
front where the partition id is written. This is used by ordered
index_read.
We also set-up a reference to the first record for temporary use in
setting up the scan.
*/
char *ptr= (char*) m_ordered_rec_buffer;
uint i;
for (i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
{
DBUG_PRINT("info", ("init rec-buf for part %u", i));
int2store(ptr, i);
ptr+= m_priority_queue_rec_len;
}
m_start_key.key= (const uchar*)ptr;
/* Initialize priority queue, initialized to reading forward. */
int (*cmp_func)(void *, uchar *, uchar *);
void *cmp_arg;
if (!m_using_extended_keys)
{
cmp_func= cmp_key_rowid_part_id;
cmp_arg= (void*)this;
}
else
{
cmp_func= cmp_key_part_id;
cmp_arg= (void*)m_curr_key_info;
}
if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0))
{
my_free(m_ordered_rec_buffer);
m_ordered_rec_buffer= NULL;
DBUG_RETURN(true);
}
/* Initialize priority queue, initialized to reading forward. */
int (*cmp_func)(void *, uchar *, uchar *);
void *cmp_arg;
if (!m_using_extended_keys)
{
cmp_func= cmp_key_rowid_part_id;
cmp_arg= (void*)this;
}
else
{
cmp_func= cmp_key_part_id;
cmp_arg= (void*)m_curr_key_info;
}
if (init_queue(&m_queue, used_parts, ORDERED_PART_NUM_OFFSET,
0, cmp_func, cmp_arg, 0, 0))
{
my_free(m_ordered_rec_buffer);
m_ordered_rec_buffer= NULL;
DBUG_RETURN(true);
}
DBUG_RETURN(false);
}
@ -5170,6 +5180,20 @@ void ha_partition::destroy_record_priority_queue()
DBUG_ENTER("ha_partition::destroy_record_priority_queue");
if (m_ordered_rec_buffer)
{
if (table->s->blob_fields)
{
char *ptr= (char *) m_ordered_rec_buffer;
for (uint i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
{
Ordered_blob_storage **blob_storage= *((Ordered_blob_storage ***) ptr);
for (uint b= 0; b < table->s->blob_fields; ++b)
blob_storage[b]->blob.free();
ptr+= m_priority_queue_rec_len;
}
}
delete_queue(&m_queue);
my_free(m_ordered_rec_buffer);
m_ordered_rec_buffer= NULL;
@ -5394,7 +5418,7 @@ static int cmp_part_ids(uchar *ref1, uchar *ref2)
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2)
{
int res;
if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS,
if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS,
ref2 + PARTITION_BYTES_IN_POS)))
{
return res;
@ -6133,8 +6157,8 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
i, m_index_scan_type));
DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
uchar *rec_buf_ptr= part_rec_buf_ptr + PARTITION_BYTES_IN_POS;
DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr + ORDERED_PART_NUM_OFFSET));
uchar *rec_buf_ptr= part_rec_buf_ptr + ORDERED_REC_OFFSET;
int error;
handler *file= m_file[i];
@ -6162,6 +6186,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
end_range, eq_range, TRUE);
memcpy(rec_buf_ptr, table->record[0], m_rec_length);
reverse_order= FALSE;
break;
}
@ -6181,6 +6206,11 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
Initialize queue without order first, simply insert
*/
queue_element(&m_queue, j++)= part_rec_buf_ptr;
if (table->s->blob_fields)
{
Ordered_blob_storage **storage= *((Ordered_blob_storage ***) part_rec_buf_ptr);
swap_blobs(rec_buf_ptr, storage, false);
}
}
else if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
{
@ -6229,10 +6259,15 @@ void ha_partition::return_top_record(uchar *buf)
{
uint part_id;
uchar *key_buffer= queue_top(&m_queue);
uchar *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS;
uchar *rec_buffer= key_buffer + ORDERED_REC_OFFSET;
part_id= uint2korr(key_buffer);
part_id= uint2korr(key_buffer + ORDERED_PART_NUM_OFFSET);
memcpy(buf, rec_buffer, m_rec_length);
if (table->s->blob_fields)
{
Ordered_blob_storage **storage= *((Ordered_blob_storage ***) key_buffer);
swap_blobs(buf, storage, true);
}
m_last_part= part_id;
m_top_entry= part_id;
}
@ -6268,7 +6303,7 @@ int ha_partition::handle_ordered_index_scan_key_not_found()
This partition is used and did return HA_ERR_KEY_NOT_FOUND
in index_read_map.
*/
curr_rec_buf= part_buf + PARTITION_BYTES_IN_POS;
curr_rec_buf= part_buf + ORDERED_REC_OFFSET;
error= m_file[i]->ha_index_next(curr_rec_buf);
/* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
@ -6293,6 +6328,48 @@ int ha_partition::handle_ordered_index_scan_key_not_found()
}
/*
  Move cached blob buffers between the table's blob fields and the
  per-record storage kept alongside an ordered record buffer.

  The table fields are moved onto rec_buf while the blobs are processed and
  moved back to record[0] before returning. Blob fields that are not in the
  read set, or that are NULL in rec_buf, are left alone.

  @param rec_buf  record whose blob fields are processed
  @param storage  array with one Ordered_blob_storage pointer per blob
                  field, indexed in table->s->blob_field order
  @param restore  false: take the field's cached buffer (if any) into
                         storage and remember which buffer it was;
                  true:  give a previously taken, non-empty buffer back
                         to the field
*/
void ha_partition::swap_blobs(uchar * rec_buf, Ordered_blob_storage ** storage, bool restore)
{
  const uint blob_count= table->s->blob_fields;
  const uint *blob_field_nr= table->s->blob_field;

  table->move_fields(table->field, rec_buf, table->record[0]);

  for (uint n= 0; n < blob_count; n++)
  {
    const uint field_nr= blob_field_nr[n];
    DBUG_ASSERT(field_nr < table->s->fields);
    Field_blob *field= (Field_blob*) table->field[field_nr];
    DBUG_ASSERT(field->flags & BLOB_FLAG);
    DBUG_ASSERT(field->field_index == field_nr);

    if (bitmap_is_set(table->read_set, field_nr) && !field->is_null())
    {
      Ordered_blob_storage &slot= *storage[n];

      if (!restore)
      {
        bool from_read_value;
        String *cache= field->cached(&from_read_value);
        if (cache)
        {
          cache->swap(slot.blob);
          slot.set_read_value= from_read_value;
        }
      }
      else if (!slot.blob.is_empty())
      {
        /*
          Only the blob cache (value or read_value) is protected. An empty
          slot does not mean the blob itself was empty: blobs allocated by
          a storage engine are not taken over and need no restoring.
        */
        field->swap(slot.blob, slot.set_read_value);
      }
    }
  }

  table->move_fields(table->field, table->record[0], rec_buf);
}
/*
Common routine to handle index_next with ordered results
@ -6311,7 +6388,8 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
int error;
uint part_id= m_top_entry;
uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
uchar *part_rec_buf_ptr= queue_top(&m_queue);
uchar *rec_buf= part_rec_buf_ptr + ORDERED_REC_OFFSET;
handler *file;
DBUG_ENTER("ha_partition::handle_ordered_next");
@ -6354,7 +6432,15 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
if (m_index_scan_type == partition_read_range)
{
error= file->read_range_next();
memcpy(rec_buf, table->record[0], m_rec_length);
if (!error)
{
memcpy(rec_buf, table->record[0], m_rec_length);
if (table->s->blob_fields)
{
Ordered_blob_storage **storage= *((Ordered_blob_storage ***) part_rec_buf_ptr);
swap_blobs(rec_buf, storage, false);
}
}
}
else if (!is_next_same)
error= file->ha_index_next(rec_buf);
@ -6410,7 +6496,7 @@ int ha_partition::handle_ordered_prev(uchar *buf)
{
int error;
uint part_id= m_top_entry;
uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
uchar *rec_buf= queue_top(&m_queue) + ORDERED_REC_OFFSET;
handler *file= m_file[part_id];
DBUG_ENTER("ha_partition::handle_ordered_prev");

View file

@ -21,7 +21,17 @@
#include "sql_partition.h" /* part_id_range, partition_element */
#include "queues.h" /* QUEUE */
/**
  Holder for one blob buffer taken from a Field_blob while its record sits
  in the ordered record buffer of ha_partition.
*/
struct Ordered_blob_storage
{
/* Buffer swapped out of (and later back into) the blob field. */
String blob;
/*
  Which Field_blob buffer 'blob' was taken from: true for read_value,
  false for value. Passed back to Field_blob::swap() on restore.
*/
bool set_read_value;
/* Starts with an empty 'blob' and set_read_value == false. */
Ordered_blob_storage() : set_read_value(false)
{}
};
#define PARTITION_BYTES_IN_POS 2
#define ORDERED_PART_NUM_OFFSET sizeof(Ordered_blob_storage **)
#define ORDERED_REC_OFFSET (ORDERED_PART_NUM_OFFSET + PARTITION_BYTES_IN_POS)
/** Struct used for partition_name_hash */
@ -630,6 +640,7 @@ private:
int handle_ordered_next(uchar * buf, bool next_same);
int handle_ordered_prev(uchar * buf);
void return_top_record(uchar * buf);
void swap_blobs(uchar* rec_buf, Ordered_blob_storage ** storage, bool restore);
public:
/*
-------------------------------------------------------------------------