mirror of
https://github.com/MariaDB/server.git
synced 2025-01-19 13:32:33 +01:00
MDEV-10814: Innodb large allocations - madvise - Don't dump
Note: Linux only. Core dumps of large buffer pool pages take time and space and pose a potential data exposure risk in scenarios where data-at-rest encryption is deployed. Here we use madvise(MADV_DONTDUMP) on the large memory allocations used by the InnoDB buffer pool, log_sys and recv_sys. The effect of this system call is that these memory areas will not appear in a core dump. Data from these buffers is rarely useful in fault diagnosis. The log_sys and recv_sys structures now use large memory allocations for their large buffers. Debug builds don't include the madvise syscall and as such will produce full core dumps. A function, buf_madvise_do_dump, is added but never called. It is there to be called from a debugger to re-enable the core dumping of all of these pages if for some reason the entire contents of these buffers are needed. Idea thanks to Hartmut Holzgraefe
This commit is contained in:
parent
990289a78f
commit
b600f30786
8 changed files with 180 additions and 41 deletions
|
@ -1173,6 +1173,57 @@ buf_page_is_corrupted(
|
|||
}
|
||||
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
|
||||
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
|
||||
/** Enable buffers to be dumped to core files
|
||||
|
||||
A convience function, not called anyhwere directly however
|
||||
it is left available for gdb or any debugger to call
|
||||
in the event that you want all of the memory to be dumped
|
||||
to a core file.
|
||||
|
||||
Returns number of errors found in madvise calls. */
|
||||
int
|
||||
buf_madvise_do_dump()
|
||||
{
|
||||
int ret= 0;
|
||||
buf_pool_t* buf_pool;
|
||||
ulint n;
|
||||
buf_chunk_t* chunk;
|
||||
|
||||
/* mirrors allocation in log_sys_init() */
|
||||
if (log_sys->buf)
|
||||
{
|
||||
ret+= madvise(log_sys->first_in_use ? log_sys->buf
|
||||
: log_sys->buf - log_sys->buf_size,
|
||||
log_sys->buf_size,
|
||||
MADV_DODUMP);
|
||||
}
|
||||
/* mirrors recv_sys_init() */
|
||||
if (recv_sys->buf)
|
||||
{
|
||||
ret+= madvise(recv_sys->buf, recv_sys->len, MADV_DODUMP);
|
||||
}
|
||||
|
||||
buf_pool_mutex_enter_all();
|
||||
|
||||
for (int i= 0; i < srv_buf_pool_instances; i++)
|
||||
{
|
||||
buf_pool = buf_pool_from_array(i);
|
||||
chunk = buf_pool->chunks;
|
||||
|
||||
for (int n = buf_pool->n_chunks; n--; chunk++)
|
||||
{
|
||||
ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP);
|
||||
}
|
||||
}
|
||||
|
||||
buf_pool_mutex_exit_all();
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Dump a page to stderr.
|
||||
@param[in] read_buf database page
|
||||
@param[in] page_size page size */
|
||||
|
@ -1502,7 +1553,7 @@ buf_chunk_init(
|
|||
DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return(NULL););
|
||||
|
||||
chunk->mem = buf_pool->allocator.allocate_large(mem_size,
|
||||
&chunk->mem_pfx);
|
||||
&chunk->mem_pfx, true);
|
||||
|
||||
if (UNIV_UNLIKELY(chunk->mem == NULL)) {
|
||||
|
||||
|
@ -1796,7 +1847,8 @@ buf_pool_init_instance(
|
|||
}
|
||||
|
||||
buf_pool->allocator.deallocate_large(
|
||||
chunk->mem, &chunk->mem_pfx);
|
||||
chunk->mem, &chunk->mem_pfx, chunk->mem_size(),
|
||||
true);
|
||||
}
|
||||
ut_free(buf_pool->chunks);
|
||||
buf_pool_mutex_exit(buf_pool);
|
||||
|
@ -1943,7 +1995,7 @@ buf_pool_free_instance(
|
|||
}
|
||||
|
||||
buf_pool->allocator.deallocate_large(
|
||||
chunk->mem, &chunk->mem_pfx);
|
||||
chunk->mem, &chunk->mem_pfx, true);
|
||||
}
|
||||
|
||||
for (ulint i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; ++i) {
|
||||
|
@ -2819,7 +2871,7 @@ withdraw_retry:
|
|||
}
|
||||
|
||||
buf_pool->allocator.deallocate_large(
|
||||
chunk->mem, &chunk->mem_pfx);
|
||||
chunk->mem, &chunk->mem_pfx, true);
|
||||
|
||||
sum_freed += chunk->size;
|
||||
|
||||
|
|
|
@ -614,15 +614,15 @@ struct log_t{
|
|||
mtr_commit and still ensure that
|
||||
insertions in the flush_list happen
|
||||
in the LSN order. */
|
||||
byte* buf_ptr; /*!< unaligned log buffer, which should
|
||||
be of double of buf_size */
|
||||
byte* buf; /*!< log buffer currently in use;
|
||||
this could point to either the first
|
||||
half of the aligned(buf_ptr) or the
|
||||
byte* buf; /*!< Memory of double the buf_size is
|
||||
allocated here. This pointer will change
|
||||
however to either the first half or the
|
||||
second half in turns, so that log
|
||||
write/flush to disk don't block
|
||||
concurrent mtrs which will write
|
||||
log to this buffer */
|
||||
log to this buffer. Care to switch back
|
||||
to the first half before freeing/resizing
|
||||
must be undertaken. */
|
||||
bool first_in_use; /*!< true if buf points to the first
|
||||
half of the aligned(buf_ptr), false
|
||||
if the second half */
|
||||
|
|
|
@ -217,6 +217,7 @@ struct recv_sys_t{
|
|||
/*!< this is TRUE when a log rec application
|
||||
batch is running */
|
||||
byte* buf; /*!< buffer for parsing log records */
|
||||
size_t buf_size; /*!< size of buf */
|
||||
ulint len; /*!< amount of data in buf */
|
||||
lsn_t parse_start_lsn;
|
||||
/*!< this is the lsn from which we were able to
|
||||
|
|
|
@ -129,6 +129,10 @@ InnoDB:
|
|||
#include <string.h> /* strlen(), strrchr(), strncmp() */
|
||||
|
||||
#include "my_global.h" /* needed for headers from mysql/psi/ */
|
||||
/* madvise() and MADV_DONTDUMP / MADV_DODUMP are only used in non-debug
(DBUG_OFF) builds, so that is when <sys/mman.h> must be included. */
#if defined(DBUG_OFF) && defined(HAVE_MADVISE)
#include <sys/mman.h>
#endif
|
||||
|
||||
/* JAN: TODO: missing 5.7 header */
|
||||
#ifdef HAVE_MYSQL_MEMORY_H
|
||||
#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
|
||||
|
@ -234,6 +238,45 @@ struct ut_new_pfx_t {
|
|||
#endif
|
||||
};
|
||||
|
||||
static void ut_allocate_trace_dontdump(void * ptr,
|
||||
size_t bytes,
|
||||
bool dontdump,
|
||||
ut_new_pfx_t* pfx,
|
||||
const char* file)
|
||||
{
|
||||
ut_a(ptr != NULL);
|
||||
|
||||
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
|
||||
if (dontdump && madvise(ptr, bytes, MADV_DONTDUMP)) {
|
||||
ib::warn() << "Failed to set memory to DONTDUMP: "
|
||||
<< strerror(errno)
|
||||
<< " ptr " << ptr
|
||||
<< " size " << bytes;
|
||||
}
|
||||
#endif
|
||||
if (pfx != NULL) {
|
||||
#ifdef UNIV_PFS_MEMORY
|
||||
allocate_trace(bytes, file, pfx);
|
||||
#endif /* UNIV_PFS_MEMORY */
|
||||
pfx->m_size = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
/** Advise the OS to include a memory range in core dumps again.
Does nothing for a NULL pointer, and nothing at all unless this is a
non-debug build that has madvise() and MADV_DODUMP.
@param[in]	ptr	start of the memory range; may be NULL
@param[in]	m_size	size of the range in bytes */
static void ut_dodump(void* ptr, size_t m_size)
{
	if (!ptr) {
		return;
	}
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
	const int	failed = madvise(ptr, m_size, MADV_DODUMP);

	if (failed) {
		ib::warn() << "Failed to set memory to DODUMP: "
			   << strerror(errno)
			   << " ptr " << ptr
			   << " size " << m_size;
	}
#endif
}
|
||||
|
||||
/** Allocator class for allocating memory from inside std::* containers.
|
||||
@tparam T type of allocated object
|
||||
@tparam oom_fatal whether to commit suicide when running out of memory */
|
||||
|
@ -294,6 +337,7 @@ public:
|
|||
@param[in] file file name of the caller
|
||||
@param[in] set_to_zero if true, then the returned memory is
|
||||
initialized with 0x0 bytes.
|
||||
@param[in] throw_on_error if true, raise an exception if too big
|
||||
@return pointer to the allocated memory */
|
||||
pointer
|
||||
allocate(
|
||||
|
@ -566,6 +610,8 @@ public:
|
|||
/** Allocate a large chunk of memory that can hold 'n_elements'
|
||||
objects of type 'T' and trace the allocation.
|
||||
@param[in] n_elements number of elements
|
||||
@param[in] dontdump if true, advise the OS not to core
|
||||
dump this memory.
|
||||
@param[out] pfx storage for the description of the
|
||||
allocated memory. The caller must provide space for this one and keep
|
||||
it until the memory is no longer needed and then pass it to
|
||||
|
@ -574,7 +620,8 @@ public:
|
|||
pointer
|
||||
allocate_large(
|
||||
size_type n_elements,
|
||||
ut_new_pfx_t* pfx)
|
||||
ut_new_pfx_t* pfx,
|
||||
bool dontdump = false)
|
||||
{
|
||||
if (n_elements == 0 || n_elements > max_size()) {
|
||||
return(NULL);
|
||||
|
@ -585,13 +632,11 @@ public:
|
|||
pointer ptr = reinterpret_cast<pointer>(
|
||||
os_mem_alloc_large(&n_bytes));
|
||||
|
||||
#ifdef UNIV_PFS_MEMORY
|
||||
if (ptr != NULL) {
|
||||
allocate_trace(n_bytes, NULL, pfx);
|
||||
if (ptr == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
pfx->m_size = n_bytes;
|
||||
#endif /* UNIV_PFS_MEMORY */
|
||||
|
||||
ut_allocate_trace_dontdump(ptr, n_bytes, dontdump, pfx, NULL);
|
||||
|
||||
return(ptr);
|
||||
}
|
||||
|
@ -600,17 +645,26 @@ public:
|
|||
deallocation.
|
||||
@param[in,out] ptr pointer to memory to free
|
||||
@param[in] pfx descriptor of the memory, as returned by
|
||||
allocate_large(). */
|
||||
allocate_large().
|
||||
@param[in] dodump if true, advise the OS to include this
|
||||
memory again if a core dump occurs. */
|
||||
void
|
||||
deallocate_large(
|
||||
pointer ptr,
|
||||
const ut_new_pfx_t* pfx)
|
||||
const ut_new_pfx_t* pfx,
|
||||
size_t size,
|
||||
bool dodump = false)
|
||||
{
|
||||
if (dodump) {
|
||||
ut_dodump(ptr, size);
|
||||
}
|
||||
#ifdef UNIV_PFS_MEMORY
|
||||
deallocate_trace(pfx);
|
||||
if (pfx) {
|
||||
deallocate_trace(pfx);
|
||||
}
|
||||
#endif /* UNIV_PFS_MEMORY */
|
||||
|
||||
os_mem_free_large(ptr, pfx->m_size);
|
||||
os_mem_free_large(ptr, size);
|
||||
}
|
||||
|
||||
#ifdef UNIV_PFS_MEMORY
|
||||
|
@ -842,6 +896,10 @@ ut_delete_array(
|
|||
ut_allocator<byte>(key).allocate( \
|
||||
n_bytes, NULL, __FILE__, false, false))
|
||||
|
||||
/* Allocate n_bytes of large memory that is excluded from core dumps.
No pfx descriptor is kept (NULL is passed), so the caller has to
remember the size and hand it to ut_free_dodump(). */
#define ut_malloc_dontdump(n_bytes)	static_cast<void*>(	\
	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate_large( \
		n_bytes, NULL, true))
|
||||
|
||||
#define ut_zalloc(n_bytes, key) static_cast<void*>( \
|
||||
ut_allocator<byte>(key).allocate( \
|
||||
n_bytes, NULL, __FILE__, true, false))
|
||||
|
@ -865,6 +923,10 @@ ut_delete_array(
|
|||
#define ut_free(ptr) ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
|
||||
reinterpret_cast<byte*>(ptr))
|
||||
|
||||
/* Free memory obtained from ut_malloc_dontdump(), making it dumpable
again first. deallocate_large() returns void, so the expression is not
wrapped in a cast; ptr is converted to byte* the same way ut_free()
does it. */
#define ut_free_dodump(ptr, size)					\
	ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate_large(	\
		reinterpret_cast<byte*>(ptr), NULL, size, true)
|
||||
|
||||
#else /* UNIV_PFS_MEMORY */
|
||||
|
||||
/* Fallbacks when memory tracing is disabled at compile time. */
|
||||
|
@ -887,6 +949,14 @@ ut_delete_array(
|
|||
|
||||
#define ut_malloc_nokey(n_bytes) ::malloc(n_bytes)
|
||||
|
||||
static inline void *ut_malloc_dontdump(size_t n_bytes)
|
||||
{
|
||||
void *ptr = os_mem_alloc_large(&n_bytes);
|
||||
|
||||
ut_allocate_trace_dontdump(ptr, n_bytes, true, NULL, NULL);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
#define ut_zalloc_nokey(n_bytes) ::calloc(1, n_bytes)
|
||||
|
||||
#define ut_zalloc_nokey_nofatal(n_bytes) ::calloc(1, n_bytes)
|
||||
|
@ -895,6 +965,12 @@ ut_delete_array(
|
|||
|
||||
#define ut_free(ptr) ::free(ptr)
|
||||
|
||||
static inline void ut_free_dodump(void *ptr, size_t size)
|
||||
{
|
||||
ut_dodump(ptr, size);
|
||||
os_mem_free_large(ptr, size);
|
||||
}
|
||||
|
||||
#endif /* UNIV_PFS_MEMORY */
|
||||
|
||||
#endif /* ut0new_h */
|
||||
|
|
|
@ -226,16 +226,18 @@ log_buffer_extend(
|
|||
log_sys->buf_free -= move_start;
|
||||
log_sys->buf_next_to_write -= move_start;
|
||||
|
||||
/* free previous after getting the right address */
|
||||
if (!log_sys->first_in_use) {
|
||||
log_sys->buf -= log_sys->buf_size;
|
||||
}
|
||||
ut_free_dodump(log_sys->buf, log_sys->buf_size * 2);
|
||||
|
||||
/* reallocate log buffer */
|
||||
srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
|
||||
ut_free(log_sys->buf_ptr);
|
||||
|
||||
log_sys->buf_size = LOG_BUFFER_SIZE;
|
||||
|
||||
log_sys->buf_ptr = static_cast<byte*>(
|
||||
ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
|
||||
log_sys->buf = static_cast<byte*>(
|
||||
ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
|
||||
ut_malloc_dontdump(log_sys->buf_size * 2));
|
||||
|
||||
log_sys->first_in_use = true;
|
||||
|
||||
|
@ -723,10 +725,8 @@ log_sys_init()
|
|||
|
||||
log_sys->buf_size = LOG_BUFFER_SIZE;
|
||||
|
||||
log_sys->buf_ptr = static_cast<byte*>(
|
||||
ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
|
||||
log_sys->buf = static_cast<byte*>(
|
||||
ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
|
||||
ut_malloc_dontdump(log_sys->buf_size * 2));
|
||||
|
||||
log_sys->first_in_use = true;
|
||||
|
||||
|
@ -1085,12 +1085,12 @@ log_buffer_switch()
|
|||
OS_FILE_LOG_BLOCK_SIZE);
|
||||
|
||||
if (log_sys->first_in_use) {
|
||||
ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
|
||||
ut_ad(log_sys->buf == ut_align(log_sys->buf,
|
||||
OS_FILE_LOG_BLOCK_SIZE));
|
||||
log_sys->buf += log_sys->buf_size;
|
||||
} else {
|
||||
log_sys->buf -= log_sys->buf_size;
|
||||
ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
|
||||
ut_ad(log_sys->buf == ut_align(log_sys->buf,
|
||||
OS_FILE_LOG_BLOCK_SIZE));
|
||||
}
|
||||
|
||||
|
@ -2254,8 +2254,10 @@ log_shutdown()
|
|||
{
|
||||
log_group_close_all();
|
||||
|
||||
ut_free(log_sys->buf_ptr);
|
||||
log_sys->buf_ptr = NULL;
|
||||
if (!log_sys->first_in_use) {
|
||||
log_sys->buf -= log_sys->buf_size;
|
||||
}
|
||||
ut_free_dodump(log_sys->buf, log_sys->buf_size * 2);
|
||||
log_sys->buf = NULL;
|
||||
ut_free(log_sys->checkpoint_buf_ptr);
|
||||
log_sys->checkpoint_buf_ptr = NULL;
|
||||
|
|
|
@ -438,7 +438,9 @@ recv_sys_close()
|
|||
os_event_destroy(recv_sys->flush_end);
|
||||
}
|
||||
|
||||
ut_free(recv_sys->buf);
|
||||
if (recv_sys->buf != NULL) {
|
||||
ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
|
||||
}
|
||||
|
||||
ut_ad(!recv_writer_thread_active);
|
||||
mutex_free(&recv_sys->writer_mutex);
|
||||
|
@ -553,7 +555,8 @@ recv_sys_init()
|
|||
}
|
||||
|
||||
recv_sys->buf = static_cast<byte*>(
|
||||
ut_malloc_nokey(RECV_PARSING_BUF_SIZE));
|
||||
ut_malloc_dontdump(RECV_PARSING_BUF_SIZE));
|
||||
recv_sys->buf_size = RECV_PARSING_BUF_SIZE;
|
||||
|
||||
recv_sys->addr_hash = hash_create(size / 512);
|
||||
recv_sys->progress_time = ut_time();
|
||||
|
@ -588,8 +591,9 @@ recv_sys_debug_free(void)
|
|||
|
||||
hash_table_free(recv_sys->addr_hash);
|
||||
mem_heap_free(recv_sys->heap);
|
||||
ut_free(recv_sys->buf);
|
||||
ut_free_dodump(recv_sys->buf, recv_sys->buf_size);
|
||||
|
||||
recv_sys->buf_size = 0;
|
||||
recv_sys->buf = NULL;
|
||||
recv_sys->heap = NULL;
|
||||
recv_sys->addr_hash = NULL;
|
||||
|
|
|
@ -71,6 +71,7 @@ enum row_op {
|
|||
/** Log block for modifications during online ALTER TABLE */
|
||||
struct row_log_buf_t {
|
||||
byte* block; /*!< file block buffer */
|
||||
size_t size; /*!< length of block in bytes */
|
||||
ut_new_pfx_t block_pfx; /*!< opaque descriptor of "block". Set
|
||||
by ut_allocator::allocate_large() and fed to
|
||||
ut_allocator::deallocate_large(). */
|
||||
|
@ -265,6 +266,7 @@ row_log_block_allocate(
|
|||
if (log_buf.block == NULL) {
|
||||
DBUG_RETURN(false);
|
||||
}
|
||||
log_buf.size = srv_sort_buf_size;
|
||||
}
|
||||
DBUG_RETURN(true);
|
||||
}
|
||||
|
@ -279,7 +281,7 @@ row_log_block_free(
|
|||
DBUG_ENTER("row_log_block_free");
|
||||
if (log_buf.block != NULL) {
|
||||
ut_allocator<byte>(mem_key_row_log_buf).deallocate_large(
|
||||
log_buf.block, &log_buf.block_pfx);
|
||||
log_buf.block, &log_buf.block_pfx, log_buf.size);
|
||||
log_buf.block = NULL;
|
||||
}
|
||||
DBUG_VOID_RETURN;
|
||||
|
|
|
@ -4619,6 +4619,7 @@ row_merge_build_indexes(
|
|||
merge_file_t* merge_files;
|
||||
row_merge_block_t* block;
|
||||
ut_new_pfx_t block_pfx;
|
||||
size_t block_size;
|
||||
ut_new_pfx_t crypt_pfx;
|
||||
row_merge_block_t* crypt_block = NULL;
|
||||
ulint i;
|
||||
|
@ -4654,7 +4655,8 @@ row_merge_build_indexes(
|
|||
|
||||
/* This will allocate "3 * srv_sort_buf_size" elements of type
|
||||
row_merge_block_t. The latter is defined as byte. */
|
||||
block = alloc.allocate_large(3 * srv_sort_buf_size, &block_pfx);
|
||||
block_size = 3 * srv_sort_buf_size;
|
||||
block = alloc.allocate_large(block_size, &block_pfx);
|
||||
|
||||
if (block == NULL) {
|
||||
DBUG_RETURN(DB_OUT_OF_MEMORY);
|
||||
|
@ -4664,7 +4666,7 @@ row_merge_build_indexes(
|
|||
|
||||
if (log_tmp_is_encrypted()) {
|
||||
crypt_block = static_cast<row_merge_block_t*>(
|
||||
alloc.allocate_large(3 * srv_sort_buf_size,
|
||||
alloc.allocate_large(block_size,
|
||||
&crypt_pfx));
|
||||
|
||||
if (crypt_block == NULL) {
|
||||
|
@ -5035,10 +5037,10 @@ func_exit:
|
|||
|
||||
ut_free(merge_files);
|
||||
|
||||
alloc.deallocate_large(block, &block_pfx);
|
||||
alloc.deallocate_large(block, &block_pfx, block_size);
|
||||
|
||||
if (crypt_block) {
|
||||
alloc.deallocate_large(crypt_block, &crypt_pfx);
|
||||
alloc.deallocate_large(crypt_block, &crypt_pfx, block_size);
|
||||
}
|
||||
|
||||
DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
|
||||
|
|
Loading…
Reference in a new issue