ndb - bug#34378

Using more than 16g can cause record-pool ptr.i values to overflow
  Fix by splitting memory into 2 zones, lo(16g)/hi(rest)
  Where record pools only use zone_lo, and datamemory, buffers etc. can use any zone
This commit is contained in:
jonas@perch.ndb.mysql.com 2008-02-08 15:17:45 +01:00
parent ce36eea75c
commit 02a56b9e23
4 changed files with 336 additions and 95 deletions

View file

@ -918,7 +918,7 @@ Lgman::alloc_logbuffer_memory(Ptr<Logfile_group> ptr, Uint32 bytes)
{
Uint32 ptrI;
Uint32 cnt = pages > 64 ? 64 : pages;
m_ctx.m_mm.alloc(&ptrI, &cnt, 1);
m_ctx.m_mm.alloc_pages(RG_DISK_OPERATIONS, &ptrI, &cnt, 1);
if (cnt)
{
Buffer_idx range;
@ -1037,7 +1037,7 @@ Lgman::free_logbuffer_memory(Ptr<Logfile_group> ptr)
ndbrequire(map.next(it));
tmp[1] = *it.data;
m_ctx.m_mm.release(range.m_ptr_i, range.m_idx);
m_ctx.m_mm.release_pages(RG_DISK_OPERATIONS, range.m_ptr_i, range.m_idx);
map.next(it);
}
map.release();

View file

@ -20,7 +20,8 @@
void*
Pool_context::alloc_page(Uint32 type_id, Uint32 *i)
{
return m_block->m_ctx.m_mm.alloc_page(type_id, i);
return m_block->m_ctx.m_mm.alloc_page(type_id, i,
Ndbd_mem_manager::NDB_ZONE_LO);
}
void

View file

@ -25,19 +25,22 @@ extern EventLogger g_eventLogger;
extern EventLogger g_eventLogger;
#endif
#ifdef NDBD_MALLOC_METHOD
#if NDBD_MALLOC_METHOD == sbrk
static const char * f_method = "sbrk";
static int f_method_idx = 0;
#ifdef NDBD_MALLOC_METHOD_SBRK
static const char * f_method = "SMsm";
#else
static const char * f_method = "malloc";
#endif
#elif SIZEOF_CHARP == 8
static const char * f_method = "sbrk";
#else
static const char * f_method = "malloc";
static const char * f_method = "MSms";
#endif
#define MAX_CHUNKS 10
#define ZONE_LO 0
#define ZONE_HI 1
/**
* POOL_RECORD_BITS == 13 => 32 - 13 = 19 bits for page
*/
#define ZONE_LO_BOUND (1u << 19)
struct InitChunk
{
Uint32 m_cnt;
@ -54,28 +57,42 @@ do_malloc(Uint32 pages, InitChunk* chunk)
pages += 1;
void * ptr = 0;
Uint32 sz = pages;
if (strcmp(f_method, "sbrk") == 0)
retry:
char method = f_method[f_method_idx];
switch(method){
case 0:
return false;
case 'S':
case 's':
{
ptr = 0;
while (ptr == 0)
{
ptr = sbrk(sizeof(Alloc_page) * sz);
if (ptr == (void*)-1)
{
if (method == 'S')
{
f_method_idx++;
goto retry;
}
ptr = 0;
sz = 1 + (9 * sz) / 10;
if (pages >= 32 && sz < 32)
{
sz = pages;
f_method = "malloc";
g_eventLogger.info("sbrk(%lld) failed, trying malloc",
(Uint64)(sizeof(Alloc_page) * sz));
break;
f_method_idx++;
goto retry;
}
}
}
break;
}
if (strcmp(f_method, "malloc") == 0)
case 'M':
case 'm':
{
ptr = 0;
while (ptr == 0)
@ -83,15 +100,26 @@ do_malloc(Uint32 pages, InitChunk* chunk)
ptr = malloc(sizeof(Alloc_page) * sz);
if (ptr == 0)
{
if (method == 'M')
{
f_method_idx++;
goto retry;
}
sz = 1 + (9 * sz) / 10;
if (pages >= 32 && sz < 32)
{
return false;
f_method_idx++;
goto retry;
}
}
}
break;
}
default:
return false;
}
chunk->m_cnt = sz;
chunk->m_ptr = (Alloc_page*)ptr;
const UintPtr align = sizeof(Alloc_page) - 1;
@ -151,6 +179,12 @@ Ndbd_mem_manager::Ndbd_mem_manager()
}
}
/**
* m_min = reserved
* m_curr = current
* m_max = max alloc, 0 = no limit
*/
void
Ndbd_mem_manager::set_resource_limit(const Resource_limit& rl)
{
@ -176,6 +210,40 @@ Ndbd_mem_manager::get_resource_limit(Uint32 id, Resource_limit& rl) const
return false;
}
/**
 * Debug-only sanity check of the resource-limit table.
 *
 * Entry 0 holds the global totals; entries 1..XX_RL_COUNT-1 are the
 * individual resource groups.  When VM_TRACE is defined this asserts
 * that the per-group counters add up to the totals kept in entry 0.
 * Without VM_TRACE the function is a no-op.
 */
static
inline
void
check_resource_limits(Resource_limit* rl)
{
#ifdef VM_TRACE
  Uint32 total_in_use = 0;      // sum of m_curr over all groups
  Uint32 total_reserved = 0;    // sum of m_min over all groups
  Uint32 used_reserved = 0;     // pages in use that fit inside a reservation
  Uint32 used_shared = 0;       // pages in use beyond a group's reservation

  for (Uint32 group = 1; group < XX_RL_COUNT; group++)
  {
    const Resource_limit& entry = rl[group];

    total_in_use += entry.m_curr;
    total_reserved += entry.m_min;

    // m_max == 0 means "no limit" for the group
    assert(entry.m_max == 0 || entry.m_curr <= entry.m_max);

    // Portion of the group's usage covered by its own reservation
    const Uint32 within = (entry.m_curr > entry.m_min) ? entry.m_min
                                                       : entry.m_curr;
    used_reserved += within;
    used_shared += entry.m_curr - within;
  }

  assert(total_in_use == rl[0].m_curr);
  assert(used_reserved + used_shared == total_in_use);
  assert(used_reserved <= total_reserved);
  // rl[0].m_min tracks the reserved pages that are not yet in use
  assert(total_reserved == used_reserved + rl[0].m_min);
  assert(rl[0].m_curr <= rl[0].m_max);
#endif
}
bool
Ndbd_mem_manager::init(bool alloc_less_memory)
{
@ -292,6 +360,8 @@ Ndbd_mem_manager::init(bool alloc_less_memory)
grow(chunks[i].m_start, chunks[i].m_cnt);
}
check_resource_limits(m_resource_limit);
return true;
}
@ -321,35 +391,68 @@ Ndbd_mem_manager::grow(Uint32 start, Uint32 cnt)
cnt--; // last page is always marked as empty
}
if (!m_used_bitmap_pages.get(start_bmp))
{
if (start != (start_bmp << BPP_2LOG))
{
ndbout_c("ndbd_malloc_impl.cpp:%d:grow(%d, %d) %d!=%d"
" - Unable to use due to bitmap pages missaligned!!",
__LINE__, start, cnt, start, (start_bmp << BPP_2LOG));
g_eventLogger.error("ndbd_malloc_impl.cpp:%d:grow(%d, %d)"
" - Unable to use due to bitmap pages missaligned!!",
__LINE__, start, cnt);
return;
}
for (Uint32 i = 0; i<m_used_bitmap_pages.size(); i++)
if (m_used_bitmap_pages[i] == start_bmp)
goto found;
#ifdef UNIT_TEST
ndbout_c("creating bitmap page %d", start_bmp);
#endif
if (start != (start_bmp << BPP_2LOG))
{
ndbout_c("ndbd_malloc_impl.cpp:%d:grow(%d, %d) %d!=%d not using %uMb"
" - Unable to use due to bitmap pages missaligned!!",
__LINE__, start, cnt, start, (start_bmp << BPP_2LOG),
(cnt >> (20 - 15)));
g_eventLogger.error("ndbd_malloc_impl.cpp:%d:grow(%d, %d) not using %uMb"
" - Unable to use due to bitmap pages missaligned!!",
__LINE__, start, cnt,
(cnt >> (20 - 15)));
dump();
return;
}
#ifdef UNIT_TEST
ndbout_c("creating bitmap page %d", start_bmp);
#endif
{
Alloc_page* bmp = m_base_page + start;
memset(bmp, 0, sizeof(Alloc_page));
m_used_bitmap_pages.set(start_bmp);
cnt--;
start++;
}
m_used_bitmap_pages.push_back(start_bmp);
found:
if (cnt)
{
m_resource_limit[0].m_curr += cnt;
m_resource_limit[0].m_max += cnt;
release(start, cnt);
if (start >= ZONE_LO_BOUND)
{
Uint64 mbytes = ((Uint64(cnt) * 32) + 1023) / 1024;
ndbout_c("Adding %uMb to ZONE_HI (%u,%u)", (Uint32)mbytes, start, cnt);
release(start, cnt);
}
else if (start + cnt <= ZONE_LO_BOUND)
{
Uint64 mbytes = ((Uint64(cnt)*32) + 1023) / 1024;
ndbout_c("Adding %uMb to ZONE_LO (%u,%u)", (Uint32)mbytes, start, cnt);
release(start, cnt);
}
else
{
Uint32 cnt0 = ZONE_LO_BOUND - start;
Uint32 cnt1 = start + cnt - ZONE_LO_BOUND;
Uint64 mbytes0 = ((Uint64(cnt0)*32) + 1023) / 1024;
Uint64 mbytes1 = ((Uint64(cnt1)*32) + 1023) / 1024;
ndbout_c("Adding %uMb to ZONE_LO (split %u,%u)", (Uint32)mbytes0,
start, cnt0);
ndbout_c("Adding %uMb to ZONE_HI (split %u,%u)", (Uint32)mbytes1,
ZONE_LO_BOUND, cnt1);
release(start, cnt0);
release(ZONE_LO_BOUND, cnt1);
}
}
}
@ -362,40 +465,58 @@ Ndbd_mem_manager::release(Uint32 start, Uint32 cnt)
set(start, start+cnt-1);
release_impl(start, cnt);
Uint32 zone = start < ZONE_LO_BOUND ? 0 : 1;
release_impl(zone, start, cnt);
}
void
Ndbd_mem_manager::release_impl(Uint32 start, Uint32 cnt)
Ndbd_mem_manager::release_impl(Uint32 zone, Uint32 start, Uint32 cnt)
{
assert(start);
Uint32 test = check(start-1, start+cnt);
if (test & 1)
if (start != ZONE_LO_BOUND && test & 1)
{
Free_page_data *fd = get_free_page_data(m_base_page + start - 1,
start - 1);
Uint32 sz = fd->m_size;
Uint32 left = start - sz;
remove_free_list(left, fd->m_list);
remove_free_list(zone, left, fd->m_list);
cnt += sz;
start = left;
}
Uint32 right = start + cnt;
if (test & 2)
if (right != ZONE_LO_BOUND && test & 2)
{
Free_page_data *fd = get_free_page_data(m_base_page+right, right);
Uint32 sz = fd->m_size;
remove_free_list(right, fd->m_list);
remove_free_list(zone, right, fd->m_list);
cnt += sz;
}
insert_free_list(start, cnt);
insert_free_list(zone, start, cnt);
}
void
Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
Ndbd_mem_manager::alloc(AllocZone zone,
Uint32* ret, Uint32 *pages, Uint32 min)
{
if (zone == NDB_ZONE_ANY)
{
Uint32 save = * pages;
alloc_impl(ZONE_HI, ret, pages, min);
if (*pages)
return;
* pages = save;
}
alloc_impl(ZONE_LO, ret, pages, min);
}
void
Ndbd_mem_manager::alloc_impl(Uint32 zone,
Uint32* ret, Uint32 *pages, Uint32 min)
{
Int32 i;
Uint32 start;
@ -407,19 +528,19 @@ Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
for (i = list; i < 16; i++)
{
if ((start = m_buddy_lists[i]))
if ((start = m_buddy_lists[zone][i]))
{
/* ---------------------------------------------------------------- */
/* PROPER AMOUNT OF PAGES WERE FOUND. NOW SPLIT THE FOUND */
/* AREA AND RETURN THE PART NOT NEEDED. */
/* ---------------------------------------------------------------- */
Uint32 sz = remove_free_list(start, i);
Uint32 sz = remove_free_list(zone, start, i);
Uint32 extra = sz - cnt;
assert(sz >= cnt);
if (extra)
{
insert_free_list(start + cnt, extra);
insert_free_list(zone, start + cnt, extra);
clear_and_set(start, start+cnt-1);
}
else
@ -427,8 +548,7 @@ Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
clear(start, start+cnt-1);
}
* ret = start;
m_resource_limit[0].m_curr += cnt;
assert(m_resource_limit[0].m_curr <= m_resource_limit[0].m_max);
assert(m_resource_limit[0].m_curr + cnt <= m_resource_limit[0].m_max);
return;
}
}
@ -442,13 +562,13 @@ Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
assert((Int32)list >= min_list);
for (i = list - 1; i >= min_list; i--)
{
if ((start = m_buddy_lists[i]))
if ((start = m_buddy_lists[zone][i]))
{
Uint32 sz = remove_free_list(start, i);
Uint32 sz = remove_free_list(zone, start, i);
Uint32 extra = sz - cnt;
if (sz > cnt)
{
insert_free_list(start + cnt, extra);
insert_free_list(zone, start + cnt, extra);
sz -= extra;
clear_and_set(start, start+sz-1);
}
@ -459,8 +579,7 @@ Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
* ret = start;
* pages = sz;
m_resource_limit[0].m_curr += sz;
assert(m_resource_limit[0].m_curr <= m_resource_limit[0].m_max);
assert(m_resource_limit[0].m_curr + sz <= m_resource_limit[0].m_max);
return;
}
}
@ -468,12 +587,12 @@ Ndbd_mem_manager::alloc(Uint32* ret, Uint32 *pages, Uint32 min)
}
void
Ndbd_mem_manager::insert_free_list(Uint32 start, Uint32 size)
Ndbd_mem_manager::insert_free_list(Uint32 zone, Uint32 start, Uint32 size)
{
Uint32 list = ndb_log2(size) - 1;
Uint32 last = start + size - 1;
Uint32 head = m_buddy_lists[list];
Uint32 head = m_buddy_lists[zone][list];
Free_page_data* fd_first = get_free_page_data(m_base_page+start,
start);
fd_first->m_list = list;
@ -495,11 +614,11 @@ Ndbd_mem_manager::insert_free_list(Uint32 start, Uint32 size)
fd->m_prev = start;
}
m_buddy_lists[list] = start;
m_buddy_lists[zone][list] = start;
}
Uint32
Ndbd_mem_manager::remove_free_list(Uint32 start, Uint32 list)
Ndbd_mem_manager::remove_free_list(Uint32 zone, Uint32 start, Uint32 list)
{
Free_page_data* fd = get_free_page_data(m_base_page+start, start);
Uint32 size = fd->m_size;
@ -509,7 +628,7 @@ Ndbd_mem_manager::remove_free_list(Uint32 start, Uint32 list)
if (prev)
{
assert(m_buddy_lists[list] != start);
assert(m_buddy_lists[zone][list] != start);
fd = get_free_page_data(m_base_page+prev, prev);
assert(fd->m_next == start);
assert(fd->m_list == list);
@ -517,8 +636,8 @@ Ndbd_mem_manager::remove_free_list(Uint32 start, Uint32 list)
}
else
{
assert(m_buddy_lists[list] == start);
m_buddy_lists[list] = next;
assert(m_buddy_lists[zone][list] == start);
m_buddy_lists[zone][list] = next;
}
if (next)
@ -535,42 +654,62 @@ Ndbd_mem_manager::remove_free_list(Uint32 start, Uint32 list)
void
Ndbd_mem_manager::dump() const
{
for(Uint32 i = 0; i<16; i++)
for (Uint32 zone = 0; zone < 2; zone ++)
{
printf(" list: %d - ", i);
Uint32 head = m_buddy_lists[i];
while(head)
for (Uint32 i = 0; i<16; i++)
{
Free_page_data* fd = get_free_page_data(m_base_page+head, head);
printf("[ i: %d prev %d next %d list %d size %d ] ",
head, fd->m_prev, fd->m_next, fd->m_list, fd->m_size);
head = fd->m_next;
printf(" list: %d - ", i);
Uint32 head = m_buddy_lists[zone][i];
while(head)
{
Free_page_data* fd = get_free_page_data(m_base_page+head, head);
printf("[ i: %d prev %d next %d list %d size %d ] ",
head, fd->m_prev, fd->m_next, fd->m_list, fd->m_size);
head = fd->m_next;
}
printf("EOL\n");
}
for (Uint32 i = 0; i<XX_RL_COUNT; i++)
{
printf("ri: %d min: %d curr: %d max: %d\n",
i,
m_resource_limit[i].m_min,
m_resource_limit[i].m_curr,
m_resource_limit[i].m_max);
}
printf("EOL\n");
}
}
void*
Ndbd_mem_manager::alloc_page(Uint32 type, Uint32* i)
Ndbd_mem_manager::alloc_page(Uint32 type, Uint32* i, AllocZone zone)
{
Uint32 idx = type & RG_MASK;
assert(idx && idx < XX_RL_COUNT);
Resource_limit tot = m_resource_limit[0];
Resource_limit rl = m_resource_limit[idx];
Uint32 add = (rl.m_curr < rl.m_min) ? 0 : 1; // Over min ?
Uint32 cnt = 1;
Uint32 res0 = (rl.m_curr < rl.m_min) ? 1 : 0;
Uint32 limit = (rl.m_max == 0 || rl.m_curr < rl.m_max) ? 0 : 1; // Over limit
Uint32 free = (tot.m_min + tot.m_curr < tot.m_max) ? 1 : 0; // Has free
if (likely(add == 0 || (limit == 0 && free == 1)))
assert(tot.m_min >= res0);
if (likely(res0 == 1 || (limit == 0 && free == 1)))
{
Uint32 cnt = 1;
alloc(i, &cnt, 1);
assert(cnt);
m_resource_limit[0].m_curr = tot.m_curr + add;
m_resource_limit[idx].m_curr = rl.m_curr + 1;
return m_base_page + *i;
alloc(zone, i, &cnt, 1);
if (likely(cnt))
{
m_resource_limit[0].m_curr = tot.m_curr + cnt;
m_resource_limit[0].m_min = tot.m_min - res0;
m_resource_limit[idx].m_curr = rl.m_curr + cnt;
check_resource_limits(m_resource_limit);
return m_base_page + *i;
}
}
return 0;
}
@ -582,10 +721,102 @@ Ndbd_mem_manager::release_page(Uint32 type, Uint32 i)
Resource_limit tot = m_resource_limit[0];
Resource_limit rl = m_resource_limit[idx];
Uint32 sub = (rl.m_curr < rl.m_min) ? 0 : 1; // Over min ?
Uint32 sub = (rl.m_curr <= rl.m_min) ? 1 : 0; // Over min ?
release(i, 1);
m_resource_limit[0].m_curr = tot.m_curr - sub;
m_resource_limit[0].m_curr = tot.m_curr - 1;
m_resource_limit[0].m_min = tot.m_min + sub;
m_resource_limit[idx].m_curr = rl.m_curr - 1;
check_resource_limits(m_resource_limit);
}
/**
 * Allocate a run of pages on behalf of a resource group.
 *
 * @param type  resource group; only (type & RG_MASK) is used and it must be
 *              in the range [1, XX_RL_COUNT)
 * @param i     out: index of the first page obtained (written by alloc())
 * @param cnt   in:  number of pages wanted
 *              out: number of pages actually obtained, 0 if none
 * @param min   NOTE(review): not used here - alloc() is always called with
 *              a minimum of 1; confirm whether it should be forwarded
 *
 * The request is never grown beyond what the caller asked for.  It is
 * reduced when it would exceed the group's max (m_max != 0) or when the
 * part exceeding the group's reservation does not fit in the free shared
 * memory.
 */
void
Ndbd_mem_manager::alloc_pages(Uint32 type, Uint32* i, Uint32 *cnt, Uint32 min)
{
  Uint32 idx = type & RG_MASK;
  assert(idx && idx < XX_RL_COUNT);
  Resource_limit tot = m_resource_limit[0];
  Resource_limit rl = m_resource_limit[idx];

  Uint32 req = *cnt;

  // Unsigned differences: "max" is only meaningful when rl.m_max != 0 and
  // "res0" only when rl.m_curr <= rl.m_min; both are guarded below.
  Uint32 max = rl.m_max - rl.m_curr;
  Uint32 res0 = rl.m_min - rl.m_curr;
  Uint32 free_shared = tot.m_max - (tot.m_min + tot.m_curr);

  Uint32 res1;
  if (rl.m_curr + req <= rl.m_min)
  {
    // all is reserved...
    res0 = req;
    res1 = 0;
  }
  else
  {
    // Cap the request at the group's remaining allowance.  Only ever
    // shrink it: the caller must not receive more pages than requested.
    if (rl.m_max && req > max)
    {
      req = max;
    }
    res0 = (rl.m_curr > rl.m_min) ? 0 : res0;
    res1 = req - res0;

    if (unlikely(res1 > free_shared))
    {
      res1 = free_shared;
      req = res0 + res1;
    }
  }

  // req = pages to alloc
  // res0 = portion that is taken from the group's reservation
  // res1 = portion that is over the reservation (taken from shared memory)
  assert (res0 + res1 == req);
  assert (tot.m_min >= res0);

  if (likely(req))
  {
    // Hi order allocations can always use any zone
    alloc(NDB_ZONE_ANY, i, &req, 1);
    * cnt = req;
    if (unlikely(req < res0)) // Got fewer pages than what was reserved :-(
    {
      res0 = req;
    }
    assert(tot.m_min >= res0);
    assert(tot.m_curr + req <= tot.m_max);

    m_resource_limit[0].m_curr = tot.m_curr + req;
    m_resource_limit[0].m_min = tot.m_min - res0;
    m_resource_limit[idx].m_curr = rl.m_curr + req;
    check_resource_limits(m_resource_limit);
    return ;
  }
  * cnt = req;
  return;
}
/**
 * Return a run of pages to the allocator and update the accounting of the
 * owning resource group.
 *
 * @param type  resource group; only (type & RG_MASK) is used and it must be
 *              in the range [1, XX_RL_COUNT)
 * @param i     index of the first page to release
 * @param cnt   number of pages to release
 */
void
Ndbd_mem_manager::release_pages(Uint32 type, Uint32 i, Uint32 cnt)
{
  Uint32 group = type & RG_MASK;
  assert(group && group < XX_RL_COUNT);

  // Snapshot the counters before handing the pages back
  Resource_limit total = m_resource_limit[0];
  Resource_limit mine = m_resource_limit[group];

  release(i, cnt);

  const Uint32 remaining = mine.m_curr - cnt;
  const bool was_over_reserved = mine.m_curr > mine.m_min;

  if (!was_over_reserved)
  {
    // Every released page goes back into the group's unused reservation
    m_resource_limit[0].m_min = total.m_min + cnt;
  }
  else if (remaining < mine.m_min)
  {
    // Only the part that drops below the reservation is given back to it
    m_resource_limit[0].m_min = total.m_min + (mine.m_min - remaining);
  }

  m_resource_limit[0].m_curr = total.m_curr - cnt;
  m_resource_limit[group].m_curr = remaining;
  check_resource_limits(m_resource_limit);
}
#ifdef UNIT_TEST
@ -781,3 +1012,4 @@ main(int argc, char** argv)
template class Vector<Chunk>;
#endif
template class Vector<Uint32>;

View file

@ -20,6 +20,7 @@
#include <Bitmask.hpp>
#include <assert.h>
#include "Pool.hpp"
#include <Vector.hpp>
/**
* 13 -> 8192 words -> 32768 bytes
@ -59,16 +60,19 @@ public:
bool init(bool allow_alloc_less_than_requested = true);
void* get_memroot() const { return (void*)m_base_page;}
void alloc(Uint32* ret, Uint32 *pages, Uint32 min_requested);
void release(Uint32 start, Uint32 cnt);
void dump() const ;
void* alloc_page(Uint32 type, Uint32* i);
/**
 * Zone selector for page allocation.
 *
 * The implementation places pages with id below ZONE_LO_BOUND (1u << 19)
 * in the low zone and all other pages in the high zone.
 */
enum AllocZone
{
NDB_ZONE_LO = 0, // Only allocate with page_id < (1 << 19), i.e. from the low zone
NDB_ZONE_ANY = 1 // Allocate with any page_id (high zone is tried first, then low)
};
void* alloc_page(Uint32 type, Uint32* i, enum AllocZone);
void release_page(Uint32 type, Uint32 i);
void* alloc_pages(Uint32 type, Uint32* i, Uint32 *cnt, Uint32 min = 1);
void release_pages(Uint32 type, Uint32 i, void*p, Uint32 cnt);
void alloc_pages(Uint32 type, Uint32* i, Uint32 *cnt, Uint32 min = 1);
void release_pages(Uint32 type, Uint32 i, Uint32 cnt);
/**
* Compute 2log of size
@ -80,25 +84,29 @@ public:
private:
void grow(Uint32 start, Uint32 cnt);
#define XX_RL_COUNT 3
#define XX_RL_COUNT 4
/**
* Return pointer to free page data on page
*/
static Free_page_data* get_free_page_data(Alloc_page*, Uint32 idx);
Bitmask<1> m_used_bitmap_pages;
Vector<Uint32> m_used_bitmap_pages;
Uint32 m_buddy_lists[16];
Uint32 m_buddy_lists[2][16];
Resource_limit m_resource_limit[XX_RL_COUNT]; // RG_COUNT in record_types.hpp
Alloc_page * m_base_page;
void release_impl(Uint32 start, Uint32 cnt);
void insert_free_list(Uint32 start, Uint32 cnt);
Uint32 remove_free_list(Uint32 start, Uint32 list);
void release_impl(Uint32 zone, Uint32 start, Uint32 cnt);
void insert_free_list(Uint32 zone, Uint32 start, Uint32 cnt);
Uint32 remove_free_list(Uint32 zone, Uint32 start, Uint32 list);
void set(Uint32 first, Uint32 last);
void clear(Uint32 first, Uint32 last);
void clear_and_set(Uint32 first, Uint32 last);
Uint32 check(Uint32 first, Uint32 last);
void alloc(AllocZone, Uint32* ret, Uint32 *pages, Uint32 min_requested);
void alloc_impl(Uint32 zone, Uint32* ret, Uint32 *pages, Uint32 min);
void release(Uint32 start, Uint32 cnt);
};
inline