diff --git a/storage/ndb/src/kernel/vm/SuperPool.cpp b/storage/ndb/src/kernel/vm/SuperPool.cpp index 65e5dd99629..c61bfcc3541 100644 --- a/storage/ndb/src/kernel/vm/SuperPool.cpp +++ b/storage/ndb/src/kernel/vm/SuperPool.cpp @@ -17,28 +17,33 @@ #include #include "SuperPool.hpp" +#define SP_ALIGN(sz, al) (((sz) + (al) - 1) & ~((al) - 1)) + +// This is used for m_freeRecI when there is no record pool page. +#define NNIL 0xffffffff + // SuperPool SuperPool::SuperPool(Uint32 pageSize, Uint32 pageBits) : - m_pageSize(SP_ALIGN_SIZE(pageSize, SP_ALIGN)), + m_pageSize(pageSize), m_pageBits(pageBits), + m_recBits(32 - m_pageBits), + m_recMask((1 << m_recBits) - 1), m_memRoot(0), m_pageEnt(0), - m_typeCheck(0), - m_typeSeq(0), - m_pageList(), - m_totalSize(0), - m_initSize(0), - m_incrSize(0), - m_maxSize(0) + m_pageType(0), + m_freeList(), + m_initPages(0), + m_incrPages(0), + m_maxPages(0), + m_totPages(0), + m_typeCount(0), + m_groupMinPct(0), + m_groupMinPages(0), + m_groupTotPages(0) { - assert(5 <= pageBits <= 30); -} - -bool -SuperPool::init() -{ - return true; + assert(m_pageSize != 0 && (m_pageSize & (m_pageSize - 1)) == 0); + assert(m_pageBits <= 30); } SuperPool::~SuperPool() @@ -47,13 +52,15 @@ SuperPool::~SuperPool() SuperPool::PageEnt::PageEnt() : m_pageType(0), - m_freeRecI(RNIL), m_useCount(0), + m_freeRecI(NNIL), m_nextPageI(RNIL), m_prevPageI(RNIL) { } +// page list routines + SuperPool::PageList::PageList() : m_headPageI(RNIL), m_tailPageI(RNIL), @@ -66,55 +73,29 @@ SuperPool::PageList::PageList(PtrI pageI) : m_tailPageI(pageI), m_pageCount(1) { -} - -SuperPool::RecInfo::RecInfo(Uint32 recType, Uint32 recSize) : - m_recType(recType), - m_recSize(recSize), - m_maxUseCount(0), - m_currPageI(RNIL), - m_currFreeRecI(RNIL), - m_currUseCount(0), - m_totalUseCount(0), - m_totalRecCount(0), - m_freeList(), - m_activeList(), - m_fullList() -{ -} - -SuperPool::PtrI -SuperPool::getPageI(void* pageP) -{ - const Uint32 pageSize = m_pageSize; - const Uint32 pageBits = m_pageBits; - const Uint32 recBits = 32 - pageBits; - void* const memRoot = m_memRoot; - assert(pageP == SP_ALIGN_PTR(pageP, memRoot, pageSize)); - my_ptrdiff_t ipL = ((Uint8*)pageP - (Uint8*)memRoot) / pageSize; - Int32 ip = (Int32)ipL; - Int32 lim = 1 << (pageBits - 1); - assert(ip == ipL && -lim <= ip && ip < lim && ip != -1); - PtrI pageI = ip << recBits; - assert(pageP == getPageP(pageI)); - return pageI; + assert(pageI != RNIL); } void SuperPool::movePages(PageList& pl1, PageList& pl2) { - const Uint32 recBits = 32 - m_pageBits; + PtrI pageI1 = pl1.m_tailPageI; + PtrI pageI2 = pl2.m_headPageI; if (pl1.m_pageCount != 0) { + assert(pageI1 != RNIL); if (pl2.m_pageCount != 0) { - PtrI pageI1 = pl1.m_tailPageI; - PtrI pageI2 = pl2.m_headPageI; + assert(pageI2 != RNIL); PageEnt& pe1 = getPageEnt(pageI1); PageEnt& pe2 = getPageEnt(pageI2); pe1.m_nextPageI = pageI2; pe2.m_prevPageI = pageI1; + pl1.m_tailPageI = pl2.m_tailPageI; pl1.m_pageCount += pl2.m_pageCount; + } else { + assert(pageI2 == RNIL); } } else { + assert(pageI1 == RNIL); pl1 = pl2; } pl2.m_headPageI = pl2.m_tailPageI = RNIL; @@ -124,6 +105,9 @@ SuperPool::movePages(PageList& pl1, PageList& pl2) void SuperPool::addHeadPage(PageList& pl, PtrI pageI) { + assert(pageI != RNIL); + PageEnt& pe = getPageEnt(pageI); + assert(pe.m_nextPageI == RNIL & pe.m_prevPageI == RNIL); PageList pl2(pageI); movePages(pl2, pl); pl = pl2; @@ -132,6 +116,9 @@ SuperPool::addHeadPage(PageList& pl, PtrI pageI) void SuperPool::addTailPage(PageList& pl, PtrI pageI) { + assert(pageI != RNIL); + PageEnt& pe = getPageEnt(pageI); + assert(pe.m_nextPageI == RNIL & pe.m_prevPageI == RNIL); PageList pl2(pageI); movePages(pl, pl2); } @@ -139,81 +126,187 @@ SuperPool::addTailPage(PageList& pl, PtrI pageI) void SuperPool::removePage(PageList& pl, PtrI pageI) { + assert(pageI != RNIL); PageEnt& pe = getPageEnt(pageI); - PtrI pageI1 = pe.m_prevPageI; - PtrI pageI2 = pe.m_nextPageI; - if (pageI1 != RNIL) { - PageEnt& pe1 = getPageEnt(pageI1); - pe1.m_nextPageI = pageI2; - if (pageI2 != RNIL) { - PageEnt& pe2 = getPageEnt(pageI2); - pe2.m_prevPageI = pageI1; - } else { - pl.m_tailPageI = pageI1; - } + if (pe.m_nextPageI != RNIL) { + assert(pl.m_tailPageI != pageI); + PageEnt& nextPe = getPageEnt(pe.m_nextPageI); + nextPe.m_prevPageI = pe.m_prevPageI; } else { - if (pageI2 != RNIL) { - PageEnt& pe2 = getPageEnt(pageI2); - pe2.m_prevPageI = pageI1; - pl.m_headPageI = pageI2; - } else { - pl.m_headPageI = pl.m_tailPageI = RNIL; - } + assert(pl.m_tailPageI == pageI); + pl.m_tailPageI = pe.m_prevPageI; } - pe.m_prevPageI = pe.m_nextPageI = RNIL; + if (pe.m_prevPageI != RNIL) { + assert(pl.m_headPageI != pageI); + PageEnt& prevPe = getPageEnt(pe.m_prevPageI); + prevPe.m_nextPageI = pe.m_nextPageI; + } else { + assert(pl.m_headPageI == pageI); + pl.m_headPageI = pe.m_nextPageI; + } + pe.m_nextPageI = RNIL; + pe.m_prevPageI = RNIL; assert(pl.m_pageCount != 0); pl.m_pageCount--; } -void -SuperPool::setCurrPage(RecInfo& ri, PtrI newPageI) +// reverse mapping + +SuperPool::PtrI +SuperPool::getPageI(void* pageP) { - PtrI oldPageI = ri.m_currPageI; - if (oldPageI != RNIL) { - // copy from cache - PageEnt& pe = getPageEnt(oldPageI); - pe.m_freeRecI = ri.m_currFreeRecI; - pe.m_useCount = ri.m_currUseCount; - // add to right list according to "pp2" policy - if (pe.m_useCount == 0) { - pe.m_pageType = 0; - addHeadPage(m_pageList, oldPageI); - ri.m_totalRecCount -= ri.m_maxUseCount; - } else if (pe.m_useCount < ri.m_maxUseCount) { - addHeadPage(ri.m_activeList, oldPageI); - } else { - addHeadPage(ri.m_fullList, oldPageI); - } + Uint32 pageSize = m_pageSize; + Uint32 pageBits = m_pageBits; + Uint32 recBits = m_recBits; + void* memRoot = m_memRoot; + my_ptrdiff_t ipL = (Uint8*)pageP - (Uint8*)memRoot; + assert(ipL % pageSize == 0); + ipL /= (Int32)pageSize; + Int32 ip = (Int32)ipL; + Int32 lim = 1 << (pageBits - 1); + if (! (ip == ipL && -lim <= ip && ip < lim && ip != -1)) { + // page was too distant from memory root + return RNIL; } - if (newPageI != RNIL) { - PageEnt& pe = getPageEnt(newPageI); - // copy to cache - ri.m_currPageI = newPageI; - ri.m_currFreeRecI = pe.m_freeRecI; - ri.m_currUseCount = pe.m_useCount; - // remove from right list - if (pe.m_useCount == 0) { - removePage(ri.m_freeList, newPageI); - } else if (pe.m_useCount < ri.m_maxUseCount) { - removePage(ri.m_activeList, newPageI); - } else { - removePage(ri.m_fullList, newPageI); - } - } else { - ri.m_currPageI = RNIL; - ri.m_currFreeRecI = RNIL; - ri.m_currUseCount = 0; + PtrI pageI = ip << recBits; + assert(pageP == getPageP(pageI)); + return pageI; +} + +// record pool + +SuperPool::RecInfo::RecInfo(GroupPool& gp, Uint32 recSize) : + m_groupPool(gp), + m_recSize(recSize), + m_recType(0), + m_maxPerPage(0), + m_freeRecI(NNIL), + m_useCount(0), + m_pageList(), + m_hyX(1), + m_hyY(2) +{ + SuperPool& sp = gp.m_superPool; + m_recType = (sp.m_typeCount++ << 1) | 1; + assert(m_recSize == SP_ALIGN(m_recSize, sizeof(Uint32))); + { // compute max records per page + Uint32 n1 = sp.m_pageSize / m_recSize; + Uint32 b2 = (sp.m_recBits < 16 ? sp.m_recBits : 16); + Uint32 n2 = (1 << b2) - 1; // last is reserved + m_maxPerPage = (n1 < n2 ? n1 : n2); + assert(m_maxPerPage != 0); } } +Uint32 +SuperPool::getFreeCount(RecInfo& ri, PtrI recI) +{ + Uint32 n = 0; + Uint32 recMask = m_recMask; + Uint32 loopRecI = recI; + while ((loopRecI & recMask) != recMask) { + n++; + void* loopRecP = getRecP(loopRecI, ri); + loopRecI = *(Uint32*)loopRecP; + } + assert(n == (Uint16)n); + return n; +} + +Uint32 +SuperPool::getRecPageCount(RecInfo& ri) +{ + Uint32 n = 0; + for (Uint32 k = 0; k <= 2; k++) + n += ri.m_pageList[k].m_pageCount; + if (ri.m_freeRecI != NNIL) + n += 1; + return n; +} + +Uint32 +SuperPool::getRecTotCount(RecInfo& ri) +{ + return ri.m_maxPerPage * getRecPageCount(ri); +} + +Uint32 +SuperPool::getRecUseCount(RecInfo& ri) +{ + Uint32 n = ri.m_useCount; + // current page does not keep count + if (ri.m_freeRecI != NNIL) { + Uint32 maxPerPage = ri.m_maxPerPage; + Uint32 freeCount = getFreeCount(ri, ri.m_freeRecI); + assert(maxPerPage >= freeCount); + n += maxPerPage - freeCount; + } + return n; +} + +// current page + +Uint32 +SuperPool::getRecPageList(RecInfo& ri, PageEnt& pe) +{ + if (pe.m_useCount == 0) + return 0; + if (pe.m_useCount < ri.m_maxPerPage) + return 1; + if (pe.m_useCount == ri.m_maxPerPage) + return 2; + assert(false); + return ~(Uint32)0; +} + +void +SuperPool::addCurrPage(RecInfo& ri, PtrI pageI) +{ + PageEnt& pe = getPageEnt(pageI); + ri.m_freeRecI = pe.m_freeRecI; + // remove from right list + Uint32 k = getRecPageList(ri, pe); + assert(k != 2); + removePage(ri.m_pageList[k], pageI); + assert(ri.m_useCount >= pe.m_useCount); + ri.m_useCount -= pe.m_useCount; +} + +void +SuperPool::removeCurrPage(RecInfo& ri) +{ + Uint32 recMask = m_recMask; + PtrI pageI = ri.m_freeRecI & ~ m_recMask; + // update page entry + PageEnt& pe = getPageEnt(pageI); + pe.m_freeRecI = ri.m_freeRecI; + Uint32 maxPerPage = ri.m_maxPerPage; + Uint32 freeCount = getFreeCount(ri, pe.m_freeRecI); + assert(maxPerPage >= freeCount); + pe.m_useCount = maxPerPage - freeCount; + // add to right list + Uint32 k = getRecPageList(ri, pe); + addHeadPage(ri.m_pageList[k], pageI); + ri.m_useCount += pe.m_useCount; + ri.m_freeRecI = NNIL; + if (k == 0) { + freeRecPages(ri); + } +} + +// page allocation + bool SuperPool::getAvailPage(RecInfo& ri) { PtrI pageI; - if ((pageI = ri.m_activeList.m_headPageI) != RNIL || - (pageI = ri.m_freeList.m_headPageI) != RNIL || + if ((pageI = ri.m_pageList[1].m_headPageI) != RNIL || + (pageI = ri.m_pageList[0].m_headPageI) != RNIL || (pageI = getFreePage(ri)) != RNIL) { - setCurrPage(ri, pageI); + // the page is in record pool now + if (ri.m_freeRecI != NNIL) + removeCurrPage(ri); + addCurrPage(ri, pageI); return true; } return false; @@ -222,121 +315,310 @@ SuperPool::getAvailPage(RecInfo& ri) SuperPool::PtrI SuperPool::getFreePage(RecInfo& ri) { + GroupPool& gp = ri.m_groupPool; PtrI pageI; - if (m_pageList.m_pageCount != 0) { - pageI = m_pageList.m_headPageI; - removePage(m_pageList, pageI); - } else { - pageI = getNewPage(); - if (pageI == RNIL) - return RNIL; + if ((pageI = getFreePage(gp)) != RNIL) { + initFreePage(ri, pageI); + addHeadPage(ri.m_pageList[0], pageI); + return pageI; } + return RNIL; +} + +SuperPool::PtrI +SuperPool::getFreePage(GroupPool& gp) +{ + PtrI pageI; + if ((pageI = gp.m_freeList.m_headPageI) != RNIL) { + removePage(gp.m_freeList, pageI); + return pageI; + } + if (gp.m_totPages < getMaxPages(gp) && + (pageI = getFreePage()) != RNIL) { + gp.m_totPages++; + return pageI; + } + return RNIL; +} + +SuperPool::PtrI +SuperPool::getFreePage() +{ + PtrI pageI; + if ((pageI = m_freeList.m_headPageI) != RNIL) { + removePage(m_freeList, pageI); + return pageI; + } + if ((pageI = getNewPage()) != RNIL) { + return pageI; + } + return RNIL; +} + +void +SuperPool::initFreePage(RecInfo& ri, PtrI pageI) +{ void* pageP = getPageP(pageI); // set up free record list - Uint32 maxUseCount = ri.m_maxUseCount; + Uint32 num = ri.m_maxPerPage; Uint32 recSize = ri.m_recSize; void* recP = (Uint8*)pageP; Uint32 irNext = 1; - while (irNext < maxUseCount) { + while (irNext < num) { *(Uint32*)recP = pageI | irNext; recP = (Uint8*)recP + recSize; irNext++; } - *(Uint32*)recP = RNIL; - // add to total record count - ri.m_totalRecCount += maxUseCount; + // terminator has all recBits set + *(Uint32*)recP = pageI | m_recMask; // set up new page entry PageEnt& pe = getPageEnt(pageI); new (&pe) PageEnt(); pe.m_pageType = ri.m_recType; pe.m_freeRecI = pageI | 0; pe.m_useCount = 0; - // set type check bits - setCheckBits(pageI, ri.m_recType); - // add to record pool free list - addHeadPage(ri.m_freeList, pageI); - return pageI; + // set type check byte + Uint32 ip = pageI >> m_recBits; + m_pageType[ip] = (ri.m_recType & 0xFF); +} + +// release + +void +SuperPool::releaseNotCurrent(RecInfo& ri, PtrI recI) +{ + PageEnt& pe = getPageEnt(recI); + void* recP = getRecP(recI, ri); + *(Uint32*)recP = pe.m_freeRecI; + pe.m_freeRecI = recI; + PtrI pageI = recI & ~ m_recMask; + Uint32 maxPerPage = ri.m_maxPerPage; + // move to right list + Uint32 k1 = getRecPageList(ri, pe); + assert(pe.m_useCount != 0); + pe.m_useCount--; + Uint32 k2 = getRecPageList(ri, pe); + if (k1 != k2) { + removePage(ri.m_pageList[k1], pageI); + addHeadPage(ri.m_pageList[k2], pageI); + if (k2 == 0) { + freeRecPages(ri); + } + } + assert(ri.m_useCount != 0); + ri.m_useCount--; } void -SuperPool::setSizes(size_t initSize, size_t incrSize, size_t maxSize) +SuperPool::freeRecPages(RecInfo& ri) { - const Uint32 pageSize = m_pageSize; - m_initSize = SP_ALIGN_SIZE(initSize, pageSize); - m_incrSize = SP_ALIGN_SIZE(incrSize, pageSize); - m_maxSize = SP_ALIGN_SIZE(maxSize, pageSize); + // ignore current page + Uint32 useCount = ri.m_useCount; + Uint32 totCount = 0; + for (uint32 k = 0; k <= 2; k++) + totCount += ri.m_pageList[k].m_pageCount; + totCount *= ri.m_maxPerPage; + assert(totCount >= useCount); + if ((totCount - useCount) * ri.m_hyY < useCount * ri.m_hyX) + return; + // free all free pages + GroupPool& gp = ri.m_groupPool; + Uint32 minPages = getMinPages(gp); + PageList& pl = ri.m_pageList[0]; + while (pl.m_pageCount != 0) { + PtrI pageI = pl.m_headPageI; + removePage(pl, pageI); + PageEnt& pe = getPageEnt(pageI); + pe.m_pageType = 0; + pe.m_freeRecI = NNIL; + Uint32 ip = pageI >> m_recBits; + m_pageType[ip] = 0; + if (gp.m_totPages <= minPages) { + addHeadPage(gp.m_freeList, pageI); + } else { + // return excess to super pool + addHeadPage(m_freeList, pageI); + assert(gp.m_totPages != 0); + gp.m_totPages--; + } + } } +void +SuperPool::freeAllRecPages(RecInfo& ri, bool force) +{ + GroupPool& gp = ri.m_groupPool; + if (ri.m_freeRecI != NNIL) + removeCurrPage(ri); + assert(force || ri.m_useCount == 0); + for (Uint32 k = 0; k <= 2; k++) + movePages(gp.m_freeList, ri.m_pageList[k]); +} + +// size parameters + +void +SuperPool::setInitPages(Uint32 initPages) +{ + m_initPages = initPages; +} + +void +SuperPool::setIncrPages(Uint32 incrPages) +{ + m_incrPages = incrPages; +} + +void +SuperPool::setMaxPages(Uint32 maxPages) +{ + m_maxPages = maxPages; +} + +Uint32 +SuperPool::getGpMinPages() +{ + Uint32 minPages = (m_groupMinPct * m_totPages) / 100; + if (minPages < m_groupMinPages) + minPages = m_groupMinPages; + return minPages; +} + +Uint32 +SuperPool::getMinPages(GroupPool& gp) +{ + Uint32 minPages = (gp.m_minPct * m_totPages) / 100; + if (minPages < gp.m_minPages) + minPages = gp.m_minPages; + return minPages; +} + +Uint32 +SuperPool::getMaxPages(GroupPool& gp) +{ + Uint32 n1 = getGpMinPages(); + Uint32 n2 = getMinPages(gp); + assert(n1 >= n2); + // pages reserved by other groups + Uint32 n3 = n1 - n2; + // rest can be claimed + Uint32 n4 = (m_totPages >= n3 ? m_totPages - n3 : 0); + return n4; +} + +// debug + void SuperPool::verify(RecInfo& ri) { - PageList* plList[3] = { &ri.m_freeList, &ri.m_activeList, &ri.m_fullList }; - for (int i = 0; i < 3; i++) { - PageList& pl = *plList[i]; - unsigned count = 0; + GroupPool& gp = ri.m_groupPool; + verifyPageList(m_freeList); + verifyPageList(gp.m_freeList); + for (Uint32 k = 0; k <= 2; k++) { + PageList& pl = ri.m_pageList[k]; + verifyPageList(pl); PtrI pageI = pl.m_headPageI; while (pageI != RNIL) { PageEnt& pe = getPageEnt(pageI); - PtrI pageI1 = pe.m_prevPageI; - PtrI pageI2 = pe.m_nextPageI; - if (count == 0) { - assert(pageI1 == RNIL); - } else { - assert(pageI1 != RNIL); - PageEnt& pe1 = getPageEnt(pageI1); - assert(pe1.m_nextPageI == pageI); - if (pageI2 != RNIL) { - PageEnt& pe2 = getPageEnt(pageI2); - assert(pe2.m_prevPageI == pageI); - } - } - pageI = pageI2; - count++; + assert(pe.m_pageType == ri.m_recType); + Uint32 maxPerPage = ri.m_maxPerPage; + Uint32 freeCount = getFreeCount(ri, pe.m_freeRecI); + assert(maxPerPage >= freeCount); + Uint32 useCount = maxPerPage - freeCount; + assert(pe.m_useCount == useCount); + assert(k != 0 || useCount == 0); + assert(k != 1 || (useCount != 0 && freeCount != 0)); + assert(k != 2 || freeCount == 0); + pageI = pe.m_nextPageI; } - assert(pl.m_pageCount == count); } } +void +SuperPool::verifyPageList(PageList& pl) +{ + Uint32 count = 0; + PtrI pageI = pl.m_headPageI; + while (pageI != RNIL) { + PageEnt& pe = getPageEnt(pageI); + if (pe.m_prevPageI == RNIL) { + assert(count == 0); + } else { + PageEnt& prevPe = getPageEnt(pe.m_prevPageI); + assert(prevPe.m_nextPageI == pageI); + } + if (pe.m_nextPageI == RNIL) { + assert(pl.m_tailPageI == pageI); + } else { + PageEnt& nextPe = getPageEnt(pe.m_nextPageI); + assert(nextPe.m_prevPageI == pageI); + } + if (pe.m_pageType != 0) { + assert(pe.m_freeRecI != NNIL); + PageEnt& pe2 = getPageEnt(pe.m_freeRecI); + assert(&pe == &pe2); + } else { + assert(pe.m_freeRecI == NNIL); + } + pageI = pe.m_nextPageI; + count++; + } + assert(pl.m_pageCount == count); +} + +// GroupPool + +GroupPool::GroupPool(SuperPool& sp) : + m_superPool(sp), + m_minPct(0), + m_minPages(0), + m_totPages(0), + m_freeList() +{ +} + +GroupPool::~GroupPool() +{ +} + +void +GroupPool::setMinPct(Uint32 minPct) +{ + SuperPool& sp = m_superPool; + // subtract any previous value + assert(sp.m_groupMinPct >= m_minPct); + sp.m_groupMinPct -= m_minPct; + // add new value + sp.m_groupMinPct += minPct; + m_minPct = minPct; +} + +void +GroupPool::setMinPages(Uint32 minPages) +{ + SuperPool& sp = m_superPool; + // subtract any previous value + assert(sp.m_groupMinPages >= m_minPages); + sp.m_groupMinPages -= m_minPages; + // add new value + sp.m_groupMinPages += minPages; + m_minPages = minPages; +} + // HeapPool HeapPool::HeapPool(Uint32 pageSize, Uint32 pageBits) : SuperPool(pageSize, pageBits), m_areaHead(), m_currArea(&m_areaHead), - m_lastArea(&m_areaHead), - m_mallocPart(4) + m_lastArea(&m_areaHead) { } -bool -HeapPool::init() -{ - const Uint32 pageBits = m_pageBits; - if (! SuperPool::init()) - return false;; - // allocate page entry array - Uint32 peBytes = (1 << pageBits) * sizeof(PageEnt); - m_pageEnt = static_cast(malloc(peBytes)); - if (m_pageEnt == 0) - return false; - memset(m_pageEnt, 0, peBytes); - // allocate type check array - Uint32 tcWords = 1 << (pageBits - (5 - SP_CHECK_LOG2)); - m_typeCheck = static_cast(malloc(tcWords << 2)); - if (m_typeCheck == 0) - return false; - memset(m_typeCheck, 0, tcWords << 2); - // allocate initial data - assert(m_totalSize == 0); - if (! allocMoreData(m_initSize)) - return false; - return true; -} - HeapPool::~HeapPool() { free(m_pageEnt); - free(m_typeCheck); + free(m_pageType); Area* ap; while ((ap = m_areaHead.m_nextArea) != 0) { m_areaHead.m_nextArea = ap->m_nextArea; @@ -349,29 +631,28 @@ HeapPool::Area::Area() : m_nextArea(0), m_firstPageI(RNIL), m_currPage(0), - m_numPages(0), - m_memory(0) + m_memory(0), + m_pages(0), + m_numPages(0) { } SuperPool::PtrI HeapPool::getNewPage() { - const Uint32 pageSize = m_pageSize; - const Uint32 pageBits = m_pageBits; - const Uint32 recBits= 32 - pageBits; Area* ap = m_currArea; if (ap->m_currPage == ap->m_numPages) { // area is used up if (ap->m_nextArea == 0) { - // todo dynamic increase - assert(m_incrSize == 0); - return RNIL; + if (! allocMemory()) + return RNIL; } ap = m_currArea = ap->m_nextArea; + assert(ap != 0); } assert(ap->m_currPage < ap->m_numPages); PtrI pageI = ap->m_firstPageI; + Uint32 recBits = m_recBits; Int32 ip = (Int32)pageI >> recBits; ip += ap->m_currPage; pageI = ip << recBits; @@ -380,63 +661,90 @@ HeapPool::getNewPage() } bool -HeapPool::allocMoreData(size_t size) +HeapPool::allocInit() { - const Uint32 pageSize = m_pageSize; - const Uint32 pageBits = m_pageBits; - const Uint32 recBits = 32 - pageBits; - const Uint32 incrSize = m_incrSize; - const Uint32 incrPages = incrSize / pageSize; - const Uint32 mallocPart = m_mallocPart; - size = SP_ALIGN_SIZE(size, pageSize); - if (incrSize != 0) - size = SP_ALIGN_SIZE(size, incrSize); - Uint32 needPages = size / pageSize; - while (needPages != 0) { - Uint32 wantPages = needPages; - if (incrPages != 0 && wantPages > incrPages) - wantPages = incrPages; - Uint32 tryPages = 0; - void* p1 = 0; - for (Uint32 i = mallocPart; i > 0 && p1 == 0; i--) { - // one page is usually wasted due to alignment to memory root - tryPages = ((wantPages + 1) * i) / mallocPart; - if (tryPages < 2) - break; - p1 = malloc(pageSize * tryPages); - } - if (p1 == 0) + Uint32 pageCount = (1 << m_pageBits); + if (m_pageEnt == 0) { + // allocate page entry array + Uint32 bytes = pageCount * sizeof(PageEnt); + m_pageEnt = static_cast(malloc(bytes)); + if (m_pageEnt == 0) return false; - if (m_memRoot == 0) { - // set memory root at first "big" alloc - // assume malloc header makes later ip = -1 impossible - m_memRoot = p1; - } - void* p2 = SP_ALIGN_PTR(p1, m_memRoot, pageSize); - Uint32 numPages = tryPages - (p1 != p2); - my_ptrdiff_t ipL = ((Uint8*)p2 - (Uint8*)m_memRoot) / pageSize; - Int32 ip = (Int32)ipL; - Int32 lim = 1 << (pageBits - 1); - if (! (ip == ipL && -lim <= ip && ip + numPages < lim)) { - free(p1); + for (Uint32 i = 0; i < pageCount; i++) + new (&m_pageEnt[i]) PageEnt(); + } + if (m_pageType == 0) { + // allocate type check array + Uint32 bytes = pageCount; + m_pageType = static_cast(malloc(bytes)); + if (m_pageType == 0) return false; - } - assert(ip != -1); - PtrI pageI = ip << recBits; - needPages = (needPages >= numPages ? needPages - numPages : 0); - m_totalSize += numPages * pageSize; - // allocate new area - Area* ap = static_cast(malloc(sizeof(Area))); - if (ap == 0) { - free(p1); - return false; - } - new (ap) Area(); - ap->m_firstPageI = pageI; - ap->m_numPages = numPages; - ap->m_memory = p1; - m_lastArea->m_nextArea = ap; - m_lastArea = ap; + memset(m_pageType, 0, bytes); + } + return true; +} + +bool +HeapPool::allocArea(Area* ap, Uint32 tryPages) +{ + Uint32 pageSize = m_pageSize; + // one page is usually lost due to alignment + Uint8* p1 = (Uint8*)malloc(pageSize * (tryPages + 1)); + if (p1 == 0) + return false; + // align + UintPtr n1 = (UintPtr)p1; + UintPtr n2 = SP_ALIGN(n1, (UintPtr)pageSize); + Uint8* p2 = p1 + (n2 - n1); + assert(p2 >= p1 && p2 - p1 < pageSize && (UintPtr)p2 % pageSize == 0); + // set memory root to first allocated page + if (m_memRoot == 0) + m_memRoot = p2; + // convert to i-value + Uint32 pageI = getPageI(p2); + ap->m_firstPageI = pageI; + ap->m_currPage = 0; + ap->m_memory = p1; + ap->m_pages = p2; + ap->m_numPages = tryPages + (p1 == p2); + return true; +} + +bool +HeapPool::allocMemory() +{ + if (! allocInit()) + return false; + // compute number of additional pages needed + if (m_maxPages <= m_totPages) + return false; + Uint32 needPages = (m_totPages == 0 ? m_initPages : m_incrPages); + if (needPages > m_maxPages - m_totPages) + needPages = m_maxPages - m_totPages; + while (needPages != 0) { + // add new area + Area* ap = static_cast(malloc(sizeof(Area))); + if (ap == 0) + return false; + new (ap) Area(); + m_lastArea->m_nextArea = ap; + m_lastArea = ap; + // initial malloc is done in m_incrPages pieces + Uint32 wantPages = needPages; + if (m_incrPages != 0 && wantPages > m_incrPages) + wantPages = m_incrPages; + Uint32 tryPages = wantPages; + while (tryPages != 0) { + if (allocArea(ap, tryPages)) + break; + tryPages /= 2; + } + if (tryPages == 0) + return false; + // update counts + Uint32 numPages = ap->m_numPages; + m_totPages += numPages; + needPages = (needPages > numPages ? needPages - numPages : 0); } return true; } diff --git a/storage/ndb/src/kernel/vm/SuperPool.hpp b/storage/ndb/src/kernel/vm/SuperPool.hpp index 157c75aa0d5..187383f5f71 100644 --- a/storage/ndb/src/kernel/vm/SuperPool.hpp +++ b/storage/ndb/src/kernel/vm/SuperPool.hpp @@ -22,20 +22,18 @@ #include #include -#define NDB_SP_VERIFY_LEVEL 1 - /* * SuperPool - super pool for record pools (abstract class) * - * Documents SuperPool and RecordPool. + * Documents: SuperPool GroupPool RecordPool * - * GENERAL + * SUPER POOL * * A "super pool" is a shared pool of pages of fixed size. A "record * pool" is a pool of records of fixed size. One super pool instance is - * used by any number of record pools to allocate their memory. - * A special case is a "page pool" where a record is a simple page, - * possibly smaller than super pool page. + * used by a number of record pools to allocate their memory. A special + * case is a "page pool" where a record is a simple page whose size + * divides super pool page size. * * A record pool allocates memory in pages. Thus each used page is * associated with one record pool and one record type. The records on @@ -49,27 +47,26 @@ * record is stored as an "i-value" from which the record pointer "p" is * computed. In super pool the i-value is a Uint32 with two parts: * - * - "ip" index of page within super pool (high pageBits) - * - "ir" index of record within page (low recBits) + * - "ip" index of page within super pool (high "pageBits") + * - "ir" index of record within page (low "recBits") + * + * At most 16 recBits are used, the rest are zero. * * The translation between "ip" and page address is described in next * section. Once page address is known, the record address is found * from "ir" in the obvious way. * - * The main advantage with i-value is that it can be verified. The - * level of verification depends on compile type (release, debug). + * One advantage of i-value is that it can be verified. The level of + * verification can depend on compile options. * - * - "v0" minimal sanity check - * - "v1" check record type matches page type, see below - * - "v2" check record is in use (not yet implemented) + * - "v1" check i-value specifies valid page + * - "v2" check record type matches page type, see below + * - "v3" check record is in use + * - "v4" check unused record is unmodified * * Another advantage of a 32-bit i-value is that it extends the space of * 32-bit addressable records on a 64-bit platform. * - * RNIL is 0xffffff00 and indicates NULL i-value. To avoid hitting RNIL - * it is required that pageBits <= 30 and that the maximum value of the - * range (2^pageBits-1) is not used. - * * MEMORY ROOT * * This super pool requires a "memory root" i.e. a memory address such @@ -77,13 +74,28 @@ * * page address = memory root + (signed)ip * page size * - * This is possible on most platforms, provided that the memory root and + * This is possible on all platforms, provided that the memory root and * all pages are either on the heap or on the stack, in order to keep * the size of "ip" reasonably small. * * The cast (signed)ip is done as integer of pageBits bits. "ip" has * same sign bit as i-value "i" so (signed)ip = (Int32)i >> recBits. - * The RNIL restriction can be expressed as (signed)ip != -1. + * + * RESERVED I-VALUES + * + * RNIL is 0xffffff00 (signed -256). It is used everywhere in NDB as + * "null pointer" i.e. as an i-value which does not point to a record. + * In addition the signed values -255 to -1 are reserved for use by the + * application. + * + * An i-value with all "ir" bits set is used as terminator in free + * record list. Unlike RNIL, it still has valid page bits "ip". + * + * Following restrictions avoid hitting the reserved values: + * + * - pageBits is <= 30 + * - the maximum "ip" value 2^pageBits-1 (signed -1) is not used + * - the maximum "ir" value 2^recBits-1 is not used * * PAGE ENTRIES * @@ -95,37 +107,54 @@ * - pointers (as i-values) to next and previous page in list * * Page entry cannot be stored on the page itself since this prevents - * aligning pages to OS block size and the use of BATs (don't ask) for - * page pools in NDB. For now the implementation provides an array of - * page entries with place for all (2^pageBits) entries. + * aligning pages to OS block size and the use of BATs for page pools in + * NDB. For now the implementation provides an array of page entries + * with place for all potential (2^pageBits) entries. * * PAGE TYPE * - * Page type is (in principle) unique to the record pool using the super - * pool. It is assigned in record pool constructor. Page type zero - * means that the page is free i.e. not allocated to a record pool. + * Page type is unique to the record pool using the super pool. It is + * assigned in record pool constructor. Page type zero means that the + * page is free i.e. not allocated to a record pool. * - * Each "i-p" conversion checks ("v1") that the record belongs to same + * Each "i-p" conversion checks ("v2") that the record belongs to same * pool as the page. This check is much more common than page or record - * allocation. To make it cache effective, there is a separate array of - * reduced "type bits" (computed from real type). + * allocation. To make it cache effective, there is a separate page + * type array. It truncates type to one non-zero byte. * - * FREE LISTS + * GROUP POOL * - * A record is either used or on the free list of the record pool. - * A page has a use count i.e. number of used records. When use count - * drops to zero the page can be returned to the super pool. This is - * not necessarily done at once, or ever. + * Each record pool belongs to a group. The group specifies minimum + * size or memory percentage the group must be able to allocate. The + * sum of the minimum sizes of group pools is normally smaller than + * super pool size. This provides unclaimed memory which a group can + * use temporarily to allocate more than its minimum. * - * To make freeing pages feasible, the record pool free list has two - * levels. There are available pages (some free) and a singly linked - * free list within the page. A page allocated to record pool is on one - * of 4 lists: + * The record pools within a group compete freely for the available + * memory within the group. * - * - free page list (all free, available) - * - active page list (some free, some used, available) - * - full page list (none free) - * - current page (list of 1), see below + * Typical exmaple is group of all metadata pools. The group allows + * specifying the memory to reserve for metadata, without having to + * specify number of tables, attributes, indexes, triggers, etc. + * + * PAGE LISTS + * + * Super pool has free page list. Each group pool uses it to allocate + * its own free page list. And each record pool within the group uses + * the group's free list to allocate its pages. + * + * A page allocated to a record pool has a use count i.e. number of used + * records. When use count drops to zero the page can be returned to + * the group. This is not necessarily done at once. + * + * The list of free records in a record pool has two levels. There are + * available pages (some free) and a singly linked free list within the + * page. A page allocated to record pool is on one of 4 lists: + * + * - free page (all free, available, could be returned to group) + * - busy page (some free, some used, available) + * - full page (none free) + * - current page (list of one), see below * * Some usage types (temporary pools) may never free records. They pay * a small penalty for the extra overhead. @@ -133,7 +162,7 @@ * RECORD POOL * * A pool of records which allocates its memory from a super pool - * instance specified in the constructor. There are 3 basic operations: + * instance via a group pool. There are 3 basic operations: * * - getPtr - translate i-value to pointer-to-record p * - seize - allocate record @@ -141,76 +170,64 @@ * * CURRENT PAGE * - * getPtr is a fast computation which does not touch the page. For - * seize and release there is an optimization: + * getPtr is a fast computation which does not touch the page entry. + * For seize (and release) there is a small optimization. * - * Define "current page" as page of latest seize or release. Its page - * entry is cached under record pool instance. The page is removed from - * its normal list. Seize and release on current page are fast and - * avoid touching the page. The current page is used until + * The "current page" is the page of latest seize. It is unlinked from + * its normal list and the free record pointer is stored under record + * pool instance. * - * - seize and current page is full - * - release and the page is not current page + * The page remains current until there is a seize and the page is full. + * Then the real page entry and its list membership are updated, and + * a new page is made current. * - * Then the real page entry is updated and the page is added to the - * appropriate list, and a new page is made current. + * This implies that each (active) record pool allocates at least one + * page which is never returned to the group. * * PAGE POLICY * - * Allocating new page to record pool is expensive. Therefore record - * pool should not always return empty pages to super pool. There are - * two trivial policies, each with problems: + * A group pool returns its "excess" (above minimum) free pages to the + * super pool immediately. * - * - "pp1" never return empty page to super pool - * - "pp2" always return empty page to super pool + * Allocating a new page to a record pool is expensive due to free list + * setup. Therefore a record pool should not always return empty pages + * to the group. Policies: * - * This implementation uses "pp2" for now. A real policy is implemented - * in next version. + * - "pp1" never return empty page to the group + * - "pp2" always return empty (non-current) page to the group + * - "pp3" simple hysteresis * - * OPEN ISSUES AND LIMITATIONS + * Last one "pp3" is used. It works as follows: * - * - smarter (virtual) placement of check bits & page entries - * - should getPtr etc be inlined? (too much code) - * - real page policy - * - other implementations (only HeapPool is done) - * - super pool list of all record pools, for statistics etc - * - access by multiple threads is not supported + * When a page becomes free, check if number of free records exceeds + * some fixed fraction of all records. If it does, move all free pages + * to the group. Current page is ignored in the check. + * + * TODO + * + * Define abstract class SuperAlloc. Make SuperPool a concrete class + * with SuperAlloc instance in ctor. Replace HeapPool by HeapAlloc. */ -// align size -#define SP_ALIGN_SIZE(sz, al) \ - (((sz) + (al) - 1) & ~((al) - 1)) - -// align pointer relative to base -#define SP_ALIGN_PTR(p, base, al) \ - (void*)((Uint8*)(base) + SP_ALIGN_SIZE((Uint8*)(p) - (Uint8*)(base), (al))) +// Types forward. +class GroupPool; class SuperPool { public: - // Type of i-value, used to reference both pages and records. Page - // index "ip" occupies the high bits. The i-value of a page is same - // as i-value of record 0 on the page. + // Type of i-value, used to reference both pages and records. typedef Uint32 PtrI; - // Size and address alignment given as number of bytes (power of 2). - STATIC_CONST( SP_ALIGN = 8 ); - - // Page entry. Current|y allocated as array of (2^pageBits). + // Page entry. struct PageEnt { PageEnt(); - Uint32 m_pageType; - Uint32 m_freeRecI; - Uint32 m_useCount; + Uint16 m_pageType; // zero if not in record pool + Uint16 m_useCount; // used records on the page + PtrI m_freeRecI; // first free record on the page PtrI m_nextPageI; PtrI m_prevPageI; }; - // Number of bits for cache effective type check given as log of 2. - // Example: 2 means 4 bits and uses 32k for 2g of 32k pages. - STATIC_CONST( SP_CHECK_LOG2 = 2 ); - - // Doubly-linked list of pages. There is one free list in super pool - // and free, active, full list in each record pool. + // Doubly-linked list of page entries. struct PageList { PageList(); PageList(PtrI pageI); @@ -219,54 +236,13 @@ public: Uint32 m_pageCount; }; - // Record pool information. Each record pool instance contains one. - struct RecInfo { - RecInfo(Uint32 recType, Uint32 recSize); - const Uint32 m_recType; - const Uint32 m_recSize; - Uint32 m_maxUseCount; // could be computed - Uint32 m_currPageI; // current page - Uint32 m_currFreeRecI; - Uint32 m_currUseCount; - Uint32 m_totalUseCount; // total per pool - Uint32 m_totalRecCount; - PageList m_freeList; - PageList m_activeList; - PageList m_fullList; - }; - - // Constructor. Gives page size in bytes (excluding page header) and + // Constructor. Gives page size in bytes (must be power of 2) and // number of bits to use for page index "ip" in i-value. SuperPool(Uint32 pageSize, Uint32 pageBits); - // Initialize. Must be called after setting sizes or other parameters - // and before the pool is used. - virtual bool init(); - // Destructor. virtual ~SuperPool() = 0; - // Translate i-value to page entry. - PageEnt& getPageEnt(PtrI pageI); - - // Translate i-value to page address. - void* getPageP(PtrI pageI); - - // Translate page address to i-value (unused). - PtrI getPageI(void* pageP); - - // Given type, return non-zero reduced type check bits. - Uint32 makeCheckBits(Uint32 type); - - // Get type check bits from type check array. - Uint32 getCheckBits(PtrI pageI); - - // Set type check bits in type check array. - void setCheckBits(PtrI pageI, Uint32 type); - - // Translate i-value to record address. - void* getRecP(PtrI recI, RecInfo& ri); - // Move all pages from second list to end of first list. void movePages(PageList& pl1, PageList& pl2); @@ -279,174 +255,190 @@ public: // Remove any page from page list. void removePage(PageList& pl, PtrI pageI); - // Set current page. Previous current page is updated and added to - // appropriate list. - void setCurrPage(RecInfo& ri, PtrI pageI); + // Translate i-value ("ri" ignored) to page entry. + PageEnt& getPageEnt(PtrI pageI); + + // Translate i-value ("ri" ignored) to page address. + void* getPageP(PtrI pageI); + + // Translate page address to i-value. Address must be page-aligned to + // memory root. Returns RNIL if "ip" range exceeded. + PtrI getPageI(void* pageP); + + // Record pool info. + struct RecInfo { + RecInfo(GroupPool& gp, Uint32 recSize); + GroupPool& m_groupPool; + Uint32 m_recSize; + Uint16 m_recType; + Uint16 m_maxPerPage; + PtrI m_freeRecI; // first free record on current page + Uint32 m_useCount; // used records excluding current page + PageList m_pageList[3]; // 0-free 1-busy 2-full + Uint16 m_hyX; // hysteresis fraction x/y in "pp3" + Uint16 m_hyY; + }; + + // Translate i-value to record address. + void* getRecP(PtrI recI, RecInfo& ri); + + // Count records on page free list. + Uint32 getFreeCount(RecInfo& ri, PtrI freeRecPtrI); + + // Compute total number of pages in pool. + Uint32 getRecPageCount(RecInfo& ri); + + // Compute total number of records (used or not) in pool. + Uint32 getRecTotCount(RecInfo& ri); + + // Compute total number of used records in pool. + Uint32 getRecUseCount(RecInfo& ri); + + // Compute record pool page list index (0,1,2). + Uint32 getRecPageList(RecInfo& ri, PageEnt& pe); + + // Add current page. + void addCurrPage(RecInfo& ri, PtrI pageI); + + // Remove current page. + void removeCurrPage(RecInfo& ri); // Get page with some free records and make it current. Takes head of - // active or free list, or else gets free page from super pool. + // used or free list, or else gets free page from group pool. bool getAvailPage(RecInfo& ri); - // Get free page from super pool and add it to record pool free list. - // This is an expensive subroutine of getAvailPage(). + // Get free page from group pool and add it to record pool free list. + // This is an expensive subroutine of getAvailPage(RecInfo&): PtrI getFreePage(RecInfo& ri); - // Get new free page from the implementation. + // Get free detached (not on list) page from group pool. + PtrI getFreePage(GroupPool& gp); + + // Get free detached page from super pool. + PtrI getFreePage(); + + // Get new free detached page from the implementation. virtual PtrI getNewPage() = 0; - // Set 3 size parameters, rounded to page size. If called before - // init() then init() allocates the initial size. - void setSizes(size_t initSize = 0, size_t incrSize = 0, size_t maxSize = 0); + // Initialize free list etc. Subroutine of getFreePage(RecInfo&). + void initFreePage(RecInfo& ri, PtrI pageI); - const Uint32 m_pageSize; - const Uint32 m_pageBits; - // implementation must set up these pointers - void* m_memRoot; - PageEnt* m_pageEnt; - Uint32* m_typeCheck; - Uint32 m_typeSeq; - PageList m_pageList; - size_t m_totalSize; - size_t m_initSize; - size_t m_incrSize; - size_t m_maxSize; + // Release record which is not on current page. + void releaseNotCurrent(RecInfo& ri, PtrI recI); + + // Free pages from record pool according to page policy. + void freeRecPages(RecInfo& ri); + + // Free all pages in record pool. + void freeAllRecPages(RecInfo& ri, bool force); + + // Set pool size parameters in pages. Call allocMemory() for changes + // (such as extra mallocs) to take effect. + void setInitPages(Uint32 initPages); + void setIncrPages(Uint32 incrPages); + void setMaxPages(Uint32 maxPages); + + // Get number of pages reserved by all groups. + Uint32 getGpMinPages(); + + // Get number of pages reserved to a group. + Uint32 getMinPages(GroupPool& gp); + + // Get max number of pages a group can try to allocate. + Uint32 getMaxPages(GroupPool& gp); + + // Allocate more memory according to current parameters. Returns + // false if no new memory was allocated. Otherwise returns true, + // even if the amount allocated was less than requested. + virtual bool allocMemory() = 0; // Debugging. void verify(RecInfo& ri); + void verifyPageList(PageList& pl); + + // Super pool parameters. + const Uint32 m_pageSize; + const Uint16 m_pageBits; + const Uint16 m_recBits; + const Uint32 m_recMask; + // Implementation must set up these 3 pointers. + void* m_memRoot; + PageEnt* m_pageEnt; + Uint8* m_pageType; + // Free page list. + PageList m_freeList; + // Free pages and sizes. + Uint32 m_initPages; + Uint32 m_incrPages; + Uint32 m_maxPages; + Uint32 m_totPages; + Uint32 m_typeCount; + // Reserved and allocated by group pools. + Uint32 m_groupMinPct; + Uint32 m_groupMinPages; + Uint32 m_groupTotPages; }; inline SuperPool::PageEnt& SuperPool::getPageEnt(PtrI pageI) { - Uint32 ip = pageI >> (32 - m_pageBits); + Uint32 ip = pageI >> m_recBits; return m_pageEnt[ip]; } inline void* SuperPool::getPageP(PtrI ptrI) { - Int32 ip = (Int32)ptrI >> (32 - m_pageBits); - my_ptrdiff_t sz = m_pageSize; - void* pageP = (Uint8*)m_memRoot + ip * sz; - return pageP; -} - -inline Uint32 -SuperPool::makeCheckBits(Uint32 type) -{ - Uint32 shift = 1 << SP_CHECK_LOG2; - Uint32 mask = (1 << shift) - 1; - return 1 + type % mask; -} - -inline Uint32 -SuperPool::getCheckBits(PtrI pageI) -{ - Uint32 ip = pageI >> (32 - m_pageBits); - Uint32 xp = ip >> (5 - SP_CHECK_LOG2); - Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1; - Uint32& w = m_typeCheck[xp]; - Uint32 shift = 1 << SP_CHECK_LOG2; - Uint32 mask = (1 << shift) - 1; - // get - Uint32 bits = (w >> yp * shift) & mask; - return bits; -} - -inline void -SuperPool::setCheckBits(PtrI pageI, Uint32 type) -{ - Uint32 ip = pageI >> (32 - m_pageBits); - Uint32 xp = ip >> (5 - SP_CHECK_LOG2); - Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1; - Uint32& w = m_typeCheck[xp]; - Uint32 shift = 1 << SP_CHECK_LOG2; - Uint32 mask = (1 << shift) - 1; - // set - Uint32 bits = makeCheckBits(type); - w &= ~(mask << yp * shift); - w |= (bits << yp * shift); + Int32 ip = (Int32)ptrI >> m_recBits; + return (Uint8*)m_memRoot + ip * (my_ptrdiff_t)m_pageSize; } inline void* SuperPool::getRecP(PtrI ptrI, RecInfo& ri) { - const Uint32 recMask = (1 << (32 - m_pageBits)) - 1; - PtrI pageI = ptrI & ~recMask; -#if NDB_SP_VERIFY_LEVEL >= 1 - Uint32 bits1 = getCheckBits(pageI); - Uint32 bits2 = makeCheckBits(ri.m_recType); - assert(bits1 == bits2); -#endif - void* pageP = getPageP(pageI); - Uint32 ir = ptrI & recMask; - void* recP = (Uint8*)pageP + ir * ri.m_recSize; - return recP; + Uint32 ip = ptrI >> m_recBits; + assert(m_pageType[ip] == (ri.m_recType & 0xFF)); + Uint32 ir = ptrI & m_recMask; + return (Uint8*)getPageP(ptrI) + ir * ri.m_recSize; } /* - * HeapPool - SuperPool on heap (concrete class) - * - * A super pool based on malloc with memory root on the heap. This - * pool type has 2 realistic uses: - * - * - a small pool with only initial malloc and pageBits set to match - * - the big pool from which all heap allocations are done - * - * A "smart" malloc may break "ip" limit by using different VM areas for - * different sized requests. For this reason malloc is done in units of - * increment size if possible. Memory root is set to start of first - * malloc. + * GroupPool - subset of a super pool pages (concrete class) */ -class HeapPool : public SuperPool { +class GroupPool { public: - // Describes malloc area. The areas are kept in singly linked list. - // There is a list head and pointers to current and last area. - struct Area { - Area(); - Area* m_nextArea; - PtrI m_firstPageI; - Uint32 m_currPage; - Uint32 m_numPages; - void* m_memory; - }; + // Types. + typedef SuperPool::PageList PageList; // Constructor. - HeapPool(Uint32 pageSize, Uint32 pageBits); - - // Initialize. - virtual bool init(); + GroupPool(SuperPool& sp); // Destructor. - virtual ~HeapPool(); + ~GroupPool(); - // Use malloc to allocate more. - bool allocMoreData(size_t size); + // Set minimum pct reserved in super pool. + void setMinPct(Uint32 resPct); - // Get new page from current area. - virtual PtrI getNewPage(); + // Set minimum pages reserved in super pool. + void setMinPages(Uint32 resPages); - // List of malloc areas. - Area m_areaHead; - Area* m_currArea; - Area* m_lastArea; - - // Fraction of malloc size to try if cannot get all in one. - Uint32 m_mallocPart; + SuperPool& m_superPool; + Uint32 m_minPct; + Uint32 m_minPages; + Uint32 m_totPages; + PageList m_freeList; }; /* * RecordPool - record pool using one super pool instance (template) - * - * Documented under SuperPool. Satisfies ArrayPool interface. */ template class RecordPool { public: // Constructor. - RecordPool(SuperPool& superPool); + RecordPool(GroupPool& gp); // Destructor. ~RecordPool(); @@ -462,9 +454,9 @@ public: // todo variants of basic methods - // Return all pages to super pool. The force flag is required if + // Return all pages to group pool. The force flag is required if // there are any used records. - void free(bool force); + void freeAllRecPages(bool force); SuperPool& m_superPool; SuperPool::RecInfo m_recInfo; @@ -472,24 +464,17 @@ public: template inline -RecordPool::RecordPool(SuperPool& superPool) : - m_superPool(superPool), - m_recInfo(1 + superPool.m_typeSeq++, sizeof(T)) +RecordPool::RecordPool(GroupPool& gp) : + m_superPool(gp.m_superPool), + m_recInfo(gp, sizeof(T)) { - SuperPool::RecInfo& ri = m_recInfo; - assert(sizeof(T) == SP_ALIGN_SIZE(sizeof(T), sizeof(Uint32))); - Uint32 maxUseCount = superPool.m_pageSize / sizeof(T); - Uint32 sizeLimit = 1 << (32 - superPool.m_pageBits); - if (maxUseCount >= sizeLimit) - maxUseCount = sizeLimit; - ri.m_maxUseCount = maxUseCount; } template inline RecordPool::~RecordPool() { - free(true); + freeAllRecPages(true); } template @@ -506,18 +491,19 @@ RecordPool::seize(Ptr& ptr) { SuperPool& sp = m_superPool; SuperPool::RecInfo& ri = m_recInfo; - if (ri.m_currFreeRecI != RNIL || sp.getAvailPage(ri)) { - SuperPool::PtrI recI = ri.m_currFreeRecI; + Uint32 recMask = sp.m_recMask; + // get current page + if ((ri.m_freeRecI & recMask) != recMask || + sp.getAvailPage(ri)) { + SuperPool::PtrI recI = ri.m_freeRecI; void* recP = sp.getRecP(recI, ri); - ri.m_currFreeRecI = *(Uint32*)recP; - Uint32 useCount = ri.m_currUseCount; - assert(useCount < ri.m_maxUseCount); - ri.m_currUseCount = useCount + 1; - ri.m_totalUseCount++; + ri.m_freeRecI = *(Uint32*)recP; ptr.i = recI; ptr.p = static_cast(recP); return true; } + ptr.i = RNIL; + ptr.p = 0; return false; } @@ -527,35 +513,79 @@ RecordPool::release(Ptr& ptr) { SuperPool& sp = m_superPool; SuperPool::RecInfo& ri = m_recInfo; - const Uint32 recMask = (1 << (32 - sp.m_pageBits)) - 1; SuperPool::PtrI recI = ptr.i; - SuperPool::PtrI pageI = recI & ~recMask; - if (pageI != ri.m_currPageI) { - sp.setCurrPage(ri, pageI); + Uint32 recMask = sp.m_recMask; + // check if current page + if ((recI & ~recMask) == (ri.m_freeRecI & ~recMask)) { + void* recP = sp.getRecP(recI, ri); + *(Uint32*)recP = ri.m_freeRecI; + ri.m_freeRecI = recI; + } else { + sp.releaseNotCurrent(ri, recI); } - void* recP = sp.getRecP(recI, ri); - *(Uint32*)recP = ri.m_currFreeRecI; - ri.m_currFreeRecI = recI; - Uint32 useCount = ri.m_currUseCount; - assert(useCount != 0); - ri.m_currUseCount = useCount - 1; - ri.m_totalUseCount--; ptr.i = RNIL; ptr.p = 0; } template inline void -RecordPool::free(bool force) +RecordPool::freeAllRecPages(bool force) { SuperPool& sp = m_superPool; - SuperPool::RecInfo& ri = m_recInfo; - sp.setCurrPage(ri, RNIL); - assert(force || ri.m_totalUseCount == 0); - sp.movePages(sp.m_pageList, ri.m_freeList); - sp.movePages(sp.m_pageList, ri.m_activeList); - sp.movePages(sp.m_pageList, ri.m_fullList); - ri.m_totalRecCount = 0; + sp.freeAllRecPages(m_recInfo, force); } +/* + * HeapPool - SuperPool on heap (concrete class) + * + * A super pool based on malloc with memory root on the heap. This + * pool type has 2 realistic uses: + * + * - a small pool with only initial malloc and pageBits set to match + * - the big pool from which all heap allocations are done + * + * A smart malloc may break "ip" limit by using different VM areas for + * different sized requests. For this reason malloc is done in units of + * increment size if possible. Memory root is set to the page-aligned + * address from first page malloc. + */ + +class HeapPool : public SuperPool { +public: + // Describes malloc area. The areas are kept in singly linked list. + // There is a list head and pointers to current and last area. + struct Area { + Area(); + Area* m_nextArea; + PtrI m_firstPageI; + Uint32 m_currPage; + void* m_memory; // from malloc + void* m_pages; // page-aligned pages + Uint32 m_numPages; // number of pages + }; + + // Constructor. + HeapPool(Uint32 pageSize, Uint32 pageBits); + + // Destructor. + virtual ~HeapPool(); + + // Get new page from current area. + virtual PtrI getNewPage(); + + // Allocate fixed arrays. + bool allocInit(); + + // Allocate array of aligned pages. + bool allocArea(Area* ap, Uint32 tryPages); + + // Allocate memory. + virtual bool allocMemory(); + + // List of malloc areas. + Area m_areaHead; + Area* m_currArea; + Area* m_lastArea; +}; + #endif diff --git a/storage/ndb/src/kernel/vm/testSuperPool.cpp b/storage/ndb/src/kernel/vm/testSuperPool.cpp index 194b3a43fa0..bb2c2f2be42 100644 --- a/storage/ndb/src/kernel/vm/testSuperPool.cpp +++ b/storage/ndb/src/kernel/vm/testSuperPool.cpp @@ -81,19 +81,22 @@ cmpPtrP(const void* a, const void* b) static Uint32 loopcount = 3; -template -void -sp_test(SuperPool& sp) +template +static void +sp_test(GroupPool& gp) { - typedef A T; - RecordPool rp(sp); + SuperPool& sp = gp.m_superPool; + RecordPool rp(gp); + assert(gp.m_totPages == gp.m_freeList.m_pageCount); SuperPool::RecInfo& ri = rp.m_recInfo; - Uint32 pageCount = sp.m_totalSize / sp.m_pageSize; - Uint32 perPage = rp.m_recInfo.m_maxUseCount; + Uint32 pageCount = sp.m_totPages; + Uint32 perPage = rp.m_recInfo.m_maxPerPage; Uint32 perPool = perPage * pageCount; ndbout << "pages=" << pageCount << " perpage=" << perPage << " perpool=" << perPool << endl; Ptr* ptrList = new Ptr [perPool]; memset(ptrList, 0x1f, perPool * sizeof(Ptr)); + Uint32 verify = 1000; + Uint32 useCount; Uint32 loop; for (loop = 0; loop < loopcount; loop++) { ndbout << "loop " << loop << endl; @@ -101,25 +104,26 @@ sp_test(SuperPool& sp) // seize all ndbout << "seize all" << endl; for (i = 0; i < perPool + 1; i++) { + if (verify == 0 || urandom(perPool) < verify) + sp.verify(ri); j = i; - sp.verify(ri); Ptr ptr1 = { 0, RNIL }; if (! rp.seize(ptr1)) break; - // write value ptr1.p->fill(); ptr1.p->check(); - // verify getPtr Ptr ptr2 = { 0, ptr1.i }; rp.getPtr(ptr2); assert(ptr1.i == ptr2.i && ptr1.p == ptr2.p); - // save ptrList[j] = ptr1; } - assert(i == perPool); - assert(ri.m_totalUseCount == perPool && ri.m_totalRecCount == perPool); sp.verify(ri); + ndbout << "seized " << i << endl; + assert(i == perPool); + useCount = sp.getRecUseCount(ri); + assert(useCount == perPool); // check duplicates + ndbout << "check dups" << endl; { Ptr* ptrList2 = new Ptr [perPool]; memcpy(ptrList2, ptrList, perPool * sizeof(Ptr)); @@ -135,7 +139,8 @@ sp_test(SuperPool& sp) ndbout << "release all" << endl; Uint32 coprime = random_coprime(perPool); for (i = 0; i < perPool; i++) { - sp.verify(ri); + if (verify == 0 || urandom(perPool) < verify) + sp.verify(ri); switch (loop % 3) { case 0: // ascending j = i; @@ -153,27 +158,31 @@ sp_test(SuperPool& sp) rp.release(ptr); assert(ptr.i == RNIL && ptr.p == 0); } - sp.setCurrPage(ri, RNIL); - assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0); sp.verify(ri); + useCount = sp.getRecUseCount(ri); + assert(useCount == 0); // seize/release at random ndbout << "seize/release at random" << endl; for (i = 0; i < loopcount * perPool; i++) { + if (verify == 0 || urandom(perPool) < verify) + sp.verify(ri); j = urandom(perPool); Ptr& ptr = ptrList[j]; if (ptr.i == RNIL) { - rp.seize(ptr); - ptr.p->fill(); + if (rp.seize(ptr)) + ptr.p->fill(); } else { ptr.p->check(); rp.release(ptr); } } - ndbout << "used " << ri.m_totalUseCount << endl; + ndbout << "used " << ri.m_useCount << endl; sp.verify(ri); // release all ndbout << "release all" << endl; for (i = 0; i < perPool; i++) { + if (verify == 0 || urandom(perPool) < verify) + sp.verify(ri); j = i; Ptr& ptr = ptrList[j]; if (ptr.i != RNIL) { @@ -181,40 +190,54 @@ sp_test(SuperPool& sp) rp.release(ptr); } } - sp.setCurrPage(ri, RNIL); - assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0); sp.verify(ri); + useCount = sp.getRecUseCount(ri); + assert(useCount == 0); } // done delete [] ptrList; } -static Uint32 pageCount = 99; static Uint32 pageSize = 32768; -static Uint32 pageBits = 15; +static Uint32 pageBits = 17; -const Uint32 sz1 = 3, sz2 = 4, sz3 = 53, sz4 = 424, sz5 = 5353; +const Uint32 sz1 = 3; +const Uint32 sz2 = 4; +const Uint32 sz3 = 53; +const Uint32 sz4 = 424; +const Uint32 sz5 = 5353; -template void sp_test(SuperPool& sp); -template void sp_test(SuperPool& sp); -template void sp_test(SuperPool& sp); -template void sp_test(SuperPool& sp); -template void sp_test(SuperPool& sp); +typedef A T1; +typedef A T2; +typedef A T3; +typedef A T4; +typedef A T5; + +template static void sp_test(GroupPool& sp); +template static void sp_test(GroupPool& sp); +template static void sp_test(GroupPool& sp); +template static void sp_test(GroupPool& sp); +template static void sp_test(GroupPool& sp); int main() { HeapPool sp(pageSize, pageBits); - sp.setSizes(pageCount * pageSize); - if (! sp.init()) + sp.setInitPages(7); + sp.setMaxPages(7); + if (! sp.allocMemory()) assert(false); + GroupPool gp(sp); Uint16 s = (Uint16)getpid(); srandom(s); ndbout << "rand " << s << endl; - sp_test(sp); - sp_test(sp); - sp_test(sp); - sp_test(sp); - sp_test(sp); + int count = 0; + while (++count <= 1) { + sp_test(gp); + sp_test(gp); + sp_test(gp); + sp_test(gp); + sp_test(gp); + } return 0; }