tux optim 13 - replace read keys & query th signals tux->tup by methods

This commit is contained in:
pekka@mysql.com 2004-07-25 16:41:11 +02:00
parent 82c759e254
commit fcb569f257
8 changed files with 471 additions and 283 deletions

View file

@ -1014,9 +1014,15 @@ public:
void tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tupVersion, Uint32 numAttrs, const Uint32* attrIds, const Uint32** attrData);
/*
* TUX reads primary key for md5 summing and when returning keyinfo.
* TUX reads primary key without headers into an array of words. Used
* for md5 summing and when returning keyinfo.
*/
void tuxReadKeys(); // under construction
void tuxReadKeys(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* pkSize, Uint32* pkData);
/*
* TUX checks if tuple is visible to scan.
*/
bool tuxQueryTh(Uint32 fragPtrI, Uint32 tupAddr, Uint32 tupVersion, Uint32 transId1, Uint32 transId2, Uint32 savePointId);
private:
BLOCK_DEFINES(Dbtup);

View file

@ -152,10 +152,10 @@ Dbtup::tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tu
const Uint32* tupleHeader = &pagePtr.p->pageWord[pageOffset];
for (Uint32 i = 0; i < numAttrs; i++) {
AttributeHeader ah(attrIds[i]);
Uint32 attrId = ah.getAttributeId();
Uint32 index = tabDescriptor + (attrId << ZAD_LOG_SIZE);
Uint32 desc1 = tableDescriptor[index].tabDescr;
Uint32 desc2 = tableDescriptor[index + 1].tabDescr;
const Uint32 attrId = ah.getAttributeId();
const Uint32 index = tabDescriptor + (attrId << ZAD_LOG_SIZE);
const Uint32 desc1 = tableDescriptor[index].tabDescr;
const Uint32 desc2 = tableDescriptor[index + 1].tabDescr;
if (AttributeDescriptor::getNullable(desc1)) {
Uint32 offset = AttributeOffset::getNullFlagOffset(desc2);
ndbrequire(offset < tablePtr.p->tupNullWords);
@ -171,9 +171,78 @@ Dbtup::tuxReadAttrs(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32 tu
}
}
void // under construction
Dbtup::tuxReadKeys()
/*
 * Copy the primary key of one tuple into pkData as a flat array of
 * words, without attribute headers, and return the word count in
 * *pkSize.  The tuple is addressed by fragment record index, page id
 * and word offset within the page.  The caller supplies the output
 * buffer; no size limit is passed in, so it must be large enough for
 * the whole key.
 */
void
Dbtup::tuxReadKeys(Uint32 fragPtrI, Uint32 pageId, Uint32 pageOffset, Uint32* pkSize, Uint32* pkData)
{
  ljamEntry();
  // resolve fragment -> table -> page records via ptrCheckGuard
  FragrecordPtr fragPtr;
  fragPtr.i = fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i = fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  PagePtr pagePtr;
  pagePtr.i = pageId;
  ptrCheckGuard(pagePtr, cnoOfPage, page);
  // key attribute ids come from the table's readKeyArray descriptor area
  const Uint32 descBase = tablePtr.p->tabDescriptor;
  const Uint32 keyCount = tablePtr.p->noOfKeyAttr;
  const Uint32* keyAttrIds = &tableDescriptor[tablePtr.p->readKeyArray].tabDescr;
  const Uint32* tupleHeader = &pagePtr.p->pageWord[pageOffset];
  Uint32 total = 0;
  for (Uint32 k = 0; k < keyCount; k++) {
    AttributeHeader ah(keyAttrIds[k]);
    const Uint32 attrId = ah.getAttributeId();
    // two consecutive descriptor words per attribute
    const Uint32 index = descBase + (attrId << ZAD_LOG_SIZE);
    const Uint32 desc1 = tableDescriptor[index].tabDescr;
    const Uint32 desc2 = tableDescriptor[index + 1].tabDescr;
    // a key attribute must not be nullable
    ndbrequire(! AttributeDescriptor::getNullable(desc1));
    const Uint32 attrWords = AttributeDescriptor::getSizeInWords(desc1);
    const Uint32* attrData = tupleHeader + AttributeOffset::getOffset(desc2);
    // append this attribute's words directly after the previous one
    for (Uint32 w = 0; w < attrWords; w++) {
      pkData[total++] = attrData[w];
    }
  }
  *pkSize = total;
}
/*
 * Tell TUX whether the given tuple version is the one visible to a
 * scan running with the given transaction id and savepoint id.
 * Returns true only if getPage finds a tuple for that transaction and
 * its version word equals tupVersion.
 */
bool
Dbtup::tuxQueryTh(Uint32 fragPtrI, Uint32 tupAddr, Uint32 tupVersion, Uint32 transId1, Uint32 transId2, Uint32 savePointId)
{
  ljamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i = fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i = fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  /*
   * Describe the lookup with a temporary operation record: the tuple
   * address is split into fragment page id (high bits) and page index
   * (low MAX_TUPLES_BITS bits), and the op is set up as a dirty read.
   */
  Operationrec tempOp;
  tempOp.fragPageId = tupAddr >> MAX_TUPLES_BITS;
  tempOp.pageIndex = tupAddr & ((1 << MAX_TUPLES_BITS ) - 1);
  tempOp.transid1 = transId1;
  tempOp.transid2 = transId2;
  tempOp.savePointId = savePointId;
  tempOp.optype = ZREAD;
  tempOp.dirtyOp = 1;
  PagePtr pagePtr;
  if (! getPage(pagePtr, &tempOp, fragPtr.p, tablePtr.p)) {
    // no tuple for this transaction and savepoint
    return false;
  }
  /*
   * The normal getPage selects the tuple to be used for this
   * transaction and savepoint id.  The tuple is visible exactly when
   * its version matches the requested one.
   */
  ljam();
  const Uint32 read_tupVersion = pagePtr.p->pageWord[tempOp.pageOffset + 1];
  if (read_tupVersion != tupVersion) {
    return false;
  }
  ljam();
  return true;
}
// deprecated signal interfaces

View file

@ -542,49 +542,6 @@ private:
void progError(int line, int cause, const char* file);
};
// parameters for methods
/*
* Copy attribute data.
*/
struct CopyPar {
unsigned m_items; // number of attributes
bool m_headers; // copy headers flag (default true)
unsigned m_maxwords; // limit size (default no limit)
// output
unsigned m_numitems; // number of attributes fully copied
unsigned m_numwords; // number of words copied
CopyPar();
};
/*
* Read index key attributes.
*/
struct ReadPar;
friend struct ReadPar;
struct ReadPar {
TreeEnt m_ent; // tuple to read
unsigned m_first; // first index attribute
unsigned m_count; // number of consecutive index attributes
Data m_data; // set pointer if 0 else copy result to it
unsigned m_size; // number of words (set in read keys only)
ReadPar();
};
/*
* Scan bound comparison.
*/
struct BoundPar;
friend struct BoundPar;
struct BoundPar {
ConstData m_data1; // full bound data
ConstData m_data2; // full or prefix data
unsigned m_count1; // number of bounds
unsigned m_len2; // words in data2 buffer
unsigned m_dir; // 0-lower bound 1-upper bound
BoundPar();
};
// methods
/*
@ -596,7 +553,7 @@ private:
// utils
void setKeyAttrs(const Frag& frag);
void readKeyAttrs(const Frag& frag, TreeEnt ent, unsigned start, TableData keyData);
void copyAttrs(Data dst, ConstData src, CopyPar& copyPar);
void readTablePk(const Frag& frag, TreeEnt ent, unsigned& pkSize, Data pkData);
void copyAttrs(const Frag& frag, TableData data1, Data data2, unsigned maxlen2 = MaxAttrDataSize);
/*
@ -614,8 +571,6 @@ private:
* DbtuxMaint.cpp
*/
void execTUX_MAINT_REQ(Signal* signal);
void tupReadAttrs(Signal* signal, const Frag& frag, ReadPar& readPar);
void tupReadKeys(Signal* signal, const Frag& frag, ReadPar& readPar);
/*
* DbtuxNode.cpp
@ -1225,36 +1180,6 @@ Dbtux::NodeHandle::getMinMax(unsigned i)
// parameters for methods
inline
Dbtux::CopyPar::CopyPar() :
m_items(0),
m_headers(true),
m_maxwords(~0), // max unsigned
// output
m_numitems(0),
m_numwords(0)
{
}
inline
Dbtux::ReadPar::ReadPar() :
m_first(0),
m_count(0),
m_data(0),
m_size(0)
{
}
inline
Dbtux::BoundPar::BoundPar() :
m_data1(0),
m_data2(0),
m_count1(0),
m_len2(0),
m_dir(255)
{
}
#ifdef VM_TRACE
inline
Dbtux::PrintPar::PrintPar() :

View file

@ -246,37 +246,14 @@ Dbtux::readKeyAttrs(const Frag& frag, TreeEnt ent, unsigned start, TableData key
}
void
Dbtux::copyAttrs(Data dst, ConstData src, CopyPar& copyPar)
Dbtux::readTablePk(const Frag& frag, TreeEnt ent, unsigned& pkSize, Data pkData)
{
CopyPar c = copyPar;
c.m_numitems = 0;
c.m_numwords = 0;
while (c.m_numitems < c.m_items) {
jam();
if (c.m_headers) {
unsigned i = 0;
while (i < AttributeHeaderSize) {
if (c.m_numwords >= c.m_maxwords) {
copyPar = c;
return;
}
dst[c.m_numwords++] = src[i++];
}
}
unsigned size = src.ah().getDataSize();
src += AttributeHeaderSize;
unsigned i = 0;
while (i < size) {
if (c.m_numwords >= c.m_maxwords) {
copyPar = c;
return;
}
dst[c.m_numwords++] = src[i++];
}
src += size;
c.m_numitems++;
}
copyPar = c;
const Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit];
const TupLoc tupLoc = ent.m_tupLoc;
Uint32 size = 0;
c_tup->tuxReadKeys(tableFragPtrI, tupLoc.m_pageId, tupLoc.m_pageOffset, &size, pkData);
ndbrequire(size != 0);
pkSize = size;
}
/*

View file

@ -180,89 +180,3 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal)
// copy back
*sig = *req;
}
/*
* Read index key attributes from TUP. If buffer is provided the data
* is copied to it. Otherwise pointer is set to signal data.
*/
void
Dbtux::tupReadAttrs(Signal* signal, const Frag& frag, ReadPar& readPar)
{
// define the direct signal
const TreeEnt ent = readPar.m_ent;
TupReadAttrs* const req = (TupReadAttrs*)signal->getDataPtrSend();
req->errorCode = RNIL;
req->requestInfo = 0;
req->tableId = frag.m_tableId;
req->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff);
req->fragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit];
req->tupAddr = (Uint32)-1;
req->tupVersion = ent.m_tupVersion;
req->pageId = ent.m_tupLoc.m_pageId;
req->pageOffset = ent.m_tupLoc.m_pageOffset;
req->bufferId = 0;
// add count and list of attribute ids
Data data = (Uint32*)req + TupReadAttrs::SignalLength;
data[0] = readPar.m_count;
data += 1;
const DescEnt& descEnt = getDescEnt(frag.m_descPage, frag.m_descOff);
for (Uint32 i = 0; i < readPar.m_count; i++) {
jam();
const DescAttr& descAttr = descEnt.m_descAttr[readPar.m_first + i];
data.ah() = AttributeHeader(descAttr.m_primaryAttrId, 0);
data += 1;
}
// execute
EXECUTE_DIRECT(DBTUP, GSN_TUP_READ_ATTRS, signal, TupReadAttrs::SignalLength);
jamEntry();
ndbrequire(req->errorCode == 0);
// data is at output
if (readPar.m_data == 0) {
readPar.m_data = data;
} else {
jam();
CopyPar copyPar;
copyPar.m_items = readPar.m_count;
copyPar.m_headers = true;
copyAttrs(readPar.m_data, data, copyPar);
}
}
/*
* Read primary keys. Copy the data without attribute headers into the
* given buffer. Number of words is returned in ReadPar argument.
*/
void
Dbtux::tupReadKeys(Signal* signal, const Frag& frag, ReadPar& readPar)
{
// define the direct signal
const TreeEnt ent = readPar.m_ent;
TupReadAttrs* const req = (TupReadAttrs*)signal->getDataPtrSend();
req->errorCode = RNIL;
req->requestInfo = TupReadAttrs::ReadKeys;
req->tableId = frag.m_tableId;
req->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff);
req->fragPtrI = frag.m_tupTableFragPtrI[ent.m_fragBit];
req->tupAddr = (Uint32)-1;
req->tupVersion = RNIL; // not used
req->pageId = ent.m_tupLoc.m_pageId;
req->pageOffset = ent.m_tupLoc.m_pageOffset;
req->bufferId = 0;
// execute
EXECUTE_DIRECT(DBTUP, GSN_TUP_READ_ATTRS, signal, TupReadAttrs::SignalLength);
jamEntry();
ndbrequire(req->errorCode == 0);
// copy out in special format
ConstData data = (Uint32*)req + TupReadAttrs::SignalLength;
const Uint32 numKeys = data[0];
data += 1 + numKeys;
// copy out without headers
ndbrequire(readPar.m_data != 0);
CopyPar copyPar;
copyPar.m_items = numKeys;
copyPar.m_headers = false;
copyAttrs(readPar.m_data, data, copyPar);
// return counts
readPar.m_count = numKeys;
readPar.m_size = copyPar.m_numwords;
}

View file

@ -379,8 +379,8 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
scanNext(signal, scanPtr);
}
// for reading tuple key in Current or Locked state
ReadPar keyPar;
keyPar.m_data = 0; // indicates not yet done
Data pkData = c_dataBuffer;
unsigned pkSize = 0; // indicates not yet done
if (scan.m_state == ScanOp::Current) {
// found an entry to return
jam();
@ -389,9 +389,7 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
jam();
const TreeEnt ent = scan.m_scanPos.m_ent;
// read tuple key
keyPar.m_ent = ent;
keyPar.m_data = c_dataBuffer;
tupReadKeys(signal, frag, keyPar);
readTablePk(frag, ent, pkSize, pkData);
// get read lock or exclusive lock
AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
lockReq->returnCode = RNIL;
@ -403,9 +401,9 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
lockReq->tableId = scan.m_tableId;
lockReq->fragId = frag.m_fragId | (ent.m_fragBit << frag.m_fragOff);
lockReq->fragPtrI = frag.m_accTableFragPtrI[ent.m_fragBit];
const Uint32* const buf32 = static_cast<Uint32*>(keyPar.m_data);
const Uint32* const buf32 = static_cast<Uint32*>(pkData);
const Uint64* const buf64 = reinterpret_cast<const Uint64*>(buf32);
lockReq->hashValue = md5_hash(buf64, keyPar.m_size);
lockReq->hashValue = md5_hash(buf64, pkSize);
lockReq->tupAddr = getTupAddr(frag, ent);
lockReq->transId1 = scan.m_transId1;
lockReq->transId2 = scan.m_transId2;
@ -480,11 +478,9 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
const TreeEnt ent = scan.m_scanPos.m_ent;
if (scan.m_keyInfo) {
jam();
if (keyPar.m_data == 0) {
if (pkSize == 0) {
jam();
keyPar.m_ent = ent;
keyPar.m_data = c_dataBuffer;
tupReadKeys(signal, frag, keyPar);
readTablePk(frag, ent, pkSize, pkData);
}
}
// conf signal
@ -510,10 +506,10 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
// add key info
if (scan.m_keyInfo) {
jam();
conf->keyLength = keyPar.m_size;
conf->keyLength = pkSize;
// piggy-back first 4 words of key data
for (unsigned i = 0; i < 4; i++) {
conf->key[i] = i < keyPar.m_size ? keyPar.m_data[i] : 0;
conf->key[i] = i < pkSize ? pkData[i] : 0;
}
signalLength = 11;
}
@ -525,18 +521,18 @@ Dbtux::execACC_CHECK_SCAN(Signal* signal)
EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, signalLength);
}
// send rest of key data
if (scan.m_keyInfo && keyPar.m_size > 4) {
if (scan.m_keyInfo && pkSize > 4) {
unsigned total = 4;
while (total < keyPar.m_size) {
while (total < pkSize) {
jam();
unsigned length = keyPar.m_size - total;
unsigned length = pkSize - total;
if (length > 20)
length = 20;
signal->theData[0] = scan.m_userPtr;
signal->theData[1] = 0;
signal->theData[2] = 0;
signal->theData[3] = length;
memcpy(&signal->theData[4], &keyPar.m_data[total], length << 2);
memcpy(&signal->theData[4], &pkData[total], length << 2);
sendSignal(scan.m_userRef, GSN_ACC_SCAN_INFO24,
signal, 4 + length, JBB);
total += length;
@ -895,35 +891,25 @@ Dbtux::scanNext(Signal* signal, ScanOpPtr scanPtr)
bool
Dbtux::scanVisible(Signal* signal, ScanOpPtr scanPtr, TreeEnt ent)
{
TupQueryTh* const req = (TupQueryTh*)signal->getDataPtrSend();
const ScanOp& scan = *scanPtr.p;
const Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
/* Assign table, fragment, tuple address + version */
Uint32 tableId = frag.m_tableId;
Uint32 fragBit = ent.m_fragBit;
Uint32 tableFragPtrI = frag.m_tupTableFragPtrI[fragBit];
Uint32 fragId = frag.m_fragId | (fragBit << frag.m_fragOff);
Uint32 tupAddr = getTupAddr(frag, ent);
Uint32 tupVersion = ent.m_tupVersion;
/* Check for same tuple twice in row */
// check for same tuple twice in row
if (scan.m_lastEnt.m_tupLoc == ent.m_tupLoc &&
scan.m_lastEnt.m_fragBit == fragBit) {
jam();
return false;
}
req->tableId = tableId;
req->fragId = fragId;
req->tupAddr = tupAddr;
req->tupVersion = tupVersion;
/* Assign transaction info, trans id + savepoint id */
Uint32 transId1 = scan.m_transId1;
Uint32 transId2 = scan.m_transId2;
Uint32 savePointId = scan.m_savePointId;
req->transId1 = transId1;
req->transId2 = transId2;
req->savePointId = savePointId;
EXECUTE_DIRECT(DBTUP, GSN_TUP_QUERY_TH, signal, TupQueryTh::SignalLength);
bool ret = c_tup->tuxQueryTh(tableFragPtrI, tupAddr, tupVersion, transId1, transId2, savePointId);
jamEntry();
return (bool)req->returnCode;
return ret;
}
/*

View file

@ -1,17 +1,32 @@
index maintenance overhead
==========================
ordered index performance
=========================
"mc02" 2x1700 MHz linux-2.4.9 gcc-2.96 -O3 one db-node
case a: index on Unsigned
testOIBasic -case u -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging
case a: maintenance: index on Unsigned
testOIBasic -case u -table 1 -index 2 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging
case b: index on Varchar(5) + Varchar(5) + Varchar(20) + Unsigned
testOIBasic -case u -table 2 -index 4 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging
case b: maintenance: index on Varchar(5) + Varchar(5) + Varchar(20) + Unsigned
testOIBasic -case u -table 2 -index 5 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging
case c: full scan: index on PK Unsigned
testOIBasic -case v -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -subloop 1 -nologging
case d: scan 1 tuple via EQ: index on PK Unsigned
testOIBasic -case w -table 1 -index 1 -fragtype small -threads 10 -rows 100000 -samples 10000 -subloop 1 -nologging -v2
a, b
1 million rows, pk update without index, pk update with index
shows ms / 1000 rows for each and pct overhead
the figures are based on single run on idle machine
c
1 million rows, index on PK, full table scan, full index scan
shows ms / 1000 rows for each and index time pct
d
1 million rows, index on PK, read table via each pk, scan index for each pk
shows ms / 1000 rows for each and index time pct
samples 10% of all PKs (100,000 pk reads, 100,000 scans)
040616 mc02/a 40 ms 87 ms 114 pct
mc02/b 51 ms 128 ms 148 pct
@ -51,5 +66,12 @@ optim 11 mc02/a 43 ms 63 ms 46 pct
optim 12 mc02/a 38 ms 55 ms 43 pct
mc02/b 47 ms 77 ms 63 pct
mc02/c 10 ms 14 ms 147 pct
mc02/d 176 ms 281 ms 159 pct
optim 13 mc02/a 40 ms 57 ms 42 pct
mc02/b 47 ms 77 ms 61 pct
mc02/c 9 ms 13 ms 150 pct
mc02/d 170 ms 256 ms 150 pct
vim: set et:

View file

@ -41,6 +41,7 @@ struct Opt {
unsigned m_loop;
bool m_nologging;
unsigned m_rows;
unsigned m_samples;
unsigned m_scanrd;
unsigned m_scanex;
unsigned m_seed;
@ -57,6 +58,7 @@ struct Opt {
m_loop(1),
m_nologging(false),
m_rows(1000),
m_samples(0),
m_scanrd(240),
m_scanex(240),
m_seed(1),
@ -86,6 +88,7 @@ printhelp()
<< " -loop N loop count full suite forever=0 [" << d.m_loop << "]" << endl
<< " -nologging create tables in no-logging mode" << endl
<< " -rows N rows per thread [" << d.m_rows << "]" << endl
<< " -samples N samples for some timings (0=all) [" << d.m_samples << "]" << endl
<< " -scanrd N scan read parallelism [" << d.m_scanrd << "]" << endl
<< " -scanex N scan exclusive parallelism [" << d.m_scanex << "]" << endl
<< " -seed N srandom seed [" << d.m_seed << "]" << endl
@ -177,6 +180,7 @@ class Thr;
class Con;
class Tab;
class Set;
class Tmr;
struct Par : public Opt {
unsigned m_no;
@ -186,6 +190,8 @@ struct Par : public Opt {
const Tab& tab() const { assert(m_tab != 0); return *m_tab; }
Set* m_set;
Set& set() const { assert(m_set != 0); return *m_set; }
Tmr* m_tmr;
Tmr& tmr() const { assert(m_tmr != 0); return *m_tmr; }
unsigned m_totrows;
unsigned m_batch;
// value calculation
@ -201,6 +207,7 @@ struct Par : public Opt {
m_con(0),
m_tab(0),
m_set(0),
m_tmr(0),
m_totrows(m_threads * m_rows),
m_batch(32),
m_pctnull(10),
@ -241,19 +248,20 @@ struct Tmr {
void on();
void off(unsigned cnt = 0);
const char* time();
const char* pct(const Tmr& t1);
const char* over(const Tmr& t1);
NDB_TICKS m_on;
unsigned m_ms;
unsigned m_cnt;
char m_time[100];
char m_over[100];
char m_text[100];
Tmr() { clr(); }
};
void
Tmr::clr()
{
m_on = m_ms = m_cnt = m_time[0] = m_over[0] = 0;
m_on = m_ms = m_cnt = m_time[0] = m_text[0] = 0;
}
void
@ -284,15 +292,26 @@ Tmr::time()
return m_time;
}
/*
 * Format this timer's elapsed ms as a percentage of t1's elapsed ms.
 * The text is written to m_text and a pointer to it is returned, so
 * the result is overwritten by the next pct() or over() call.
 */
const char*
Tmr::pct(const Tmr& t1)
{
  // ratio is undefined when the reference timer recorded no time
  if (t1.m_ms == 0) {
    sprintf(m_text, "[cannot measure]");
    return m_text;
  }
  sprintf(m_text, "%u pct", (100 * m_ms) / t1.m_ms);
  return m_text;
}
const char*
Tmr::over(const Tmr& t1)
{
if (0 < t1.m_ms && t1.m_ms < m_ms) {
sprintf(m_over, "%u pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms);
sprintf(m_text, "%u pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms);
} else {
sprintf(m_over, "[cannot measure]");
sprintf(m_text, "[cannot measure]");
}
return m_over;
return m_text;
}
// tables and indexes
@ -409,7 +428,7 @@ operator<<(NdbOut& out, const Tab& tab)
return out;
}
// tt1 + tt1x1 tt1x2 tt1x3 tt1x4
// tt1 + tt1x1 tt1x2 tt1x3 tt1x4 tt1x5
static const Col
tt1col[] = {
@ -422,24 +441,29 @@ tt1col[] = {
static const ICol
tt1x1col[] = {
{ 0, tt1col[1] }
{ 0, tt1col[0] }
};
static const ICol
tt1x2col[] = {
{ 0, tt1col[1] }
};
static const ICol
tt1x3col[] = {
{ 0, tt1col[1] },
{ 1, tt1col[2] }
};
static const ICol
tt1x3col[] = {
tt1x4col[] = {
{ 0, tt1col[3] },
{ 1, tt1col[2] },
{ 2, tt1col[1] }
};
static const ICol
tt1x4col[] = {
tt1x5col[] = {
{ 0, tt1col[1] },
{ 1, tt1col[4] },
{ 2, tt1col[2] },
@ -453,17 +477,22 @@ tt1x1 = {
static const ITab
tt1x2 = {
"TT1X2", 2, tt1x2col
"TT1X2", 1, tt1x2col
};
static const ITab
tt1x3 = {
"TT1X3", 3, tt1x3col
"TT1X3", 2, tt1x3col
};
static const ITab
tt1x4 = {
"TT1X4", 4, tt1x4col
"TT1X4", 3, tt1x4col
};
static const ITab
tt1x5 = {
"TT1X5", 4, tt1x5col
};
static const ITab
@ -471,15 +500,16 @@ tt1itab[] = {
tt1x1,
tt1x2,
tt1x3,
tt1x4
tt1x4,
tt1x5
};
static const Tab
tt1 = {
"TT1", 5, tt1col, 4, tt1itab
"TT1", 5, tt1col, 5, tt1itab
};
// tt2 + tt2x1 tt2x2 tt2x3 tt2x4
// tt2 + tt2x1 tt2x2 tt2x3 tt2x4 tt2x5
static const Col
tt2col[] = {
@ -492,24 +522,29 @@ tt2col[] = {
static const ICol
tt2x1col[] = {
{ 0, tt2col[0] }
};
static const ICol
tt2x2col[] = {
{ 0, tt2col[1] },
{ 1, tt2col[2] }
};
static const ICol
tt2x2col[] = {
tt2x3col[] = {
{ 0, tt2col[2] },
{ 1, tt2col[1] }
};
static const ICol
tt2x3col[] = {
tt2x4col[] = {
{ 0, tt2col[3] },
{ 1, tt2col[4] }
};
static const ICol
tt2x4col[] = {
tt2x5col[] = {
{ 0, tt2col[4] },
{ 1, tt2col[3] },
{ 2, tt2col[2] },
@ -518,7 +553,7 @@ tt2x4col[] = {
static const ITab
tt2x1 = {
"TT2X1", 2, tt2x1col
"TT2X1", 1, tt2x1col
};
static const ITab
@ -533,7 +568,12 @@ tt2x3 = {
static const ITab
tt2x4 = {
"TT2X4", 4, tt2x4col
"TT2X4", 2, tt2x4col
};
static const ITab
tt2x5 = {
"TT2X5", 4, tt2x5col
};
static const ITab
@ -541,12 +581,13 @@ tt2itab[] = {
tt2x1,
tt2x2,
tt2x3,
tt2x4
tt2x4,
tt2x5
};
static const Tab
tt2 = {
"TT2", 5, tt2col, 4, tt2itab
"TT2", 5, tt2col, 5, tt2itab
};
// all tables
@ -1369,13 +1410,14 @@ operator<<(NdbOut& out, const Row& row)
struct Set {
const Tab& m_tab;
unsigned m_rows;
unsigned m_count;
Row** m_row;
Row** m_saverow;
Row* m_keyrow;
NdbRecAttr** m_rec;
Set(const Tab& tab, unsigned rows);
~Set();
void reset();
unsigned count() const;
// row methods
bool exist(unsigned i) const;
void calc(Par par, unsigned i);
@ -1408,7 +1450,6 @@ Set::Set(const Tab& tab, unsigned rows) :
m_tab(tab)
{
m_rows = rows;
m_count = 0;
m_row = new Row* [m_rows];
for (unsigned i = 0; i < m_rows; i++) {
m_row[i] = 0;
@ -1437,6 +1478,31 @@ Set::~Set()
NdbMutex_Destroy(m_mutex);
}
void
Set::reset()
{
for (unsigned i = 0; i < m_rows; i++) {
if (m_row[i] != 0) {
Row& row = *m_row[i];
row.m_exist = false;
}
}
}
unsigned
Set::count() const
{
unsigned count = 0;
for (unsigned i = 0; i < m_rows; i++) {
if (m_row[i] != 0) {
Row& row = *m_row[i];
if (row.m_exist)
count++;
}
}
return count;
}
bool
Set::exist(unsigned i) const
{
@ -1460,9 +1526,9 @@ Set::calc(Par par, unsigned i)
int
Set::insrow(Par par, unsigned i)
{
assert(m_row[i] != 0 && m_count < m_rows);
CHK(m_row[i]->insrow(par) == 0);
m_count++;
assert(m_row[i] != 0);
Row& row = *m_row[i];
CHK(row.insrow(par) == 0);
return 0;
}
@ -1470,16 +1536,17 @@ int
Set::updrow(Par par, unsigned i)
{
assert(m_row[i] != 0);
CHK(m_row[i]->updrow(par) == 0);
Row& row = *m_row[i];
CHK(row.updrow(par) == 0);
return 0;
}
int
Set::delrow(Par par, unsigned i)
{
assert(m_row[i] != 0 && m_count != 0);
CHK(m_row[i]->delrow(par) == 0);
m_count--;
assert(m_row[i] != 0);
Row& row = *m_row[i];
CHK(row.delrow(par) == 0);
return 0;
}
@ -1544,10 +1611,8 @@ Set::putval(unsigned i, bool force)
val.copy(aRef);
val.m_null = false;
}
if (! row.m_exist) {
if (! row.m_exist)
row.m_exist = true;
m_count++;
}
return 0;
}
@ -1556,7 +1621,7 @@ Set::verify(const Set& set2) const
{
const Tab& tab = m_tab;
assert(&tab == &set2.m_tab && m_rows == set2.m_rows);
CHKMSG(m_count == set2.m_count, "set=" << m_count << " set2=" << set2.m_count);
CHKMSG(count() == set2.count(), "set=" << count() << " set2=" << set2.count());
for (unsigned i = 0; i < m_rows; i++) {
CHK(exist(i) == set2.exist(i));
if (! exist(i))
@ -1659,7 +1724,10 @@ struct BSet {
unsigned m_bvals;
BVal** m_bval;
BSet(const Tab& tab, const ITab& itab, unsigned rows);
~BSet();
void reset();
void calc(Par par);
void calcpk(Par par, unsigned i);
int setbnd(Par par) const;
void filter(const Set& set, Set& set2) const;
};
@ -1671,12 +1739,31 @@ BSet::BSet(const Tab& tab, const ITab& itab, unsigned rows) :
m_bvals(0)
{
m_bval = new BVal* [m_alloc];
for (unsigned i = 0; i < m_alloc; i++) {
m_bval[i] = 0;
}
}
/*
 * Release the bound set.  The BVal objects stored in the slots are
 * allocated with new in calc()/calcpk() and are owned by this set, so
 * they must be deleted before the pointer array itself; deleting only
 * the array would leak every bound still held.
 */
BSet::~BSet()
{
  reset();
  delete [] m_bval;
}
/*
 * Delete all bound values, last one first, and leave the set empty
 * with every used slot cleared to 0.
 */
void
BSet::reset()
{
  for (; m_bvals > 0; m_bvals--) {
    const unsigned slot = m_bvals - 1;
    delete m_bval[slot];
    m_bval[slot] = 0;
  }
}
void
BSet::calc(Par par)
{
const ITab& itab = m_itab;
reset();
for (unsigned k = 0; k < itab.m_icols; k++) {
const ICol& icol = itab.m_icol[k];
const Col& col = icol.m_col;
@ -1717,6 +1804,23 @@ BSet::calc(Par par)
}
}
/*
 * Replace the current bounds with one bound per index column, each
 * calculated from row number i.  Every index column must be a primary
 * key column.
 */
void
BSet::calcpk(Par par, unsigned i)
{
  reset();
  const ITab& itab = m_itab;
  unsigned k = 0;
  while (k < itab.m_icols) {
    const ICol& icol = itab.m_icol[k];
    const Col& col = icol.m_col;
    assert(col.m_pk);
    assert(m_bvals < m_alloc);
    BVal* const bvalp = new BVal(icol);
    m_bval[m_bvals++] = bvalp;
    // NOTE(review): 4 is presumably the equality bound type - confirm
    bvalp->m_type = 4;
    bvalp->calc(par, i);
    k++;
  }
}
int
BSet::setbnd(Par par) const
{
@ -1733,7 +1837,7 @@ BSet::filter(const Set& set, Set& set2) const
const Tab& tab = m_tab;
const ITab& itab = m_itab;
assert(&tab == &set2.m_tab && set.m_rows == set2.m_rows);
assert(set2.m_count == 0);
assert(set2.count() == 0);
for (unsigned i = 0; i < set.m_rows; i++) {
if (! set.exist(i))
continue;
@ -1781,7 +1885,6 @@ BSet::filter(const Set& set, Set& set2) const
assert(! row2.m_exist);
row2.copy(row);
row2.m_exist = true;
set2.m_count++;
}
}
@ -1919,7 +2022,7 @@ pkread(Par par)
unsigned i2 = (unsigned)-1;
CHK(set2.getkey(par, &i2) == 0 && i == i2);
CHK(set2.putval(i, false) == 0);
LL4("row " << set2.m_count << ": " << *set2.m_row[i]);
LL4("row " << set2.count() << ": " << *set2.m_row[i]);
con.closeTransaction();
}
if (par.m_verify)
@ -1927,6 +2030,31 @@ pkread(Par par)
return 0;
}
/*
 * Do count primary key reads on randomly chosen rows, fetching only
 * the first column and not verifying the value.  Each read uses its
 * own transaction.  Returns 0 on success; any failed step returns
 * through CHK.
 */
static int
pkreadfast(Par par, unsigned count)
{
  Con& con = par.con();
  const Tab& tab = par.tab();
  const Set& set = par.set();
  LL3("pkfast " << tab.m_name);
  Row keyrow(tab);
  unsigned done = 0;
  while (done < count) {
    unsigned i = urandom(set.m_rows);
    assert(set.exist(i));
    CHK(con.startTransaction() == 0);
    // define key
    keyrow.calc(par, i);
    CHK(keyrow.selrow(par) == 0);
    // get 1st column
    NdbRecAttr* rec;
    CHK(con.getValue((Uint32)0, rec) == 0);
    // NOTE(review): executeScan() on a pk read looks like a leftover
    // from the scan variants - confirm it is intended before removing
    CHK(con.executeScan() == 0);
    CHK(con.execute(Commit) == 0);
    con.closeTransaction();
    done++;
  }
  return 0;
}
// scan read
static int
@ -1952,7 +2080,7 @@ scanreadtable(Par par)
unsigned i = (unsigned)-1;
CHK(set2.getkey(par, &i) == 0);
CHK(set2.putval(i, false) == 0);
LL4("row " << set2.m_count << ": " << *set2.m_row[i]);
LL4("row " << set2.count() << ": " << *set2.m_row[i]);
}
con.closeTransaction();
if (par.m_verify)
@ -1960,6 +2088,33 @@ scanreadtable(Par par)
return 0;
}
/*
 * Full table scan reading only the first column.  Rows are counted
 * but not stored or verified; the scan succeeds only if the number of
 * rows returned equals countcheck.
 */
static int
scanreadtablefast(Par par, unsigned countcheck)
{
  Con& con = par.con();
  const Tab& tab = par.tab();
  // not used below; par.set() still asserts that a set is attached
  const Set& set = par.set();
  LL3("scanfast " << tab.m_name);
  CHK(con.startTransaction() == 0);
  CHK(con.getNdbScanOperation(tab) == 0);
  CHK(con.openScanRead(par.m_scanrd) == 0);
  // get 1st column
  NdbRecAttr* rec;
  CHK(con.getValue((Uint32)0, rec) == 0);
  CHK(con.executeScan() == 0);
  // nextScanResult: 0 = got a row, 1 = end of scan, anything else fails
  unsigned count = 0;
  bool more = true;
  while (more) {
    int ret;
    CHK((ret = con.nextScanResult()) == 0 || ret == 1);
    if (ret == 0)
      count++;
    else
      more = false;
  }
  con.closeTransaction();
  CHK(count == countcheck);
  return 0;
}
static int
scanreadindex(Par par, const ITab& itab, const BSet& bset)
{
@ -1987,7 +2142,7 @@ scanreadindex(Par par, const ITab& itab, const BSet& bset)
CHK(set2.getkey(par, &i) == 0);
LL4("key " << i);
CHK(set2.putval(i, par.m_dups) == 0);
LL4("row " << set2.m_count << ": " << *set2.m_row[i]);
LL4("row " << set2.count() << ": " << *set2.m_row[i]);
}
con.closeTransaction();
if (par.m_verify)
@ -1995,6 +2150,35 @@ scanreadindex(Par par, const ITab& itab, const BSet& bset)
return 0;
}
/*
 * Index scan with the bounds in bset, reading only the first column.
 * Rows are counted but not stored or verified; the scan succeeds only
 * if the number of rows returned equals countcheck.
 */
static int
scanreadindexfast(Par par, const ITab& itab, const BSet& bset, unsigned countcheck)
{
  Con& con = par.con();
  const Tab& tab = par.tab();
  // not used below; par.set() still asserts that a set is attached
  const Set& set = par.set();
  LL3("scanfast " << itab.m_name << " bounds=" << bset.m_bvals);
  LL4(bset);
  CHK(con.startTransaction() == 0);
  CHK(con.getNdbScanOperation(itab, tab) == 0);
  CHK(con.openScanRead(par.m_scanrd) == 0);
  CHK(bset.setbnd(par) == 0);
  // get 1st column
  NdbRecAttr* rec;
  CHK(con.getValue((Uint32)0, rec) == 0);
  CHK(con.executeScan() == 0);
  // nextScanResult: 0 = got a row, 1 = end of scan, anything else fails
  unsigned count = 0;
  bool more = true;
  while (more) {
    int ret;
    CHK((ret = con.nextScanResult()) == 0 || ret == 1);
    if (ret == 0)
      count++;
    else
      more = false;
  }
  con.closeTransaction();
  CHK(count == countcheck);
  return 0;
}
static int
scanreadindex(Par par, const ITab& itab)
{
@ -2029,6 +2213,60 @@ scanreadall(Par par)
return 0;
}
// timing scans
/*
 * Time one full table scan that must return m_totrows rows.  The
 * elapsed time is added to the timer attached to par, with the set's
 * row count as the operation count.
 */
static int
timescantable(Par par)
{
  Tmr& tmr = par.tmr();
  tmr.on();
  CHK(scanreadtablefast(par, par.m_totrows) == 0);
  tmr.off(par.set().m_rows);
  return 0;
}
/*
 * Time one scan of the PK index that must return m_totrows rows.  The
 * bound set is left empty (calc is never called on it).
 */
static int
timescanpkindex(Par par)
{
  const Tab& tab = par.tab();
  const ITab& itab = tab.m_itab[0];     // 1st index is on PK
  BSet bset(tab, itab, par.m_rows);
  Tmr& tmr = par.tmr();
  tmr.on();
  CHK(scanreadindexfast(par, itab, bset, par.m_totrows) == 0);
  tmr.off(par.set().m_rows);
  return 0;
}
/*
 * Time primary key reads against the table.  The number of reads is
 * m_samples, or m_totrows when m_samples is 0.
 */
static int
timepkreadtable(Par par)
{
  Tmr& tmr = par.tmr();
  tmr.on();
  // -samples 0 means sample as many times as there are rows
  const unsigned count = par.m_samples != 0 ? par.m_samples : par.m_totrows;
  CHK(pkreadfast(par, count) == 0);
  tmr.off(count);
  return 0;
}
/*
 * Time single-row lookups through the PK index.  Each sample picks a
 * random row number, sets bounds from that row's primary key and runs
 * an index scan that must return exactly 1 row.  The number of samples
 * is m_samples, or m_totrows when m_samples is 0.
 */
static int
timepkreadindex(Par par)
{
  const Tab& tab = par.tab();
  const ITab& itab = tab.m_itab[0];     // 1st index is on PK
  BSet bset(tab, itab, par.m_rows);
  // -samples 0 means sample as many times as there are rows
  const unsigned count = par.m_samples != 0 ? par.m_samples : par.m_totrows;
  Tmr& tmr = par.tmr();
  tmr.on();
  unsigned done = 0;
  while (done < count) {
    unsigned i = urandom(par.m_totrows);
    bset.calcpk(par, i);
    CHK(scanreadindexfast(par, itab, bset, 1) == 0);
    done++;
  }
  tmr.off(count);
  return 0;
}
// scan update
static int
@ -2438,6 +2676,7 @@ runstep(Par par, const char* fname, TFunc func, unsigned mode)
Thr& thr = *g_thrlist[n];
thr.m_par.m_tab = par.m_tab;
thr.m_par.m_set = par.m_set;
thr.m_par.m_tmr = par.m_tmr;
thr.m_func = func;
thr.start();
}
@ -2564,11 +2803,9 @@ ttimemaint(Par par)
t1.off(par.m_totrows);
RUNSTEP(par, createindex, ST);
RUNSTEP(par, invalidateindex, MT);
RUNSTEP(par, readverify, ST);
t2.on();
RUNSTEP(par, pkupdate, MT);
t2.off(par.m_totrows);
RUNSTEP(par, readverify, ST);
RUNSTEP(par, dropindex, ST);
}
LL1("update - " << t1.time());
@ -2577,6 +2814,50 @@ ttimemaint(Par par)
return 0;
}
/*
 * Test case: time a full table scan against a full scan of the index
 * on PK, and report the index time as a percentage of the table time.
 * Times are accumulated over all subloops.
 */
static int
ttimescan(Par par)
{
  Tmr tmrTable;
  Tmr tmrIndex;
  RUNSTEP(par, droptable, ST);
  RUNSTEP(par, createtable, ST);
  RUNSTEP(par, invalidatetable, MT);
  unsigned i = 0;
  while (i < par.m_subloop) {
    RUNSTEP(par, pkinsert, MT);
    RUNSTEP(par, createindex, ST);
    // each timed step records into whichever timer par points at
    par.m_tmr = &tmrTable;
    RUNSTEP(par, timescantable, ST);
    par.m_tmr = &tmrIndex;
    RUNSTEP(par, timescanpkindex, ST);
    RUNSTEP(par, dropindex, ST);
    i++;
  }
  LL1("full scan table - " << tmrTable.time());
  LL1("full scan PK index - " << tmrIndex.time());
  LL1("index time pct - " << tmrIndex.pct(tmrTable));
  return 0;
}
/*
 * Test case: time primary key reads on the table against single-row
 * scans of the index on PK, and report the index time as a percentage
 * of the table time.  Times are accumulated over all subloops.
 */
static int
ttimepkread(Par par)
{
  Tmr tmrTable;
  Tmr tmrIndex;
  RUNSTEP(par, droptable, ST);
  RUNSTEP(par, createtable, ST);
  RUNSTEP(par, invalidatetable, MT);
  unsigned i = 0;
  while (i < par.m_subloop) {
    RUNSTEP(par, pkinsert, MT);
    RUNSTEP(par, createindex, ST);
    // each timed step records into whichever timer par points at
    par.m_tmr = &tmrTable;
    RUNSTEP(par, timepkreadtable, ST);
    par.m_tmr = &tmrIndex;
    RUNSTEP(par, timepkreadindex, ST);
    RUNSTEP(par, dropindex, ST);
    i++;
  }
  LL1("pk read table - " << tmrTable.time());
  LL1("pk read PK index - " << tmrIndex.time());
  LL1("index time pct - " << tmrIndex.pct(tmrTable));
  return 0;
}
static int
tdrop(Par par)
{
@ -2603,6 +2884,8 @@ tcaselist[] = {
TCase("d", tbusybuild, "pk operations and index build"),
TCase("t", ttimebuild, "time index build"),
TCase("u", ttimemaint, "time index maintenance"),
TCase("v", ttimescan, "time full scan table vs index on pk"),
TCase("w", ttimepkread, "time pk read table vs index on pk"),
TCase("z", tdrop, "drop test tables")
};
@ -2622,7 +2905,7 @@ printcases()
static void
printtables()
{
ndbout << "tables and indexes:" << endl;
ndbout << "tables and indexes (X1 is on table PK):" << endl;
for (unsigned j = 0; j < tabcount; j++) {
const Tab& tab = tablist[j];
ndbout << " " << tab.m_name;
@ -2663,8 +2946,8 @@ runtest(Par par)
continue;
const Tab& tab = tablist[j];
par.m_tab = &tab;
Set set(tab, par.m_totrows);
par.m_set = &set;
delete par.m_set;
par.m_set = new Set(tab, par.m_totrows);
LL1("table " << tab.m_name);
CHK(tcase.m_func(par) == 0);
}
@ -2750,6 +3033,12 @@ NDB_COMMAND(testOIBasic, "testOIBasic", "testOIBasic", "testOIBasic", 65535)
continue;
}
}
if (strcmp(arg, "-samples") == 0) {
if (++argv, --argc > 0) {
g_opt.m_samples = atoi(argv[0]);
continue;
}
}
if (strcmp(arg, "-scanrd") == 0) {
if (++argv, --argc > 0) {
g_opt.m_scanrd = atoi(argv[0]);