From ffb64880b7b5381d29341ec7d5cb07da2e81b77f Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 23 Apr 2005 17:33:12 +0200 Subject: [PATCH] ndb - bug#10029 fix ndb/include/kernel/signaldata/TuxMaint.hpp: handle multipl index update failure atomically ndb/src/kernel/blocks/dbtup/Dbtup.hpp: handle multipl index update failure atomically ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp: handle multipl index update failure atomically ndb/src/kernel/blocks/dbtup/Notes.txt: handle multipl index update failure atomically ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp: handle multipl index update failure atomically ndb/src/ndbapi/ndberror.c: handle multipl index update failure atomically ndb/test/ndbapi/testOIBasic.cpp: handle multipl index update failure atomically --- ndb/include/kernel/signaldata/TuxMaint.hpp | 4 +- ndb/src/kernel/blocks/dbtup/Dbtup.hpp | 8 + ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp | 80 +++++----- ndb/src/kernel/blocks/dbtup/Notes.txt | 25 ++- ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp | 11 ++ ndb/src/ndbapi/ndberror.c | 7 +- ndb/test/ndbapi/testOIBasic.cpp | 160 ++++++++++++++----- 7 files changed, 205 insertions(+), 90 deletions(-) diff --git a/ndb/include/kernel/signaldata/TuxMaint.hpp b/ndb/include/kernel/signaldata/TuxMaint.hpp index 9fee031dc41..4518f0531ea 100644 --- a/ndb/include/kernel/signaldata/TuxMaint.hpp +++ b/ndb/include/kernel/signaldata/TuxMaint.hpp @@ -36,8 +36,8 @@ public: }; enum ErrorCode { NoError = 0, // must be zero - SearchError = 895, // add + found or remove + not found - NoMemError = 827 + SearchError = 901, // add + found or remove + not found + NoMemError = 902 }; STATIC_CONST( SignalLength = 8 ); private: diff --git a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index b48546576f9..06cfd420eac 100644 --- a/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -1777,6 +1777,10 @@ private: Operationrec* const regOperPtr, Tablerec* const regTabPtr); + int addTuxEntries(Signal* signal, + Operationrec* regOperPtr, + Tablerec* regTabPtr); + // these crash the node on error void executeTuxCommitTriggers(Signal* signal, @@ -1787,6 +1791,10 @@ private: Operationrec* regOperPtr, Tablerec* const regTabPtr); + void removeTuxEntries(Signal* signal, + Operationrec* regOperPtr, + Tablerec* regTabPtr); + // ***************************************************************** // Error Handling routines. // ***************************************************************** diff --git a/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp b/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp index aac5c326cad..575d08efffc 100644 --- a/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp +++ b/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp @@ -973,25 +973,7 @@ Dbtup::executeTuxInsertTriggers(Signal* signal, req->pageOffset = regOperPtr->pageOffset; req->tupVersion = tupVersion; req->opInfo = TuxMaintReq::OpAdd; - // loop over index list - const ArrayList& triggerList = regTabPtr->tuxCustomTriggers; - TriggerPtr triggerPtr; - triggerList.first(triggerPtr); - while (triggerPtr.i != RNIL) { - ljam(); - req->indexId = triggerPtr.p->indexId; - req->errorCode = RNIL; - EXECUTE_DIRECT(DBTUX, GSN_TUX_MAINT_REQ, - signal, TuxMaintReq::SignalLength); - ljamEntry(); - if (req->errorCode != 0) { - ljam(); - terrorCode = req->errorCode; - return -1; - } - triggerList.next(triggerPtr); - } - return 0; + return addTuxEntries(signal, regOperPtr, regTabPtr); } int @@ -1012,9 +994,18 @@ Dbtup::executeTuxUpdateTriggers(Signal* signal, req->pageOffset = regOperPtr->pageOffset; req->tupVersion = tupVersion; req->opInfo = TuxMaintReq::OpAdd; - // loop over index list + return addTuxEntries(signal, regOperPtr, regTabPtr); +} + +int +Dbtup::addTuxEntries(Signal* signal, + Operationrec* regOperPtr, + Tablerec* regTabPtr) +{ + TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend(); const ArrayList& triggerList = regTabPtr->tuxCustomTriggers; TriggerPtr triggerPtr; + Uint32 failPtrI; triggerList.first(triggerPtr); while (triggerPtr.i != RNIL) { ljam(); @@ -1026,11 +1017,29 @@ Dbtup::executeTuxUpdateTriggers(Signal* signal, if (req->errorCode != 0) { ljam(); terrorCode = req->errorCode; - return -1; + failPtrI = triggerPtr.i; + goto fail; } triggerList.next(triggerPtr); } return 0; +fail: + req->opInfo = TuxMaintReq::OpRemove; + triggerList.first(triggerPtr); + while (triggerPtr.i != failPtrI) { + ljam(); + req->indexId = triggerPtr.p->indexId; + req->errorCode = RNIL; + EXECUTE_DIRECT(DBTUX, GSN_TUX_MAINT_REQ, + signal, TuxMaintReq::SignalLength); + ljamEntry(); + ndbrequire(req->errorCode == 0); + triggerList.next(triggerPtr); + } +#ifdef VM_TRACE + ndbout << "aborted partial tux update: op " << hex << regOperPtr << endl; +#endif + return -1; } int @@ -1049,7 +1058,6 @@ Dbtup::executeTuxCommitTriggers(Signal* signal, { TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend(); // get version - // XXX could add prevTupVersion to Operationrec Uint32 tupVersion; if (regOperPtr->optype == ZINSERT) { if (! regOperPtr->deleteInsertFlag) @@ -1087,21 +1095,7 @@ Dbtup::executeTuxCommitTriggers(Signal* signal, req->pageOffset = regOperPtr->pageOffset; req->tupVersion = tupVersion; req->opInfo = TuxMaintReq::OpRemove; - // loop over index list - const ArrayList& triggerList = regTabPtr->tuxCustomTriggers; - TriggerPtr triggerPtr; - triggerList.first(triggerPtr); - while (triggerPtr.i != RNIL) { - ljam(); - req->indexId = triggerPtr.p->indexId; - req->errorCode = RNIL; - EXECUTE_DIRECT(DBTUX, GSN_TUX_MAINT_REQ, - signal, TuxMaintReq::SignalLength); - ljamEntry(); - // commit must succeed - ndbrequire(req->errorCode == 0); - triggerList.next(triggerPtr); - } + removeTuxEntries(signal, regOperPtr, regTabPtr); } void @@ -1132,7 +1126,15 @@ Dbtup::executeTuxAbortTriggers(Signal* signal, req->pageOffset = regOperPtr->pageOffset; req->tupVersion = tupVersion; req->opInfo = TuxMaintReq::OpRemove; - // loop over index list + removeTuxEntries(signal, regOperPtr, regTabPtr); +} + +void +Dbtup::removeTuxEntries(Signal* signal, + Operationrec* regOperPtr, + Tablerec* regTabPtr) +{ + TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend(); const ArrayList& triggerList = regTabPtr->tuxCustomTriggers; TriggerPtr triggerPtr; triggerList.first(triggerPtr); @@ -1143,7 +1145,7 @@ Dbtup::executeTuxAbortTriggers(Signal* signal, EXECUTE_DIRECT(DBTUX, GSN_TUX_MAINT_REQ, signal, TuxMaintReq::SignalLength); ljamEntry(); - // abort must succeed + // must succeed ndbrequire(req->errorCode == 0); triggerList.next(triggerPtr); } diff --git a/ndb/src/kernel/blocks/dbtup/Notes.txt b/ndb/src/kernel/blocks/dbtup/Notes.txt index 9d47c591fe8..c2973bb0a76 100644 --- a/ndb/src/kernel/blocks/dbtup/Notes.txt +++ b/ndb/src/kernel/blocks/dbtup/Notes.txt @@ -135,6 +135,24 @@ abort DELETE none - 1) alternatively, store prevTupVersion in operation record. +Abort from ordered index error +------------------------------ + +Obviously, index update failure causes operation failure. +The operation is then aborted later by TC. + +The problem here is with multiple indexes. Some may have been +updated successfully before the one that failed. Therefore +the trigger code aborts the successful ones already in +the prepare phase. + +In other words, multiple indexes are treated as one. + +Abort from any cause +-------------------- + +[ hairy stuff ] + Read attributes, query status ----------------------------- @@ -170,14 +188,11 @@ used to decide if the scan can see the tuple. This signal may also be called during any phase since commit/abort of all operations is not done in one time-slice. -Commit and abort ----------------- - -[ hairy stuff ] - Problems -------- Current abort code can destroy a tuple version too early. This happens in test case "ticuur" (insert-commit-update-update-rollback), if abort of first update arrives before abort of second update. + +vim: set textwidth=68: diff --git a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp index 389192fd0cf..9f9d4cb68e3 100644 --- a/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp +++ b/ndb/src/kernel/blocks/dbtux/DbtuxNode.cpp @@ -23,6 +23,11 @@ int Dbtux::allocNode(Signal* signal, NodeHandle& node) { + if (ERROR_INSERTED(12007)) { + jam(); + CLEAR_ERROR_INSERT_VALUE; + return TuxMaintReq::NoMemError; + } Frag& frag = node.m_frag; Uint32 pageId = NullTupLoc.getPageId(); Uint32 pageOffset = NullTupLoc.getPageOffset(); @@ -34,6 +39,12 @@ Dbtux::allocNode(Signal* signal, NodeHandle& node) node.m_loc = TupLoc(pageId, pageOffset); node.m_node = reinterpret_cast(node32); ndbrequire(node.m_loc != NullTupLoc && node.m_node != 0); + } else { + switch (errorCode) { + case 827: + errorCode = TuxMaintReq::NoMemError; + break; + } } return errorCode; } diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 94e1aeb5545..f052200b67d 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -175,10 +175,11 @@ ErrorBundle ErrorCodes[] = { */ { 623, IS, "623" }, { 624, IS, "624" }, - { 625, IS, "Out of memory in Ndb Kernel, index part (increase IndexMemory)" }, + { 625, IS, "Out of memory in Ndb Kernel, hash index part (increase IndexMemory)" }, { 800, IS, "Too many ordered indexes (increase MaxNoOfOrderedIndexes)" }, { 826, IS, "Too many tables and attributes (increase MaxNoOfAttributes or MaxNoOfTables)" }, - { 827, IS, "Out of memory in Ndb Kernel, data part (increase DataMemory)" }, + { 827, IS, "Out of memory in Ndb Kernel, table data (increase DataMemory)" }, + { 902, IS, "Out of memory in Ndb Kernel, ordered index data (increase DataMemory)" }, { 832, IS, "832" }, /** @@ -205,7 +206,7 @@ ErrorBundle ErrorCodes[] = { * Internal errors */ { 892, IE, "Inconsistent hash index. The index needs to be dropped and recreated" }, - { 895, IE, "Inconsistent ordered index. The index needs to be dropped and recreated" }, + { 901, IE, "Inconsistent ordered index. The index needs to be dropped and recreated" }, { 202, IE, "202" }, { 203, IE, "203" }, { 207, IE, "207" }, diff --git a/ndb/test/ndbapi/testOIBasic.cpp b/ndb/test/ndbapi/testOIBasic.cpp index e6d3844d18e..9f8da850ff4 100644 --- a/ndb/test/ndbapi/testOIBasic.cpp +++ b/ndb/test/ndbapi/testOIBasic.cpp @@ -228,6 +228,8 @@ struct Par : public Opt { bool m_verify; // deadlock possible bool m_deadlock; + // abort percentabge + unsigned m_abortpct; // timer location Par(const Opt& opt) : Opt(opt), @@ -243,7 +245,8 @@ struct Par : public Opt { m_pctrange(0), m_randomkey(false), m_verify(false), - m_deadlock(false) { + m_deadlock(false), + m_abortpct(0) { } }; @@ -684,7 +687,7 @@ struct Con { NdbResultSet* m_resultset; enum ScanMode { ScanNo = 0, Committed, Latest, Exclusive }; ScanMode m_scanmode; - enum ErrType { ErrNone = 0, ErrDeadlock, ErrOther }; + enum ErrType { ErrNone = 0, ErrDeadlock, ErrNospace, ErrOther }; ErrType m_errtype; Con() : m_ndb(0), m_dic(0), m_tx(0), m_op(0), @@ -705,7 +708,7 @@ struct Con { int setValue(int num, const char* addr); int setBound(int num, int type, const void* value); int execute(ExecType t); - int execute(ExecType t, bool& deadlock); + int execute(ExecType t, bool& deadlock, bool& nospace); int openScanRead(unsigned scanbat, unsigned scanpar); int openScanExclusive(unsigned scanbat, unsigned scanpar); int executeScan(); @@ -818,17 +821,21 @@ Con::execute(ExecType t) } int -Con::execute(ExecType t, bool& deadlock) +Con::execute(ExecType t, bool& deadlock, bool& nospace) { int ret = execute(t); - if (ret != 0) { - if (deadlock && m_errtype == ErrDeadlock) { - LL3("caught deadlock"); - ret = 0; - } + if (ret != 0 && deadlock && m_errtype == ErrDeadlock) { + LL3("caught deadlock"); + ret = 0; } else { deadlock = false; } + if (ret != 0 && nospace && m_errtype == ErrNospace) { + LL3("caught nospace"); + ret = 0; + } else { + nospace = false; + } CHK(ret == 0); return 0; } @@ -940,6 +947,8 @@ Con::printerror(NdbOut& out) die += (code == g_opt.m_die); if (code == 266 || code == 274 || code == 296 || code == 297 || code == 499) m_errtype = ErrDeadlock; + if (code == 826 || code == 827 || code == 902) + m_errtype = ErrNospace; } if (m_op && m_op->getNdbError().code != 0) { LL0(++any << " op : error " << m_op->getNdbError()); @@ -1128,6 +1137,16 @@ irandom(unsigned n) return i; } +static bool +randompct(unsigned pct) +{ + if (pct == 0) + return false; + if (pct >= 100) + return true; + return urandom(100) < pct; +} + // Val - typed column value struct Val { @@ -1565,8 +1584,8 @@ struct Set { // row methods bool exist(unsigned i) const; Row::Op pending(unsigned i) const; - void notpending(unsigned i); - void notpending(const Lst& lst); + void notpending(unsigned i, ExecType et = Commit); + void notpending(const Lst& lst, ExecType et = Commit); void calc(Par par, unsigned i); int insrow(Par par, unsigned i); int updrow(Par par, unsigned i); @@ -1775,23 +1794,30 @@ Set::putval(unsigned i, bool force) } void -Set::notpending(unsigned i) +Set::notpending(unsigned i, ExecType et) { assert(m_row[i] != 0); Row& row = *m_row[i]; - if (row.m_pending == Row::InsOp) - row.m_exist = true; - if (row.m_pending == Row::DelOp) - row.m_exist = false; + if (et == Commit) { + if (row.m_pending == Row::InsOp) + row.m_exist = true; + if (row.m_pending == Row::DelOp) + row.m_exist = false; + } else { + if (row.m_pending == Row::InsOp) + row.m_exist = false; + if (row.m_pending == Row::DelOp) + row.m_exist = true; + } row.m_pending = Row::NoOp; } void -Set::notpending(const Lst& lst) +Set::notpending(const Lst& lst, ExecType et) { for (unsigned j = 0; j < lst.m_cnt; j++) { unsigned i = lst.m_arr[j]; - notpending(i); + notpending(i, et); } } @@ -2121,14 +2147,20 @@ pkinsert(Par par) lst.push(i); if (lst.cnt() == par.m_batch) { bool deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + bool nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); con.closeTransaction(); if (deadlock) { LL1("pkinsert: stop on deadlock"); return 0; } + if (nospace) { + LL1("pkinsert: cnt=" << j << " stop on nospace"); + return 0; + } set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); lst.reset(); CHK(con.startTransaction() == 0); @@ -2136,14 +2168,20 @@ pkinsert(Par par) } if (lst.cnt() != 0) { bool deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + bool nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); con.closeTransaction(); if (deadlock) { LL1("pkinsert: stop on deadlock"); return 0; } + if (nospace) { + LL1("pkinsert: end: stop on nospace"); + return 0; + } set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); return 0; } @@ -2160,6 +2198,7 @@ pkupdate(Par par) CHK(con.startTransaction() == 0); Lst lst; bool deadlock = false; + bool nospace = false; for (unsigned j = 0; j < par.m_rows; j++) { unsigned j2 = ! par.m_randomkey ? j : urandom(par.m_rows); unsigned i = thrrow(par, j2); @@ -2175,27 +2214,37 @@ pkupdate(Par par) lst.push(i); if (lst.cnt() == par.m_batch) { deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); if (deadlock) { LL1("pkupdate: stop on deadlock"); break; } + if (nospace) { + LL1("pkupdate: cnt=" << j << " stop on nospace"); + break; + } con.closeTransaction(); set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); lst.reset(); CHK(con.startTransaction() == 0); } } - if (! deadlock && lst.cnt() != 0) { + if (! deadlock && ! nospace && lst.cnt() != 0) { deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); if (deadlock) { LL1("pkupdate: stop on deadlock"); + } else if (nospace) { + LL1("pkupdate: end: stop on nospace"); } else { set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); } } @@ -2212,6 +2261,7 @@ pkdelete(Par par) CHK(con.startTransaction() == 0); Lst lst; bool deadlock = false; + bool nospace = false; for (unsigned j = 0; j < par.m_rows; j++) { unsigned j2 = ! par.m_randomkey ? j : urandom(par.m_rows); unsigned i = thrrow(par, j2); @@ -2226,27 +2276,31 @@ pkdelete(Par par) lst.push(i); if (lst.cnt() == par.m_batch) { deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); if (deadlock) { LL1("pkdelete: stop on deadlock"); break; } con.closeTransaction(); set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); lst.reset(); CHK(con.startTransaction() == 0); } } - if (! deadlock && lst.cnt() != 0) { + if (! deadlock && ! nospace && lst.cnt() != 0) { deadlock = par.m_deadlock; - CHK(con.execute(Commit, deadlock) == 0); + nospace = true; + ExecType et = randompct(par.m_abortpct) ? Rollback : Commit; + CHK(con.execute(et, deadlock, nospace) == 0); if (deadlock) { LL1("pkdelete: stop on deadlock"); } else { set.lock(); - set.notpending(lst); + set.notpending(lst, et); set.unlock(); } } @@ -2730,6 +2784,10 @@ readverify(Par par) if (par.m_noverify) return 0; par.m_verify = true; + if (par.m_abortpct != 0) { + LL2("skip verify in this version"); // implement in 5.0 version + par.m_verify = false; + } CHK(pkread(par) == 0); CHK(scanreadall(par) == 0); return 0; @@ -3028,11 +3086,11 @@ runstep(Par par, const char* fname, TFunc func, unsigned mode) for (n = 0; n < threads; n++) { LL4("start " << n); Thr& thr = *g_thrlist[n]; - thr.m_par.m_tab = par.m_tab; - thr.m_par.m_set = par.m_set; - thr.m_par.m_tmr = par.m_tmr; - thr.m_par.m_lno = par.m_lno; - thr.m_par.m_slno = par.m_slno; + Par oldpar = thr.m_par; + // update parameters + thr.m_par = par; + thr.m_par.m_no = oldpar.m_no; + thr.m_par.m_con = oldpar.m_con; thr.m_func = func; thr.start(); } @@ -3143,6 +3201,24 @@ tbusybuild(Par par) return 0; } +static int +trollback(Par par) +{ + par.m_abortpct = 50; + RUNSTEP(par, droptable, ST); + RUNSTEP(par, createtable, ST); + RUNSTEP(par, invalidatetable, MT); + RUNSTEP(par, pkinsert, MT); + RUNSTEP(par, createindex, ST); + RUNSTEP(par, invalidateindex, MT); + RUNSTEP(par, readverify, ST); + for (par.m_slno = 0; par.m_slno < par.m_subloop; par.m_slno++) { + RUNSTEP(par, mixedoperations, MT); + RUNSTEP(par, readverify, ST); + } + return 0; +} + static int ttimebuild(Par par) { @@ -3252,10 +3328,12 @@ struct TCase { static const TCase tcaselist[] = { TCase("a", tbuild, "index build"), - TCase("b", tpkops, "pk operations"), - TCase("c", tpkopsread, "pk operations and scan reads"), - TCase("d", tmixedops, "pk operations and scan operations"), - TCase("e", tbusybuild, "pk operations and index build"), + // "b" in 5.0 + TCase("c", tpkops, "pk operations"), + TCase("d", tpkopsread, "pk operations and scan reads"), + TCase("e", tmixedops, "pk operations and scan operations"), + TCase("f", tbusybuild, "pk operations and index build"), + TCase("g", trollback, "operations with random rollbacks"), TCase("t", ttimebuild, "time index build"), TCase("u", ttimemaint, "time index maintenance"), TCase("v", ttimescan, "time full scan table vs index on pk"),