From 39e750d949b203e2ebd282ac9e6ff3145761b5cf Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Tue, 12 Apr 2005 17:54:34 +0200 Subject: [PATCH 1/7] bug#9749 - ndb lock upgrade handle more cases... --- ndb/src/kernel/blocks/dbacc/Dbacc.hpp | 2 + ndb/src/kernel/blocks/dbacc/DbaccMain.cpp | 143 +++++++++++++++++++++- ndb/test/ndbapi/testOperations.cpp | 72 +++++++++-- 3 files changed, 204 insertions(+), 13 deletions(-) diff --git a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp index 64b947b5462..aaa4aca7b00 100644 --- a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp +++ b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp @@ -1100,6 +1100,8 @@ private: Uint32 executeNextOperation(Signal* signal); void releaselock(Signal* signal); void takeOutFragWaitQue(Signal* signal); + void check_lock_upgrade(Signal* signal, OperationrecPtr lock_owner, + OperationrecPtr release_op); void allocOverflowPage(Signal* signal); bool getrootfragmentrec(Signal* signal, RootfragmentrecPtr&, Uint32 fragId); void insertLockOwnersList(Signal* signal, const OperationrecPtr&); diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index 17c5a31cbed..28956de198c 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -5802,9 +5802,148 @@ void Dbacc::commitOperation(Signal* signal) ptrCheckGuard(tolqTmpPtr, coprecsize, operationrec); tolqTmpPtr.p->prevParallelQue = operationRecPtr.p->prevParallelQue; }//if - }//if + + /** + * Check possible lock upgrade + * 1) Find lock owner + * 2) Count transactions in parallel que + * 3) If count == 1 and TRANSID(next serial) == TRANSID(lock owner) + * upgrade next serial + */ + if(operationRecPtr.p->lockMode) + { + jam(); + /** + * Committing a non shared operation can't lead to lock upgrade + */ + return; + } + + OperationrecPtr lock_owner; + lock_owner.i = operationRecPtr.p->prevParallelQue; + ptrCheckGuard(lock_owner, coprecsize, operationrec); + Uint32 transid[2] = { lock_owner.p->transId1, + lock_owner.p->transId2 }; + + + while(lock_owner.p->prevParallelQue != RNIL) + { + lock_owner.i = lock_owner.p->prevParallelQue; + ptrCheckGuard(lock_owner, coprecsize, operationrec); + + if(lock_owner.p->transId1 != transid[0] || + lock_owner.p->transId2 != transid[1]) + { + jam(); + /** + * If more than 1 trans in lock queue -> no lock upgrade + */ + return; + } + } + + check_lock_upgrade(signal, lock_owner, operationRecPtr); + } }//Dbacc::commitOperation() +void +Dbacc::check_lock_upgrade(Signal* signal, + OperationrecPtr lock_owner, + OperationrecPtr release_op) +{ + if((lock_owner.p->transId1 == release_op.p->transId1 && + lock_owner.p->transId2 == release_op.p->transId2) || + release_op.p->lockMode || + lock_owner.p->nextSerialQue == RNIL) + { + jam(); + /** + * No lock upgrade if same trans or lock owner has no serial queue + * or releasing non shared op + */ + return; + } + + OperationrecPtr next; + next.i = lock_owner.p->nextSerialQue; + ptrCheckGuard(next, coprecsize, operationrec); + + if(lock_owner.p->transId1 != next.p->transId1 || + lock_owner.p->transId2 != next.p->transId2) + { + jam(); + /** + * No lock upgrad if !same trans in serial queue + */ + return; + } + + tgnptMainOpPtr = lock_owner; + getNoParallelTransaction(signal); + if (tgnptNrTransaction > 1) + { + jam(); + /** + * No lock upgrade if more than 1 transaction in parallell queue + */ + return; + } + + OperationrecPtr tmp; + tmp.i = lock_owner.p->nextSerialQue = next.p->nextSerialQue; + if(tmp.i != 
RNIL) + { + ptrCheckGuard(tmp, coprecsize, operationrec); + ndbassert(tmp.p->prevSerialQue == next.i); + tmp.p->prevSerialQue = lock_owner.i; + } + next.p->nextSerialQue = next.p->prevSerialQue = RNIL; + + // Find end of parallell que + tmp = lock_owner; + tmp.p->lockMode= 1; + while(tmp.p->nextParallelQue != RNIL) + { + jam(); + tmp.i = tmp.p->nextParallelQue; + ptrCheckGuard(tmp, coprecsize, operationrec); + tmp.p->lockMode= 1; + } + + next.p->prevParallelQue = tmp.i; + tmp.p->nextParallelQue = next.i; + + OperationrecPtr save = operationRecPtr; + + Uint32 TelementIsDisappeared = 0; // lock upgrade = all reads + Uint32 ThashValue = lock_owner.p->hashValue; + Uint32 localdata[2]; + localdata[0] = lock_owner.p->localdata[0]; + localdata[1] = lock_owner.p->localdata[1]; + do { + next.p->elementIsDisappeared = TelementIsDisappeared; + next.p->hashValue = ThashValue; + next.p->localdata[0] = localdata[0]; + next.p->localdata[1] = localdata[1]; + + operationRecPtr = next; + ndbassert(next.p->lockMode); + TelementIsDisappeared = executeNextOperation(signal); + if (next.p->nextParallelQue != RNIL) + { + jam(); + next.i = next.p->nextParallelQue; + ptrCheckGuard(next, coprecsize, operationrec); + } else { + jam(); + break; + }//if + } while (1); + + operationRecPtr = save; + +} + /* ------------------------------------------------------------------------- */ /* RELEASELOCK */ /* RESETS LOCK OF AN ELEMENT. */ @@ -5841,6 +5980,8 @@ void Dbacc::releaselock(Signal* signal) ptrCheckGuard(trlTmpOperPtr, coprecsize, operationrec); trlTmpOperPtr.p->prevSerialQue = trlOperPtr.i; }//if + + check_lock_upgrade(signal, copyInOperPtr, operationRecPtr); /* --------------------------------------------------------------------------------- */ /* SINCE THERE ARE STILL ITEMS IN THE PARALLEL QUEUE WE NEED NOT WORRY ABOUT */ /* STARTING QUEUED OPERATIONS. THUS WE CAN END HERE. 
*/ diff --git a/ndb/test/ndbapi/testOperations.cpp b/ndb/test/ndbapi/testOperations.cpp index 9f1d5ee1191..773511a0475 100644 --- a/ndb/test/ndbapi/testOperations.cpp +++ b/ndb/test/ndbapi/testOperations.cpp @@ -547,21 +547,64 @@ runLockUpgrade1(NDBT_Context* ctx, NDBT_Step* step){ do { CHECK(hugoOps.startTransaction(pNdb) == 0); - CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); - CHECK(hugoOps.execute_NoCommit(pNdb) == 0); + if(ctx->getProperty("LOCK_UPGRADE", 1) == 1) + { + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); + CHECK(hugoOps.execute_NoCommit(pNdb) == 0); - ctx->setProperty("READ_DONE", 1); - ctx->broadcast(); - ndbout_c("wait 2"); - ctx->getPropertyWait("READ_DONE", 2); - ndbout_c("wait 2 - done"); + ctx->setProperty("READ_DONE", 1); + ctx->broadcast(); + ndbout_c("wait 2"); + ctx->getPropertyWait("READ_DONE", 2); + ndbout_c("wait 2 - done"); + } + else + { + ctx->setProperty("READ_DONE", 1); + ctx->broadcast(); + ctx->getPropertyWait("READ_DONE", 2); + ndbout_c("wait 2 - done"); + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); + CHECK(hugoOps.execute_NoCommit(pNdb) == 0); + } + if(ctx->getProperty("LU_OP", o_INS) == o_INS) + { + CHECK(hugoOps.pkDeleteRecord(pNdb, 0, 1) == 0); + CHECK(hugoOps.pkInsertRecord(pNdb, 0, 1, 2) == 0); + } + else if(ctx->getProperty("LU_OP", o_UPD) == o_UPD) + { + CHECK(hugoOps.pkUpdateRecord(pNdb, 0, 1, 2) == 0); + } + else + { + CHECK(hugoOps.pkDeleteRecord(pNdb, 0, 1) == 0); + } ctx->setProperty("READ_DONE", 3); ctx->broadcast(); ndbout_c("before update"); - CHECK(hugoOps.pkUpdateRecord(pNdb, 0, 1, 2) == 0); ndbout_c("wait update"); - CHECK(hugoOps.execute_NoCommit(pNdb) == 0); - CHECK(hugoOps.closeTransaction(pNdb)); + CHECK(hugoOps.execute_Commit(pNdb) == 0); + CHECK(hugoOps.closeTransaction(pNdb) == 0); + + CHECK(hugoOps.startTransaction(pNdb) == 0); + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1) == 0); + int res= hugoOps.execute_Commit(pNdb); + if(ctx->getProperty("LU_OP", o_INS) == o_INS) + { + CHECK(res == 0); + CHECK(hugoOps.verifyUpdatesValue(2) == 0); + } + else if(ctx->getProperty("LU_OP", o_UPD) == o_UPD) + { + CHECK(res == 0); + CHECK(hugoOps.verifyUpdatesValue(2) == 0); + } + else + { + CHECK(res == 626); + } + } while(0); return result; @@ -592,10 +635,10 @@ runLockUpgrade2(NDBT_Context* ctx, NDBT_Step* step){ ndbout_c("wait 3 - done"); NdbSleep_MilliSleep(200); - CHECK(hugoOps.execute_Commit(pNdb) == 0); + CHECK(hugoOps.execute_Commit(pNdb) == 0); } while(0); - return NDBT_FAILED; + return result; } int @@ -607,11 +650,16 @@ main(int argc, const char** argv){ NDBT_TestSuite ts("testOperations"); + for(Uint32 i = 0; i <6; i++) { BaseString name("bug_9749"); + name.appfmt("_%d", i); NDBT_TestCaseImpl1 *pt = new NDBT_TestCaseImpl1(&ts, name.c_str(), ""); + pt->setProperty("LOCK_UPGRADE", 1 + (i & 1)); + pt->setProperty("LU_OP", 1 + (i >> 1)); + pt->addInitializer(new NDBT_Initializer(pt, "runClearTable", runClearTable)); From 45a07db5c01937b37c4d62b7bdcd024740d87e6d Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Wed, 13 Apr 2005 09:54:40 +0200 Subject: [PATCH 2/7] BUG#9749 - ndb lock upgrade - more fixes... 
1) Make getNoParall into function instead of a procedure 2) Check for multiple transactions in "upgrade's" parallell queue 3) Set lock mode according to lock_owner's lockMode NOTE: Does still not handle lock upgrade in case of aborts correctly --- ndb/src/kernel/blocks/dbacc/Dbacc.hpp | 4 +- ndb/src/kernel/blocks/dbacc/DbaccMain.cpp | 69 +++++++++++------------ 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp index aaa4aca7b00..246afc5ceb8 100644 --- a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp +++ b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp @@ -1022,7 +1022,7 @@ private: Uint32 placeReadInLockQueue(Signal* signal); void placeSerialQueueRead(Signal* signal); void checkOnlyReadEntry(Signal* signal); - void getNoParallelTransaction(Signal* signal); + Uint32 getNoParallelTransaction(const Operationrec*); void moveLastParallelQueue(Signal* signal); void moveLastParallelQueueWrite(Signal* signal); Uint32 placeWriteInLockQueue(Signal* signal); @@ -1265,7 +1265,6 @@ private: OperationrecPtr mlpqOperPtr; OperationrecPtr queOperPtr; OperationrecPtr readWriteOpPtr; - OperationrecPtr tgnptMainOpPtr; Uint32 cfreeopRec; Uint32 coprecsize; /* --------------------------------------------------------------------------------- */ @@ -1516,7 +1515,6 @@ private: Uint32 turlIndex; Uint32 tlfrTmp1; Uint32 tlfrTmp2; - Uint32 tgnptNrTransaction; Uint32 tudqeIndex; Uint32 tscanTrid1; Uint32 tscanTrid2; diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index 28956de198c..cdb9091da42 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -1936,9 +1936,7 @@ void Dbacc::insertelementLab(Signal* signal) /* --------------------------------------------------------------------------------- */ Uint32 Dbacc::placeReadInLockQueue(Signal* signal) { - tgnptMainOpPtr = queOperPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(queOperPtr.p) == 1) { if ((queOperPtr.p->transId1 == operationRecPtr.p->transId1) && (queOperPtr.p->transId2 == operationRecPtr.p->transId2)) { /* --------------------------------------------------------------------------------- */ @@ -2021,9 +2019,7 @@ void Dbacc::placeSerialQueueRead(Signal* signal) checkOnlyReadEntry(signal); return; }//if - tgnptMainOpPtr = readWriteOpPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(readWriteOpPtr.p) == 1) { jam(); /* --------------------------------------------------------------------------------- */ /* THERE WAS ONLY ONE TRANSACTION INVOLVED IN THE PARALLEL QUEUE. 
IF THIS IS OUR */ @@ -2104,24 +2100,23 @@ void Dbacc::checkOnlyReadEntry(Signal* signal) /* --------------------------------------------------------------------------------- */ /* GET_NO_PARALLEL_TRANSACTION */ /* --------------------------------------------------------------------------------- */ -void Dbacc::getNoParallelTransaction(Signal* signal) +Uint32 +Dbacc::getNoParallelTransaction(const Operationrec * op) { - OperationrecPtr tnptOpPtr; - - tgnptNrTransaction = 1; - tnptOpPtr.i = tgnptMainOpPtr.p->nextParallelQue; - while ((tnptOpPtr.i != RNIL) && - (tgnptNrTransaction == 1)) { + OperationrecPtr tmp; + + tmp.i= op->nextParallelQue; + Uint32 transId[2] = { op->transId1, op->transId2 }; + while (tmp.i != RNIL) + { jam(); - ptrCheckGuard(tnptOpPtr, coprecsize, operationrec); - if ((tnptOpPtr.p->transId1 == tgnptMainOpPtr.p->transId1) && - (tnptOpPtr.p->transId2 == tgnptMainOpPtr.p->transId2)) { - tnptOpPtr.i = tnptOpPtr.p->nextParallelQue; - } else { - jam(); - tgnptNrTransaction++; - }//if - }//while + ptrCheckGuard(tmp, coprecsize, operationrec); + if (tmp.p->transId1 == transId[0] && tmp.p->transId2 == transId[1]) + tmp.i = tmp.p->nextParallelQue; + else + return 2; + } + return 1; }//Dbacc::getNoParallelTransaction() void Dbacc::moveLastParallelQueue(Signal* signal) @@ -2162,9 +2157,7 @@ void Dbacc::moveLastParallelQueueWrite(Signal* signal) /* --------------------------------------------------------------------------------- */ Uint32 Dbacc::placeWriteInLockQueue(Signal* signal) { - tgnptMainOpPtr = queOperPtr; - getNoParallelTransaction(signal); - if (!((tgnptNrTransaction == 1) && + if (!((getNoParallelTransaction(queOperPtr.p) == 1) && (queOperPtr.p->transId1 == operationRecPtr.p->transId1) && (queOperPtr.p->transId2 == operationRecPtr.p->transId2))) { jam(); @@ -2215,9 +2208,7 @@ void Dbacc::placeSerialQueueWrite(Signal* signal) }//if readWriteOpPtr.i = readWriteOpPtr.p->nextSerialQue; ptrCheckGuard(readWriteOpPtr, coprecsize, operationrec); - tgnptMainOpPtr = readWriteOpPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(readWriteOpPtr.p) == 1) { /* --------------------------------------------------------------------------------- */ /* THERE WAS ONLY ONE TRANSACTION INVOLVED IN THE PARALLEL QUEUE. IF THIS IS OUR */ /* TRANSACTION WE CAN STILL GET HOLD OF THE LOCK. 
*/ @@ -5878,9 +5869,7 @@ Dbacc::check_lock_upgrade(Signal* signal, return; } - tgnptMainOpPtr = lock_owner; - getNoParallelTransaction(signal); - if (tgnptNrTransaction > 1) + if (getNoParallelTransaction(lock_owner.p) > 1) { jam(); /** @@ -5888,6 +5877,15 @@ Dbacc::check_lock_upgrade(Signal* signal, */ return; } + + if (getNoParallelTransaction(next.p) > 1) + { + jam(); + /** + * No lock upgrade if more than 1 transaction in next's parallell queue + */ + return; + } OperationrecPtr tmp; tmp.i = lock_owner.p->nextSerialQue = next.p->nextSerialQue; @@ -5901,20 +5899,19 @@ Dbacc::check_lock_upgrade(Signal* signal, // Find end of parallell que tmp = lock_owner; - tmp.p->lockMode= 1; while(tmp.p->nextParallelQue != RNIL) { jam(); tmp.i = tmp.p->nextParallelQue; ptrCheckGuard(tmp, coprecsize, operationrec); - tmp.p->lockMode= 1; } next.p->prevParallelQue = tmp.i; tmp.p->nextParallelQue = next.i; OperationrecPtr save = operationRecPtr; - + Uint32 lockMode = lock_owner.p->lockMode; + Uint32 TelementIsDisappeared = 0; // lock upgrade = all reads Uint32 ThashValue = lock_owner.p->hashValue; Uint32 localdata[2]; @@ -5927,7 +5924,7 @@ Dbacc::check_lock_upgrade(Signal* signal, next.p->localdata[1] = localdata[1]; operationRecPtr = next; - ndbassert(next.p->lockMode); + next.p->lockMode = lockMode; TelementIsDisappeared = executeNextOperation(signal); if (next.p->nextParallelQue != RNIL) { @@ -5941,7 +5938,7 @@ Dbacc::check_lock_upgrade(Signal* signal, } while (1); operationRecPtr = save; - + } /* ------------------------------------------------------------------------- */ From 27f7a6c41bb9bd786d16e2b24fce66b16a9d99b5 Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Thu, 14 Apr 2005 13:43:07 +0200 Subject: [PATCH 3/7] BUG#9891 - ndb lcp Crash if ACC_CONTOPREQ was sent while ACC_LCPCONF was in job buffer if ACC_LCPCONF would have arrived eariler (before TUP_LCPSTARTED) operations could lockup. But would be restarted on next LCP -- LQH 1) Better check for LCP started that will also return true if ACC or TUP already has completed 2) Remove incorrect if statement that prevented operations to be started if ACC has completed -- ACC Make sure all ACC_CONTOPCONF are sent before releasing lcp record i.e. 
use noOfLcpConf == 4 (2 ACC_LCPCONF + 2 ACC_CONTOPCONF) Check for == 4 also when sending ACC_CONTOPCONF --- ndb/src/kernel/blocks/dbacc/DbaccMain.cpp | 20 ++++++- ndb/src/kernel/blocks/dblqh/Dblqh.hpp | 3 +- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 66 +++++++++-------------- 3 files changed, 44 insertions(+), 45 deletions(-) diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index cdb9091da42..d566639489c 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -8486,7 +8486,7 @@ void Dbacc::checkSendLcpConfLab(Signal* signal) break; }//switch lcpConnectptr.p->noOfLcpConf++; - ndbrequire(lcpConnectptr.p->noOfLcpConf <= 2); + ndbrequire(lcpConnectptr.p->noOfLcpConf <= 4); fragrecptr.p->fragState = ACTIVEFRAG; rlpPageptr.i = fragrecptr.p->zeroPagePtr; ptrCheckGuard(rlpPageptr, cpagesize, page8); @@ -8504,7 +8504,7 @@ void Dbacc::checkSendLcpConfLab(Signal* signal) }//for signal->theData[0] = fragrecptr.p->lcpLqhPtr; sendSignal(lcpConnectptr.p->lcpUserblockref, GSN_ACC_LCPCONF, signal, 1, JBB); - if (lcpConnectptr.p->noOfLcpConf == 2) { + if (lcpConnectptr.p->noOfLcpConf == 4) { jam(); releaseLcpConnectRec(signal); rootfragrecptr.i = fragrecptr.p->myroot; @@ -8535,6 +8535,13 @@ void Dbacc::execACC_CONTOPREQ(Signal* signal) /* LOCAL FRAG ID */ tresult = 0; ptrCheckGuard(lcpConnectptr, clcpConnectsize, lcpConnectrec); + if(ERROR_INSERTED(3002) && lcpConnectptr.p->noOfLcpConf < 2) + { + sendSignalWithDelay(cownBlockref, GSN_ACC_CONTOPREQ, signal, 300, + signal->getLength()); + return; + } + ndbrequire(lcpConnectptr.p->lcpstate == LCP_ACTIVE); rootfragrecptr.i = lcpConnectptr.p->rootrecptr; ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); @@ -8568,6 +8575,15 @@ void Dbacc::execACC_CONTOPREQ(Signal* signal) }//while signal->theData[0] = fragrecptr.p->lcpLqhPtr; sendSignal(lcpConnectptr.p->lcpUserblockref, GSN_ACC_CONTOPCONF, signal, 1, JBA); + + lcpConnectptr.p->noOfLcpConf++; + if (lcpConnectptr.p->noOfLcpConf == 4) { + jam(); + releaseLcpConnectRec(signal); + rootfragrecptr.i = fragrecptr.p->myroot; + ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); + rootfragrecptr.p->rootState = ACTIVEROOT; + }//if return; /* ALL QUEUED OPERATION ARE RESTARTED IF NEEDED. 
*/ }//Dbacc::execACC_CONTOPREQ() diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 0c63cb5fe17..19e055a3011 100644 --- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -968,7 +968,6 @@ public: enum LcpState { LCP_IDLE = 0, - LCP_STARTED = 1, LCP_COMPLETED = 2, LCP_WAIT_FRAGID = 3, LCP_WAIT_TUP_PREPLCP = 4, @@ -2266,7 +2265,7 @@ private: void sendCopyActiveConf(Signal* signal,Uint32 tableId); void checkLcpCompleted(Signal* signal); void checkLcpHoldop(Signal* signal); - void checkLcpStarted(Signal* signal); + bool checkLcpStarted(Signal* signal); void checkLcpTupprep(Signal* signal); void getNextFragForLcp(Signal* signal); void initLcpLocAcc(Signal* signal, Uint32 fragId); diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index c79f4dfc6c7..27f995750b6 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -10351,8 +10351,8 @@ void Dblqh::execTUP_LCPSTARTED(Signal* signal) void Dblqh::lcpStartedLab(Signal* signal) { - checkLcpStarted(signal); - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { + if (checkLcpStarted(signal)) + { jam(); /* ---------------------------------------------------------------------- * THE LOCAL CHECKPOINT HAS BEEN STARTED. IT IS NOW TIME TO @@ -10432,26 +10432,7 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal) lcpPtr.i = signal->theData[1]; ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord); ndbrequire(fragptr.p->fragStatus == Fragrecord::BLOCKED); - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { - jam(); - /***********************************************************************/ - /* THIS SIGNAL CAN ONLY BE RECEIVED WHEN FRAGMENT IS BLOCKED AND - * THE LOCAL CHECKPOINT HAS BEEN STARTED. THE BLOCKING WILL BE - * REMOVED AS SOON AS ALL OPERATIONS HAVE BEEN STARTED. - ***********************************************************************/ - restartOperationsLab(signal); - } else if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) { - jam(); - /*******************************************************************> - * THE CHECKPOINT IS COMPLETED BUT HAS NOT YET STARTED UP - * ALL OPERATIONS AGAIN. - * WE PERFORM THIS START-UP BEFORE CONTINUING WITH THE NEXT - * FRAGMENT OF THE LOCAL CHECKPOINT TO AVOID ANY STRANGE ERRORS. 
- *******************************************************************> */ - restartOperationsLab(signal); - } else { - ndbrequire(false); - } + restartOperationsLab(signal); }//Dblqh::execLQH_RESTART_OP() void Dblqh::restartOperationsLab(Signal* signal) @@ -11000,7 +10981,8 @@ void Dblqh::checkLcpHoldop(Signal* signal) * * SUBROUTINE SHORT NAME = CLS * ========================================================================== */ -void Dblqh::checkLcpStarted(Signal* signal) +bool +Dblqh::checkLcpStarted(Signal* signal) { LcpLocRecordPtr clsLcpLocptr; @@ -11010,7 +10992,7 @@ void Dblqh::checkLcpStarted(Signal* signal) do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED){ - return; + return false; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; i++; @@ -11021,12 +11003,13 @@ void Dblqh::checkLcpStarted(Signal* signal) do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED){ - return; + return false; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; i++; } while (clsLcpLocptr.i != RNIL); - lcpPtr.p->lcpState = LcpRecord::LCP_STARTED; + + return true; }//Dblqh::checkLcpStarted() /* ========================================================================== @@ -11187,20 +11170,12 @@ void Dblqh::sendAccContOp(Signal* signal) do { ptrCheckGuard(sacLcpLocptr, clcpLocrecFileSize, lcpLocRecord); sacLcpLocptr.p->accContCounter = 0; - if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_STARTED){ - /* ------------------------------------------------------------------- */ - /*SEND START OPERATIONS TO ACC AGAIN */ - /* ------------------------------------------------------------------- */ - signal->theData[0] = lcpPtr.p->lcpAccptr; - signal->theData[1] = sacLcpLocptr.p->locFragid; - sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); - count++; - } else if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED){ - signal->theData[0] = sacLcpLocptr.i; - sendSignal(reference(), GSN_ACC_CONTOPCONF, signal, 1, JBB); - } else { - ndbrequire(false); - } + /* ------------------------------------------------------------------- */ + /*SEND START OPERATIONS TO ACC AGAIN */ + /* ------------------------------------------------------------------- */ + signal->theData[0] = lcpPtr.p->lcpAccptr; + signal->theData[1] = sacLcpLocptr.p->locFragid; + sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); sacLcpLocptr.i = sacLcpLocptr.p->nextLcpLoc; } while (sacLcpLocptr.i != RNIL); @@ -11236,9 +11211,18 @@ void Dblqh::sendStartLcp(Signal* signal) signal->theData[0] = stlLcpLocptr.i; signal->theData[1] = cownref; signal->theData[2] = stlLcpLocptr.p->tupRef; - sendSignal(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, signal, 3, JBA); + if(ERROR_INSERTED(5077)) + sendSignalWithDelay(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, + signal, 5000, 3); + else + sendSignal(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, signal, 3, JBA); stlLcpLocptr.i = stlLcpLocptr.p->nextLcpLoc; } while (stlLcpLocptr.i != RNIL); + + if(ERROR_INSERTED(5077)) + { + ndbout_c("Delayed TUP_LCPREQ with 5 sec"); + } }//Dblqh::sendStartLcp() /* ------------------------------------------------------------------------- */ From f931466f36c0038181c897e6a6e70d046e9d216e Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Mon, 18 Apr 2005 12:41:12 +0200 Subject: [PATCH 4/7] bug#9892 Make BUILDINDX RF_LOCAL aware --- ndb/src/kernel/blocks/dbdict/Dbdict.cpp | 37 
+++++++++++++++++++------ ndb/src/ndbapi/Ndbif.cpp | 10 +++---- ndb/test/ndbapi/testIndex.cpp | 2 +- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 7247b7e2b9c..184db794057 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -9811,11 +9811,20 @@ Dbdict::execBUILDINDXREQ(Signal* signal) requestType == BuildIndxReq::RT_ALTER_INDEX || requestType == BuildIndxReq::RT_SYSTEMRESTART) { jam(); + + const bool isLocal = req->getRequestFlag() & RequestFlag::RF_LOCAL; + NdbNodeBitmask receiverNodes = c_aliveNodes; + if (isLocal) { + receiverNodes.clear(); + receiverNodes.set(getOwnNodeId()); + } + if (signal->getLength() == BuildIndxReq::SignalLength) { jam(); - if (getOwnNodeId() != c_masterNodeId) { + + if (!isLocal && getOwnNodeId() != c_masterNodeId) { jam(); - + releaseSections(signal); OpBuildIndex opBad; opPtr.p = &opBad; @@ -9828,9 +9837,9 @@ Dbdict::execBUILDINDXREQ(Signal* signal) } // forward initial request plus operation key to all req->setOpKey(++c_opRecordSequence); - NodeReceiverGroup rg(DBDICT, c_aliveNodes); + NodeReceiverGroup rg(DBDICT, receiverNodes); sendSignal(rg, GSN_BUILDINDXREQ, - signal, BuildIndxReq::SignalLength + 1, JBB); + signal, BuildIndxReq::SignalLength + 1, JBB); return; } // seize operation record @@ -9853,7 +9862,7 @@ Dbdict::execBUILDINDXREQ(Signal* signal) } c_opBuildIndex.add(opPtr); // master expects to hear from all - opPtr.p->m_signalCounter = c_aliveNodes; + opPtr.p->m_signalCounter = receiverNodes; buildIndex_sendReply(signal, opPtr, false); return; } @@ -10208,10 +10217,20 @@ Dbdict::buildIndex_sendSlaveReq(Signal* signal, OpBuildIndexPtr opPtr) req->setConnectionPtr(opPtr.p->key); req->setRequestType(opPtr.p->m_requestType); req->addRequestFlag(opPtr.p->m_requestFlag); - opPtr.p->m_signalCounter = c_aliveNodes; - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - sendSignal(rg, GSN_BUILDINDXREQ, - signal, BuildIndxReq::SignalLength, JBB); + if(opPtr.p->m_requestFlag & RequestFlag::RF_LOCAL) + { + opPtr.p->m_signalCounter.clearWaitingFor(); + opPtr.p->m_signalCounter.setWaitingFor(getOwnNodeId()); + sendSignal(reference(), GSN_BUILDINDXREQ, + signal, BuildIndxReq::SignalLength, JBB); + } + else + { + opPtr.p->m_signalCounter = c_aliveNodes; + NodeReceiverGroup rg(DBDICT, c_aliveNodes); + sendSignal(rg, GSN_BUILDINDXREQ, + signal, BuildIndxReq::SignalLength, JBB); + } } void diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp index a4f233709c4..1caebe436ef 100644 --- a/ndb/src/ndbapi/Ndbif.cpp +++ b/ndb/src/ndbapi/Ndbif.cpp @@ -453,7 +453,7 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) tFirstDataPtr = int2void(tFirstData); if(tFirstDataPtr != 0){ tOp = void2rec_op(tFirstDataPtr); - if (tOp->checkMagicNumber() == 0) { + if (tOp->checkMagicNumber(false) == 0) { tCon = tOp->theNdbCon; if (tCon != NULL) { if ((tCon->theSendStatus == NdbConnection::sendTC_OP) || @@ -466,11 +466,11 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) }//if }//if }//if - } else { -#ifdef VM_TRACE - ndbout_c("Recevied TCKEY_FAILREF wo/ operation"); -#endif } +#ifdef VM_TRACE + ndbout_c("Recevied TCKEY_FAILREF wo/ operation"); +#endif + return; break; } case GSN_TCKEYREF: diff --git a/ndb/test/ndbapi/testIndex.cpp b/ndb/test/ndbapi/testIndex.cpp index 6623ad35a7f..d359f83257f 100644 --- a/ndb/test/ndbapi/testIndex.cpp +++ b/ndb/test/ndbapi/testIndex.cpp @@ -1329,7 
+1329,7 @@ TESTCASE("NFNR2_O", INITIALIZER(runLoadTable); STEP(runRestarts); STEP(runTransactions2); - STEP(runTransactions2); + //STEP(runTransactions2); FINALIZER(runVerifyIndex); FINALIZER(createRandomIndex_Drop); FINALIZER(createPkIndex_Drop); From e6142c477c036a0d16e0fe812d6d238419c2b475 Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Fri, 22 Apr 2005 09:07:25 +0200 Subject: [PATCH 5/7] bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt bug#9960 - ndb backup increase wait completed timeout to 48 hours --- ndb/include/kernel/signaldata/BackupImpl.hpp | 12 +- .../kernel/signaldata/BackupSignalData.hpp | 3 + .../common/debugger/signaldata/BackupImpl.cpp | 6 +- ndb/src/kernel/blocks/backup/Backup.cpp | 1508 +++++++---------- ndb/src/kernel/blocks/backup/Backup.hpp | 36 +- ndb/src/kernel/blocks/backup/Backup.txt | 25 + ndb/src/kernel/blocks/backup/BackupInit.cpp | 2 +- ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp | 1 + ndb/src/mgmapi/mgmapi.cpp | 4 +- ndb/src/mgmsrv/MgmtSrvr.cpp | 29 +- ndb/src/mgmsrv/MgmtSrvr.hpp | 4 +- .../mgmsrv/MgmtSrvrGeneralSignalHandling.cpp | 6 +- ndb/src/ndbapi/ndberror.c | 5 +- ndb/test/ndbapi/testBackup.cpp | 14 +- ndb/test/run-test/daily-basic-tests.txt | 24 + ndb/test/src/NdbBackup.cpp | 46 +- 16 files changed, 758 insertions(+), 967 deletions(-) diff --git a/ndb/include/kernel/signaldata/BackupImpl.hpp b/ndb/include/kernel/signaldata/BackupImpl.hpp index 2ac91570aad..2032e2347b5 100644 --- a/ndb/include/kernel/signaldata/BackupImpl.hpp +++ b/ndb/include/kernel/signaldata/BackupImpl.hpp @@ -75,7 +75,7 @@ class DefineBackupRef { friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); enum ErrorCode { Undefined = 1340, @@ -92,6 +92,7 @@ private: Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class DefineBackupConf { @@ -158,7 +159,7 @@ class StartBackupRef { friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 5 ); enum ErrorCode { FailedToAllocateTriggerRecord = 1 @@ -168,6 +169,7 @@ private: Uint32 backupPtr; Uint32 signalNo; Uint32 errorCode; + Uint32 nodeId; }; class StartBackupConf { @@ -232,9 +234,8 @@ public: private: Uint32 backupId; Uint32 backupPtr; - Uint32 tableId; - Uint32 fragmentNo; Uint32 errorCode; + Uint32 nodeId; }; class BackupFragmentConf { @@ -296,12 +297,13 @@ class StopBackupRef { friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); private: Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class StopBackupConf { diff --git a/ndb/include/kernel/signaldata/BackupSignalData.hpp b/ndb/include/kernel/signaldata/BackupSignalData.hpp index fb018026a49..b38dd8d14b2 100644 --- a/ndb/include/kernel/signaldata/BackupSignalData.hpp +++ b/ndb/include/kernel/signaldata/BackupSignalData.hpp @@ -240,6 +240,9 @@ public: FileOrScanError = 1325, // slave -> coordinator BackupFailureDueToNodeFail = 1326, // slave -> slave OkToClean = 1327 // master -> slave + + ,AbortScan = 1328 + ,IncompatibleVersions = 1329 }; private: Uint32 requestType; diff --git a/ndb/src/common/debugger/signaldata/BackupImpl.cpp b/ndb/src/common/debugger/signaldata/BackupImpl.cpp index bdc34d614cf..e9b0188d93b 100644 --- a/ndb/src/common/debugger/signaldata/BackupImpl.cpp +++ 
b/ndb/src/common/debugger/signaldata/BackupImpl.cpp @@ -90,10 +90,8 @@ printBACKUP_FRAGMENT_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ bool printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ BackupFragmentRef* sig = (BackupFragmentRef*)data; - fprintf(out, " backupPtr: %d backupId: %d\n", - sig->backupPtr, sig->backupId); - fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n", - sig->tableId, sig->fragmentNo, sig->errorCode); + fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n", + sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode); return true; } diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp index 2e62979ce8e..713991a4f58 100644 --- a/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/ndb/src/kernel/blocks/backup/Backup.cpp @@ -67,31 +67,6 @@ static const Uint32 BACKUP_SEQUENCE = 0x1F000000; //#define DEBUG_ABORT -//--------------------------------------------------------- -// Ignore this since a completed abort could have preceded -// this message. -//--------------------------------------------------------- -#define slaveAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->slaveState.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define masterAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->masterData.state.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define defineSlaveAbortCheck() \ - if (ptr.p->slaveState.getState() == ABORTING) { \ - jam(); \ - closeFiles(signal, ptr); \ - return; \ - } - static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE; void @@ -221,12 +196,7 @@ Backup::execCONTINUEB(Signal* signal) jam(); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, Tdata1); - - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - closeFiles(signal, ptr); - return; - }//if + BackupFilePtr filePtr; ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); FsBuffer & buf = filePtr.p->operation.dataBuffer; @@ -324,13 +294,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal) for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){ infoEvent("BackupRecord %d: BackupId: %d MasterRef: %x ClientRef: %x", ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef); - if(ptr.p->masterRef == reference()){ - infoEvent(" MasterState: %d State: %d", - ptr.p->masterData.state.getState(), - ptr.p->slaveState.getState()); - } else { - infoEvent(" State: %d", ptr.p->slaveState.getState()); - } + infoEvent(" State: %d", ptr.p->slaveState.getState()); BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); filePtr.i != RNIL; ptr.p->files.next(filePtr)){ @@ -338,7 +302,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal) infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d", filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened, filePtr.p->fileRunning, - filePtr.p->fileDone, filePtr.p->scanRunning); + filePtr.p->fileClosing, filePtr.p->scanRunning); } } } @@ -356,6 +320,17 @@ Backup::execDUMP_STATE_ORD(Signal* signal) infoEvent("PagePool: %d", c_pagePool.getSize()); + + if(signal->getLength() == 2 && signal->theData[1] == 2424) + { + ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree()); + ndbrequire(c_attributePool.getSize() == c_attributePool.getNoOfFree()); + ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree()); + ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree()); + ndbrequire(c_pagePool.getSize() == c_pagePool.getNoOfFree()); + ndbrequire(c_fragmentPool.getSize() == 
c_fragmentPool.getNoOfFree()); + ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree()); + } } } @@ -511,27 +486,6 @@ const char* triggerNameFormat[] = { "NDB$BACKUP_%d_%d_DELETE" }; -const Backup::State -Backup::validMasterTransitions[] = { - INITIAL, DEFINING, - DEFINING, DEFINED, - DEFINED, STARTED, - STARTED, SCANNING, - SCANNING, STOPPING, - STOPPING, INITIAL, - - DEFINING, ABORTING, - DEFINED, ABORTING, - STARTED, ABORTING, - SCANNING, ABORTING, - STOPPING, ABORTING, - ABORTING, ABORTING, - - DEFINING, INITIAL, - ABORTING, INITIAL, - INITIAL, INITIAL -}; - const Backup::State Backup::validSlaveTransitions[] = { INITIAL, DEFINING, @@ -561,10 +515,6 @@ const Uint32 Backup::validSlaveTransitionsCount = sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State); -const Uint32 -Backup::validMasterTransitionsCount = -sizeof(Backup::validMasterTransitions) / sizeof(Backup::State); - void Backup::CompoundState::setState(State newState){ bool found = false; @@ -578,7 +528,8 @@ Backup::CompoundState::setState(State newState){ break; } } - ndbrequire(found); + + //ndbrequire(found); if (newState == INITIAL) abortState = INITIAL; @@ -647,8 +598,7 @@ Backup::execNODE_FAILREP(Signal* signal) Uint32 theFailedNodes[NodeBitmask::Size]; for (Uint32 i = 0; i < NodeBitmask::Size; i++) theFailedNodes[i] = rep->theNodes[i]; - -// NodeId old_master_node_id = getMasterNodeId(); + c_masterNodeId = new_master_node_id; NodePtr nodePtr; @@ -686,15 +636,24 @@ Backup::execNODE_FAILREP(Signal* signal) } bool -Backup::verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask) +Backup::verifyNodesAlive(BackupRecordPtr ptr, + const NdbNodeBitmask& aNodeBitMask) { + Uint32 version = getNodeInfo(getOwnNodeId()).m_version; for (Uint32 i = 0; i < MAX_NDB_NODES; i++) { jam(); if(aNodeBitMask.get(i)) { if(!c_aliveNodes.get(i)){ jam(); + ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); return false; }//if + if(getNodeInfo(i).m_version != version) + { + jam(); + ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions); + return false; + } }//if }//for return true; @@ -709,6 +668,10 @@ Backup::checkNodeFail(Signal* signal, ndbrequire( ptr.p->nodes.get(newCoord)); /* just to make sure newCoord * is part of the backup */ + + NdbNodeBitmask mask; + mask.assign(2, theFailedNodes); + /* Update ptr.p->nodes to be up to date with current alive nodes */ NodePtr nodePtr; @@ -730,26 +693,42 @@ Backup::checkNodeFail(Signal* signal, return; // failed node is not part of backup process, safe to continue } - bool doMasterTakeover = false; - if(NodeBitmask::get(theFailedNodes, refToNode(ptr.p->masterRef))){ - jam(); - doMasterTakeover = true; - }; - - if (newCoord == getOwnNodeId()){ - jam(); - if (doMasterTakeover) { - /** - * I'm new master - */ - CRASH_INSERTION((10002)); -#ifdef DEBUG_ABORT - ndbout_c("**** Master Takeover: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); -#endif - masterTakeOver(signal, ptr); + if(mask.get(refToNode(ptr.p->masterRef))) + { + /** + * Master died...abort + */ + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); + switch(ptr.p->m_gsn){ + case GSN_DEFINE_BACKUP_REQ: + case GSN_START_BACKUP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: + case GSN_STOP_BACKUP_REQ: + // I'm currently processing...reply to self and abort... 
+ ptr.p->masterData.gsn = ptr.p->m_gsn; + ptr.p->masterData.sendCounter = ptr.p->nodes; return; - }//if + case GSN_DEFINE_BACKUP_REF: + case GSN_DEFINE_BACKUP_CONF: + case GSN_START_BACKUP_REF: + case GSN_START_BACKUP_CONF: + case GSN_BACKUP_FRAGMENT_REF: + case GSN_BACKUP_FRAGMENT_CONF: + case GSN_STOP_BACKUP_REF: + case GSN_STOP_BACKUP_CONF: + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; + masterAbort(signal, ptr); + return; + case GSN_ABORT_BACKUP_ORD: + // Already aborting + return; + } + } + else if (newCoord == getOwnNodeId()) + { /** * I'm master for this backup */ @@ -759,61 +738,81 @@ Backup::checkNodeFail(Signal* signal, ndbout_c("**** Master: Node failed: Master id = %u", refToNode(ptr.p->masterRef)); #endif - masterAbort(signal, ptr, false); + + Uint32 gsn, len, pos; + ptr.p->nodes.bitANDC(mask); + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + { + DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_DEFINE_BACKUP_REF; + len= DefineBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_START_BACKUP_REQ: + { + StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + ref->signalNo = ptr.p->masterData.startBackup.signalNo; + gsn= GSN_START_BACKUP_REF; + len= StartBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_BACKUP_FRAGMENT_REQ: + { + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_BACKUP_FRAGMENT_REF; + len= BackupFragmentRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_STOP_BACKUP_REQ: + { + StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_STOP_BACKUP_REF; + len= StopBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_CREATE_TRIG_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + return; + } + + for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; ) + { + signal->theData[pos] = i; + sendSignal(reference(), gsn, signal, len, JBB); +#ifdef DEBUG_ABORT + ndbout_c("sending %d to self from %d", gsn, i); +#endif + } return; }//if - - /** - * If there's a new master, (it's not me) - * but remember who it is - */ - ptr.p->masterRef = calcBackupBlockRef(newCoord); -#ifdef DEBUG_ABORT - ndbout_c("**** Slave: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); -#endif + /** * I abort myself as slave if not master */ CRASH_INSERTION((10021)); - // slaveAbort(signal, ptr); } -void -Backup::masterTakeOver(Signal* signal, BackupRecordPtr ptr) -{ - ptr.p->masterRef = reference(); - ptr.p->masterData.gsn = MAX_GSN + 1; - - switch(ptr.p->slaveState.getState()){ - case INITIAL: - jam(); - ptr.p->masterData.state.forceState(INITIAL); - break; - case ABORTING: - jam(); - case DEFINING: - jam(); - case DEFINED: - jam(); - case STARTED: - jam(); - case SCANNING: - jam(); - ptr.p->masterData.state.forceState(STARTED); - break; - case STOPPING: 
- jam(); - case CLEANING: - jam(); - ptr.p->masterData.state.forceState(STOPPING); - break; - default: - ndbrequire(false); - } - masterAbort(signal, ptr, false); -} - void Backup::execINCL_NODEREQ(Signal* signal) { @@ -895,8 +894,8 @@ Backup::execBACKUP_REQ(Signal* signal) ndbrequire(ptr.p->pages.empty()); ndbrequire(ptr.p->tables.isEmpty()); - ptr.p->masterData.state.forceState(INITIAL); - ptr.p->masterData.state.setState(DEFINING); + ptr.p->m_gsn = 0; + ptr.p->errorCode = 0; ptr.p->clientRef = senderRef; ptr.p->clientData = senderData; ptr.p->masterRef = reference(); @@ -905,6 +904,7 @@ Backup::execBACKUP_REQ(Signal* signal) ptr.p->backupKey[0] = 0; ptr.p->backupKey[1] = 0; ptr.p->backupDataLen = 0; + ptr.p->masterData.errorCode = 0; ptr.p->masterData.dropTrig.tableId = RNIL; ptr.p->masterData.alterTrig.tableId = RNIL; @@ -928,7 +928,6 @@ Backup::execUTIL_SEQUENCE_REF(Signal* signal) ndbrequire(ptr.i == RNIL); c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; sendBackupRef(signal, ptr, BackupRef::SequenceFailure); }//execUTIL_SEQUENCE_REF() @@ -938,8 +937,7 @@ Backup::sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode) { jam(); sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, errorCode); - // ptr.p->masterData.state.setState(INITIAL); - cleanupSlaveResources(ptr); + cleanup(signal, ptr); } void @@ -968,7 +966,8 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal) UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr(); - if(conf->requestType == UtilSequenceReq::Create) { + if(conf->requestType == UtilSequenceReq::Create) + { jam(); sendSTTORRY(signal); // At startup in NDB return; @@ -979,18 +978,20 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal) c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; - if (ptr.p->masterData.state.getState() == ABORTING) { + + if (ptr.p->checkError()) + { jam(); sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - if (ERROR_INSERTED(10023)) { - ptr.p->masterData.state.setState(ABORTING); + + if (ERROR_INSERTED(10023)) + { sendBackupRef(signal, ptr, 323); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); + { Uint64 backupId; @@ -1018,7 +1019,6 @@ Backup::defineBackupMutex_locked(Signal* signal, Uint32 ptrI, Uint32 retVal){ c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ; Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); @@ -1040,14 +1040,13 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; if (ERROR_INSERTED(10031)) { - ptr.p->masterData.state.setState(ABORTING); ptr.p->setErrorCode(331); }//if - if (ptr.p->masterData.state.getState() == ABORTING) { + if (ptr.p->checkError()) + { jam(); /** @@ -1062,13 +1061,11 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); jam(); mutex2.unlock(); // ignore response - + sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); - sendDefineBackupReq(signal, ptr); } @@ -1078,33 +1075,6 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) * 
*****************************************************************************/ -void -Backup::sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal, - Uint32 signalLength, bool executeDirect) -{ - jam(); - ptr.p->masterData.gsn = gsn; - ptr.p->masterData.sendCounter.clearWaitingFor(); - NodePtr node; - for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){ - jam(); - const Uint32 nodeId = node.p->nodeId; - if(node.p->alive && ptr.p->nodes.get(nodeId)){ - jam(); - - ptr.p->masterData.sendCounter.setWaitingFor(nodeId); - - const BlockReference ref = numberToRef(BACKUP, nodeId); - if (!executeDirect || ref != reference()) { - sendSignal(ref, gsn, signal, signalLength, JBB); - }//if - }//if - }//for - if (executeDirect) { - EXECUTE_DIRECT(BACKUP, gsn, signal, signalLength); - } -} - bool Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId) { @@ -1114,10 +1084,6 @@ Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId) ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId)); ptr.p->masterData.sendCounter.clearWaitingFor(nodeId); - - if (ptr.p->masterData.sendCounter.done()) - ptr.p->masterData.gsn = 0; - return ptr.p->masterData.sendCounter.done(); } @@ -1138,11 +1104,12 @@ Backup::sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr) req->nodes = ptr.p->nodes; req->backupDataLen = ptr.p->backupDataLen; - ptr.p->masterData.errorCode = 0; - ptr.p->okToCleanMaster = false; // master must wait with cleaning to last - sendSignalAllWait(ptr, GSN_DEFINE_BACKUP_REQ, signal, - DefineBackupReq::SignalLength, - true /* do execute direct on oneself */); + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal, + DefineBackupReq::SignalLength, JBB); + /** * Now send backup data */ @@ -1167,17 +1134,15 @@ Backup::execDEFINE_BACKUP_REF(Signal* signal) jamEntry(); DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr(); - + const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); - + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->masterData.errorCode = ref->errorCode; + ptr.p->setErrorCode(ref->errorCode); defineBackupReply(signal, ptr, nodeId); } @@ -1188,17 +1153,16 @@ Backup::execDEFINE_BACKUP_CONF(Signal* signal) DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - - if (ERROR_INSERTED(10024)) { - ptr.p->masterData.errorCode = 324; - }//if + if (ERROR_INSERTED(10024)) + { + ptr.p->setErrorCode(324); + } defineBackupReply(signal, ptr, nodeId); } @@ -1210,6 +1174,7 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) jam(); return; } + /** * Unlock mutexes */ @@ -1223,16 +1188,10 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) jam(); mutex2.unlock(); // ignore response - if(ptr.p->errorCode) { + if(ptr.p->checkError()) + { jam(); - ptr.p->masterData.errorCode = ptr.p->errorCode; - } - - 
if(ptr.p->masterData.errorCode){ - jam(); - ptr.p->setErrorCode(ptr.p->masterData.errorCode); - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - masterSendAbortBackup(signal, ptr); + masterAbort(signal, ptr); return; } @@ -1252,7 +1211,6 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB); - ptr.p->masterData.state.setState(DEFINED); /** * Prepare Trig */ @@ -1286,7 +1244,6 @@ Backup::sendCreateTrig(Signal* signal, { CreateTrigReq * req =(CreateTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_CREATE_TRIG_REQ; ptr.p->masterData.sendCounter = 3; ptr.p->masterData.createTrig.tableId = tabPtr.p->tableId; @@ -1395,17 +1352,14 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; + if (ERROR_INSERTED(10025)) + { + ptr.p->errorCode = 325; + } if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); - return; - }//if - - if (ERROR_INSERTED(10025)) { - ptr.p->errorCode = 325; - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1425,10 +1379,7 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr) /** * Finished with all tables, send StartBackupReq */ - ptr.p->masterData.state.setState(STARTED); - ptr.p->tables.first(tabPtr); - ptr.p->errorCode = 0; ptr.p->masterData.startBackup.signalNo = 0; ptr.p->masterData.startBackup.noOfSignals = (ptr.p->tables.noOfElements() + StartBackupReq::MaxTableTriggers - 1) / @@ -1467,9 +1418,12 @@ Backup::sendStartBackup(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr) }//for req->noOfTableTriggers = i; - sendSignalAllWait(ptr, GSN_START_BACKUP_REQ, signal, - StartBackupReq::HeaderLength + - (i * StartBackupReq::TableTriggerLength)); + ptr.p->masterData.gsn = GSN_START_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_START_BACKUP_REQ, signal, + StartBackupReq::HeaderLength + + (i * StartBackupReq::TableTriggerLength), JBB); } void @@ -1479,15 +1433,13 @@ Backup::execSTART_BACKUP_REF(Signal* signal) StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; const Uint32 signalNo = ref->signalNo; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); + const Uint32 nodeId = ref->nodeId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->setErrorCode(ref->errorCode); startBackupReply(signal, ptr, nodeId, signalNo); } @@ -1499,15 +1451,13 @@ Backup::execSTART_BACKUP_CONF(Signal* signal) StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 signalNo = conf->signalNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - startBackupReply(signal, ptr, nodeId, signalNo); } @@ -1524,17 +1474,16 @@ Backup::startBackupReply(Signal* signal, BackupRecordPtr ptr, return; } + if (ERROR_INSERTED(10026)) + { + ptr.p->errorCode = 326; + } + if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; 
} - - if (ERROR_INSERTED(10026)) { - ptr.p->errorCode = 326; - masterAbort(signal, ptr, true); - return; - }//if TablePtr tabPtr; c_tablePool.getPtr(tabPtr, ptr.p->masterData.startBackup.tablePtr); @@ -1566,7 +1515,6 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr) { AlterTrigReq * req =(AlterTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_ALTER_TRIG_REQ; ptr.p->masterData.sendCounter = 0; @@ -1608,6 +1556,7 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr) return; }//if ptr.p->masterData.alterTrig.tableId = RNIL; + /** * Finished with all tables */ @@ -1669,11 +1618,9 @@ Backup::alterTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; - if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1719,11 +1666,10 @@ Backup::execWAIT_GCP_CONF(Signal* signal){ ndbrequire(ptr.p->masterRef == reference()); ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ); - ptr.p->masterData.gsn = 0; if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1731,13 +1677,13 @@ Backup::execWAIT_GCP_CONF(Signal* signal){ jam(); CRASH_INSERTION((10008)); ptr.p->startGCP = gcp; - ptr.p->masterData.state.setState(SCANNING); + ptr.p->masterData.sendCounter= 0; + ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ; nextFragment(signal, ptr); } else { jam(); CRASH_INSERTION((10009)); ptr.p->stopGCP = gcp; - ptr.p->masterData.state.setState(STOPPING); sendDropTrig(signal, ptr); // regular dropping of triggers }//if } @@ -1787,6 +1733,7 @@ Backup::nextFragment(Signal* signal, BackupRecordPtr ptr) req->fragmentNo = i; req->count = 0; + ptr.p->masterData.sendCounter++; const BlockReference ref = numberToRef(BACKUP, nodeId); sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal, BackupFragmentReq::SignalLength, JBB); @@ -1824,7 +1771,7 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 tableId = conf->tableId; const Uint32 fragmentNo = conf->fragmentNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); @@ -1834,10 +1781,9 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfBytes += noOfBytes; ptr.p->noOfRecords += noOfRecords; + ptr.p->masterData.sendCounter--; TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -1852,17 +1798,24 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) fragPtr.p->scanned = 1; fragPtr.p->scanning = 0; - if(ptr.p->checkError()) { - jam(); - masterAbort(signal, ptr, true); - return; - }//if - if (ERROR_INSERTED(10028)) { + if (ERROR_INSERTED(10028)) + { ptr.p->errorCode = 328; - masterAbort(signal, ptr, true); - return; - }//if - nextFragment(signal, ptr); + } + + if(ptr.p->checkError()) + { + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + } + else + { + nextFragment(signal, ptr); + } } void @@ -1874,15 +1827,52 @@ Backup::execBACKUP_FRAGMENT_REF(Signal* signal) BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; 
BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { + jam(); + FragmentPtr fragPtr; + Array & frags = tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; ifragments.getPtr(fragPtr, i); + if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node) + { + jam(); + ndbrequire(fragPtr.p->scanned == 0); + fragPtr.p->scanned = 1; + fragPtr.p->scanning = 0; + goto done; + } + } + } + ndbrequire(false); +done: + ptr.p->masterData.sendCounter--; ptr.p->setErrorCode(ref->errorCode); - masterAbort(signal, ptr, true); + + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + execABORT_BACKUP_ORD(signal); } /***************************************************************************** @@ -1910,15 +1900,7 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr) jam(); ptr.p->masterData.dropTrig.tableId = RNIL; - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - - if(ptr.p->masterData.state.getState() == STOPPING) { - jam(); - sendStopBackup(signal, ptr); - return; - }//if - ndbrequire(ptr.p->masterData.state.getState() == ABORTING); - masterSendAbortBackup(signal, ptr); + sendStopBackup(signal, ptr); }//if } @@ -2010,7 +1992,6 @@ Backup::dropTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; sendDropTrig(signal, ptr); // recursive next } @@ -2023,14 +2004,23 @@ void Backup::execSTOP_BACKUP_REF(Signal* signal) { jamEntry(); - ndbrequire(0); + + StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->backupPtr; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); + + ptr.p->setErrorCode(ref->errorCode); + stopBackupReply(signal, ptr, nodeId); } void Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr) { jam(); - ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend(); stop->backupPtr = ptr.i; @@ -2038,8 +2028,11 @@ Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr) stop->startGCP = ptr.p->startGCP; stop->stopGCP = ptr.p->stopGCP; - sendSignalAllWait(ptr, GSN_STOP_BACKUP_REQ, signal, - StopBackupReq::SignalLength); + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_STOP_BACKUP_REQ, signal, + StopBackupReq::SignalLength, JBB); } void @@ -2049,14 +2042,12 @@ Backup::execSTOP_BACKUP_CONF(Signal* signal) StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfLogBytes += conf->noOfLogBytes; ptr.p->noOfLogRecords += conf->noOfLogRecords; @@ -2073,35 +2064,39 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) return; } - // ptr.p->masterData.state.setState(INITIAL); - - // send backup 
complete first to slaves so that they know sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete); - - BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->startGCP = ptr.p->startGCP; - rep->stopGCP = ptr.p->stopGCP; - rep->noOfBytes = ptr.p->noOfBytes; - rep->noOfRecords = ptr.p->noOfRecords; - rep->noOfLogBytes = ptr.p->noOfLogBytes; - rep->noOfLogRecords = ptr.p->noOfLogRecords; - rep->nodes = ptr.p->nodes; - sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, - BackupCompleteRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupCompleted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->startGCP; - signal->theData[4] = ptr.p->stopGCP; - signal->theData[5] = ptr.p->noOfBytes; - signal->theData[6] = ptr.p->noOfRecords; - signal->theData[7] = ptr.p->noOfLogBytes; - signal->theData[8] = ptr.p->noOfLogRecords; - ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + + if(!ptr.p->checkError()) + { + BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->startGCP = ptr.p->startGCP; + rep->stopGCP = ptr.p->stopGCP; + rep->noOfBytes = ptr.p->noOfBytes; + rep->noOfRecords = ptr.p->noOfRecords; + rep->noOfLogBytes = ptr.p->noOfLogBytes; + rep->noOfLogRecords = ptr.p->noOfLogRecords; + rep->nodes = ptr.p->nodes; + sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, + BackupCompleteRep::SignalLength, JBB); + + signal->theData[0] = EventReport::BackupCompleted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->startGCP; + signal->theData[4] = ptr.p->stopGCP; + signal->theData[5] = ptr.p->noOfBytes; + signal->theData[6] = ptr.p->noOfRecords; + signal->theData[7] = ptr.p->noOfLogBytes; + signal->theData[8] = ptr.p->noOfLogRecords; + ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + } + else + { + masterAbort(signal, ptr); + } } /***************************************************************************** @@ -2110,199 +2105,96 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) * *****************************************************************************/ void -Backup::masterAbort(Signal* signal, BackupRecordPtr ptr, bool controlledAbort) +Backup::masterAbort(Signal* signal, BackupRecordPtr ptr) { - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Master already aborting"); -#endif - jam(); - return; - } jam(); #ifdef DEBUG_ABORT ndbout_c("************ masterAbort"); #endif - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - if (!ptr.p->checkError()) - ptr.p->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; - - const State s = ptr.p->masterData.state.getState(); - - ptr.p->masterData.state.setState(ABORTING); - - ndbrequire(s == INITIAL || - s == STARTED || - s == DEFINING || - s == DEFINED || - s == SCANNING || - s == STOPPING || - s == ABORTING); - if(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ) { + if(ptr.p->masterData.errorCode != 0) + { jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_SEQUENCE_REQ"); - //------------------------------------------------------- - // We 
are waiting for UTIL_SEQUENCE response. We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for Sequence Id - * @see execUTIL_SEQUENCE_CONF - */ return; - }//if + } + + BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->reason = ptr.p->errorCode; + sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, + BackupAbortRep::SignalLength, JBB); - if(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_LOCK_REQ"); - //------------------------------------------------------- - // We are waiting for UTIL_LOCK response (mutex). We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for lock - * @see execUTIL_LOCK_CONF - */ + signal->theData[0] = EventReport::BackupAborted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->errorCode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + ndbrequire(ptr.p->errorCode); + ptr.p->masterData.errorCode = ptr.p->errorCode; + + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + ord->requestType = AbortBackupOrd::BackupFailure; + sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); return; - }//if - - /** - * Unlock mutexes only at master - */ - jam(); - Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); - jam(); - mutex1.unlock(); // ignore response - - jam(); - Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); - jam(); - mutex2.unlock(); // ignore response - - if (!controlledAbort) { + case GSN_CREATE_TRIG_REQ: + case GSN_START_BACKUP_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: jam(); - if (s == DEFINING) { - jam(); -//------------------------------------------------------- -// If we are in the defining phase all work is done by -// slaves. No triggers have been allocated thus slaves -// may free all "Master" resources, let them know... -//------------------------------------------------------- - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - return; - }//if - if (s == DEFINED) { - jam(); -//------------------------------------------------------- -// DEFINED is the state when triggers are created. We rely -// on that DICT will report create trigger failure in case -// of node failure. Thus no special action is needed here. -// We will check for errorCode != 0 when receiving -// replies on create trigger. -//------------------------------------------------------- - return; - }//if - if(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_WAIT_GCP_REQ"); -//------------------------------------------------------- -// We are waiting for WAIT_GCP response. We rely on -// this to arrive and check for ABORTING in response. 
-//------------------------------------------------------- - - /** - * Waiting for GCP - * @see execWAIT_GCP_CONF - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_ALTER_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_ALTER_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for ALTER_TRIG response. We rely on -// this to arrive and check for ABORTING in response. -//------------------------------------------------------- - - /** - * All triggers haven't been created yet - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_DROP_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_DROP_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for DROP_TRIG response. We rely on -// this to arrive and will continue dropping triggers -// until completed. -//------------------------------------------------------- - - /** - * I'm currently dropping the trigger - */ - return; - }//if - }//if - -//------------------------------------------------------- -// If we are waiting for START_BACKUP responses we can -// safely start dropping triggers (state == STARTED). -// We will ignore any START_BACKUP responses after this. -//------------------------------------------------------- - DEBUG_OUT("masterAbort: sendDropTrig"); - sendDropTrig(signal, ptr); // dropping due to error + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); // dropping due to error + return; + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + ndbrequire(false); + return; + case GSN_STOP_BACKUP_REQ: + return; + } } void -Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr) +Backup::abort_scan(Signal * signal, BackupRecordPtr ptr) { - if (ptr.p->masterData.state.getState() != ABORTING) { - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - ptr.p->masterData.state.setState(ABORTING); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + ord->requestType = AbortBackupOrd::AbortScan; + + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { + jam(); + FragmentPtr fragPtr; + Array<Fragment> & frags = tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; i<fragCount; i++) { + jam(); + tabPtr.p->fragments.getPtr(fragPtr, i); + const Uint32 nodeId = fragPtr.p->node; + if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) { + jam(); + + const BlockReference ref = numberToRef(BACKUP, nodeId); + sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + + } + } } - const State s = ptr.p->masterData.state.getAbortState(); - - /** - * First inform to client - */ - if(s == DEFINING) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_REF to mgmtsrvr"); -#endif - sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, - ptr.p->errorCode); - - } else { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_ABORT_REP to mgmtsrvr"); -#endif - BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->reason = ptr.p->errorCode; - sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, - BackupAbortRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupAborted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->errorCode; -
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - }//if - - // ptr.p->masterData.state.setState(INITIAL); - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); } /***************************************************************************** @@ -2313,26 +2205,17 @@ Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr) void Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode) { - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - return; - } - ptr.p->slaveState.setState(ABORTING); - - if (errCode != 0) { - jam(); - ptr.p->setErrorCode(errCode); - }//if + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF; + ptr.p->setErrorCode(errCode); ndbrequire(ptr.p->errorCode != 0); - + DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend(); ref->backupId = ptr.p->backupId; ref->backupPtr = ptr.i; ref->errorCode = ptr.p->errorCode; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal, DefineBackupRef::SignalLength, JBB); - - closeFiles(signal, ptr); } void @@ -2366,6 +2249,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) CRASH_INSERTION((10014)); + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ; ptr.p->slaveState.forceState(INITIAL); ptr.p->slaveState.setState(DEFINING); ptr.p->errorCode = 0; @@ -2432,7 +2316,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) files[i].p->tableId = RNIL; files[i].p->backupPtr = ptr.i; files[i].p->filePointer = RNIL; - files[i].p->fileDone = 0; + files[i].p->fileClosing = 0; files[i].p->fileOpened = 0; files[i].p->fileRunning = 0; files[i].p->scanRunning = 0; @@ -2468,17 +2352,14 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) ptr.p->logFilePtr = files[1].i; ptr.p->dataFilePtr = files[2].i; - if (!verifyNodesAlive(ptr.p->nodes)) { + if (!verifyNodesAlive(ptr, ptr.p->nodes)) { jam(); defineBackupRef(signal, ptr, DefineBackupRef::Undefined); - // sendBackupRef(signal, ptr, - // ptr.p->errorCode?ptr.p->errorCode:BackupRef::Undefined); return; }//if if (ERROR_INSERTED(10027)) { jam(); defineBackupRef(signal, ptr, 327); - // sendBackupRef(signal, ptr, 327); return; }//if @@ -2546,8 +2427,6 @@ Backup::execLIST_TABLES_CONF(Signal* signal) return; }//if - defineSlaveAbortCheck(); - /** * All tables fetched */ @@ -2679,8 +2558,6 @@ Backup::openFilesReply(Signal* signal, }//if }//for - defineSlaveAbortCheck(); - /** * Did open succeed for all files */ @@ -2810,8 +2687,6 @@ Backup::execGET_TABINFOREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - defineBackupRef(signal, ptr, ref->errorCode); } @@ -2833,8 +2708,6 @@ Backup::execGET_TABINFO_CONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - SegmentedSectionPtr dictTabInfoPtr; signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO); ndbrequire(dictTabInfoPtr.sz == len); @@ -3047,8 +2920,6 @@ Backup::execDI_FCOUNTCONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3127,8 +2998,6 @@ Backup::execDIGETPRIMCONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3143,9 +3012,7 @@ Backup::execDIGETPRIMCONF(Signal* signal) void Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr) { - // Slave must now hold on to master data until - // AbortBackupOrd::OkToClean signal - 
ptr.p->okToCleanMaster = false; + ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF; ptr.p->slaveState.setState(DEFINED); DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr(); conf->backupPtr = ptr.i; @@ -3169,16 +3036,15 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) StartBackupReq* req = (StartBackupReq*)signal->getDataPtr(); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 signalNo = req->signalNo; - + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - slaveAbortCheck(); // macro will do return if ABORTING ptr.p->slaveState.setState(STARTED); - + ptr.p->m_gsn = GSN_START_BACKUP_REQ; + for(Uint32 i = 0; i<req->noOfTableTriggers; i++) { jam(); TablePtr tabPtr; @@ -3191,11 +3057,13 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) TriggerPtr trigPtr; if(!ptr.p->triggers.seizeId(trigPtr, triggerId)) { jam(); + ptr.p->m_gsn = GSN_START_BACKUP_REF; StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend(); ref->backupPtr = ptr.i; ref->backupId = ptr.p->backupId; ref->signalNo = signalNo; ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal, StartBackupRef::SignalLength, JBB); return; @@ -3233,6 +3101,7 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) }//if }//for + ptr.p->m_gsn = GSN_START_BACKUP_CONF; StartBackupConf* conf = (StartBackupConf*)signal->getDataPtrSend(); conf->backupPtr = ptr.i; conf->backupId = ptr.p->backupId; @@ -3255,7 +3124,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) CRASH_INSERTION((10016)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 tableId = req->tableId; const Uint32 fragNo = req->fragmentNo; const Uint32 count = req->count; @@ -3266,10 +3135,9 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - slaveAbortCheck(); // macro will do return if ABORTING - ptr.p->slaveState.setState(SCANNING); - + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ; + /** * Get file */ @@ -3280,7 +3148,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 1); ndbrequire(filePtr.p->scanRunning == 0); - ndbrequire(filePtr.p->fileDone == 0); + ndbrequire(filePtr.p->fileClosing == 0); /** * Get table */ @@ -3350,7 +3218,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) req->transId1 = 0; req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->clientOpPtr= filePtr.i; - req->batch_size_rows= 16; + req->batch_size_rows= parallelism; req->batch_size_bytes= 0; sendSignal(DBLQH_REF, GSN_SCAN_FRAGREQ, signal, ScanFragReq::SignalLength, JBB); @@ -3572,6 +3440,13 @@ Backup::OperationRecord::newScan() return false; } +bool +Backup::OperationRecord::closeScan() +{ + opNoDone = opNoConf = opLen = 0; + return true; +} + bool Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len) { @@ -3600,11 +3475,9 @@ Backup::execSCAN_FRAGREF(Signal* signal) c_backupFilePool.getPtr(filePtr, filePtrI); filePtr.p->errorCode = ref->errorCode; + filePtr.p->scanRunning = 0; - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + backupFragmentRef(signal, filePtr); } void @@ -3639,9 +3512,11 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) { jam(); - if(filePtr.p->errorCode != 0){ + if(filePtr.p->errorCode != 0) + { jam(); -
abortFileHook(signal, filePtr, true); // Scan completed + filePtr.p->scanRunning = 0; + backupFragmentRef(signal, filePtr); // Scan completed return; }//if @@ -3669,20 +3544,51 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, BackupFragmentConf::SignalLength, JBB); + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF; ptr.p->slaveState.setState(STARTED); return; } + +void +Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr) +{ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF; + + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend(); + ref->backupId = ptr.p->backupId; + ref->backupPtr = ptr.i; + ref->nodeId = getOwnNodeId(); + ref->errorCode = ptr.p->errorCode; + sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal, + BackupFragmentRef::SignalLength, JBB); +} void Backup::checkScan(Signal* signal, BackupFilePtr filePtr) { - if(filePtr.p->errorCode != 0){ + OperationRecord & op = filePtr.p->operation; + + if(filePtr.p->errorCode != 0) + { jam(); - abortFileHook(signal, filePtr, false); // Scan not completed + + /** + * Close scan + */ + op.closeScan(); + ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); + req->senderData = filePtr.i; + req->closeFlag = 1; + req->transId1 = 0; + req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if - - OperationRecord & op = filePtr.p->operation; + if(op.newScan()) { jam(); @@ -3693,8 +3599,28 @@ Backup::checkScan(Signal* signal, BackupFilePtr filePtr) req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->batch_size_rows= 16; req->batch_size_bytes= 0; - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); + if(ERROR_INSERTED(10032)) + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 100, ScanFragNextReq::SignalLength); + else if(ERROR_INSERTED(10033)) + { + SET_ERROR_INSERT_VALUE(10032); + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 10000, ScanFragNextReq::SignalLength); + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::FileOrScanError; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + } + else + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if @@ -3718,11 +3644,8 @@ Backup::execFSAPPENDREF(Signal* signal) filePtr.p->fileRunning = 0; filePtr.p->errorCode = errCode; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + + checkFile(signal, filePtr); } void @@ -3738,12 +3661,6 @@ Backup::execFSAPPENDCONF(Signal* signal) BackupFilePtr filePtr; c_backupFilePool.getPtr(filePtr, filePtrI); - - if (ERROR_INSERTED(10029)) { - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - abortFile(signal, ptr, filePtr); - }//if OperationRecord & op = filePtr.p->operation; @@ -3761,30 +3678,25 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) #endif OperationRecord & op = filePtr.p->operation; - + Uint32 * tmp, sz; bool eof; - if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) { + if(op.dataBuffer.getReadPtr(&tmp, &sz, 
&eof)) + { jam(); - if(filePtr.p->errorCode == 0) { - jam(); - FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->varIndex = 0; - req->offset = tmp - c_startOfPages; - req->size = sz; - - sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, - FsAppendReq::SignalLength, JBA); - return; - } else { - jam(); - if (filePtr.p->scanRunning == 1) - eof = false; - }//if - }//if + jam(); + FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); + req->filePointer = filePtr.p->filePointer; + req->userPointer = filePtr.i; + req->userReference = reference(); + req->varIndex = 0; + req->offset = tmp - c_startOfPages; + req->size = sz; + + sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, + FsAppendReq::SignalLength, JBA); + return; + } if(!eof) { jam(); @@ -3794,9 +3706,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) return; }//if - ndbrequire(filePtr.p->fileDone == 1); - - if(sz > 0 && filePtr.p->errorCode == 0) { + if(sz > 0) { jam(); FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3812,6 +3722,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) }//if filePtr.p->fileRunning = 0; + filePtr.p->fileClosing = 1; FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3819,64 +3730,11 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA); } -void -Backup::abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr) -{ - jam(); - - if(ptr.p->slaveState.getState() != ABORTING) { - /** - * Inform master of failure - */ - jam(); - ptr.p->slaveState.setState(ABORTING); - ptr.p->setErrorCode(AbortBackupOrd::FileOrScanError); - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::FileOrScanError); - return; - }//if - - - for(ptr.p->files.first(filePtr); - filePtr.i!=RNIL; - ptr.p->files.next(filePtr)){ - jam(); - filePtr.p->errorCode = 1; - }//for - - closeFiles(signal, ptr); -} - -void -Backup::abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanComplete) -{ - jam(); - - if(!scanComplete) { - jam(); - - ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); - req->senderData = filePtr.i; - req->closeFlag = 1; - req->transId1 = 0; - req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); - return; - }//if - - filePtr.p->scanRunning = 0; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); -} /**************************************************************************** * @@ -3953,27 +3811,30 @@ Backup::execTRIG_ATTRINFO(Signal* signal) { }//if BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry; - if(logEntry == 0) { + if(logEntry == 0) + { jam(); Uint32 * dst; FsBuffer & buf = trigPtr.p->operation->dataBuffer; ndbrequire(trigPtr.p->maxRecordSize <= buf.getMaxWrite()); - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) { + if(ERROR_INSERTED(10030) || + !buf.getWritePtr(&dst, 
trigPtr.p->maxRecordSize)) + { jam(); + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); return; }//if - if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { - jam(); - trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); - return; - }//if - + logEntry = (BackupFormat::LogFile::LogEntry *)dst; trigPtr.p->logEntry = logEntry; logEntry->Length = 0; @@ -4015,9 +3876,10 @@ Backup::execFIRE_TRIG_ORD(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(gci != ptr.p->currGCP) { + if(gci != ptr.p->currGCP) + { jam(); - + trigPtr.p->logEntry->TriggerEvent = htonl(trigPtr.p->event | 0x10000); trigPtr.p->logEntry->Data[len] = htonl(gci); len ++; @@ -4035,20 +3897,6 @@ Backup::execFIRE_TRIG_ORD(Signal* signal) trigPtr.p->operation->noOfRecords += 1; } -void -Backup::sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, - Uint32 requestType) -{ - jam(); - AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); - ord->backupId = ptr.p->backupId; - ord->backupPtr = ptr.i; - ord->requestType = requestType; - ord->senderData= ptr.i; - sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, - AbortBackupOrd::SignalLength, JBB); -} - void Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 requestType) @@ -4085,7 +3933,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) CRASH_INSERTION((10020)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 startGCP = req->startGCP; const Uint32 stopGCP = req->stopGCP; @@ -4101,7 +3949,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) c_backupPool.getPtr(ptr, ptrI); ptr.p->slaveState.setState(STOPPING); - slaveAbortCheck(); // macro will do return if ABORTING + ptr.p->m_gsn = GSN_STOP_BACKUP_REQ; /** * Insert footers @@ -4140,12 +3988,6 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) void Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) { - if (ptr.p->closingFiles) { - jam(); - return; - } - ptr.p->closingFiles = true; - /** * Close all files */ @@ -4161,12 +4003,12 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) jam(); openCount++; - if(filePtr.p->fileDone == 1){ + if(filePtr.p->fileClosing == 1){ jam(); continue; }//if - filePtr.p->fileDone = 1; + filePtr.p->fileClosing = 1; if(filePtr.p->fileRunning == 1){ jam(); @@ -4183,7 +4025,7 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig, FsCloseReq::SignalLength, JBA); @@ -4210,11 +4052,6 @@ Backup::execFSCLOSEREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - /** - * This should only happen during abort of backup - */ - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - filePtr.p->fileOpened = 1; FsConf * conf = (FsConf*)signal->getDataPtr(); 
conf->userPointer = filePtrI; @@ -4237,7 +4074,7 @@ Backup::execFSCLOSECONF(Signal* signal) ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI); #endif - ndbrequire(filePtr.p->fileDone == 1); + ndbrequire(filePtr.p->fileClosing == 1); ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 0); ndbrequire(filePtr.p->scanRunning == 0); @@ -4265,25 +4102,20 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr) { jam(); - if(ptr.p->slaveState.getState() == STOPPING) { - jam(); - BackupFilePtr filePtr; - ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - - StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); - conf->backupId = ptr.p->backupId; - conf->backupPtr = ptr.i; - conf->noOfLogBytes = filePtr.p->operation.noOfBytes; - conf->noOfLogRecords = filePtr.p->operation.noOfRecords; - sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, - StopBackupConf::SignalLength, JBB); - - ptr.p->slaveState.setState(CLEANING); - return; - }//if + jam(); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - removeBackup(signal, ptr); + StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); + conf->backupId = ptr.p->backupId; + conf->backupPtr = ptr.i; + conf->noOfLogBytes = filePtr.p->operation.noOfBytes; + conf->noOfLogRecords = filePtr.p->operation.noOfRecords; + sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, + StopBackupConf::SignalLength, JBB); + + ptr.p->m_gsn = GSN_STOP_BACKUP_CONF; + ptr.p->slaveState.setState(CLEANING); } /***************************************************************************** @@ -4291,57 +4123,6 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr) * Slave functionallity: Abort backup * *****************************************************************************/ -void -Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) -{ - jam(); - - FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); - req->userReference = reference(); - req->userPointer = ptr.i; - req->directory = 1; - req->ownDirectory = 1; - FsOpenReq::setVersion(req->fileNumber, 2); - FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); - FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); - FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); - sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, - FsRemoveReq::SignalLength, JBA); -} - -void -Backup::execFSREMOVEREF(Signal* signal) -{ - jamEntry(); - ndbrequire(0); -} - -void -Backup::execFSREMOVECONF(Signal* signal){ - jamEntry(); - - FsConf * conf = (FsConf*)signal->getDataPtr(); - const Uint32 ptrI = conf->userPointer; - - /** - * Get backup record - */ - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, ptrI); - - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - if (ptr.p->masterRef == reference()) { - if (ptr.p->masterData.state.getAbortState() == DEFINING) { - jam(); - sendBackupRef(signal, ptr, ptr.p->errorCode); - return; - } else { - jam(); - }//if - }//if - cleanupSlaveResources(ptr); -} - /***************************************************************************** * * Slave functionallity: Abort backup @@ -4394,8 +4175,7 @@ Backup::execABORT_BACKUP_ORD(Signal* signal) if (c_backupPool.findId(senderData)) { jam(); c_backupPool.getPtr(ptr, senderData); - } else { // TODO might be abort sent to not master, - // or master aborting too early + } else { jam(); #ifdef DEBUG_ABORT ndbout_c("Backup: abort request type=%u on id=%u,%u not found", @@ -4405,15 +4185,15 @@ 
Backup::execABORT_BACKUP_ORD(Signal* signal) } }//if + ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD; const bool isCoordinator = (ptr.p->masterRef == reference()); - + bool ok = false; switch(requestType){ /** * Requests sent to master */ - case AbortBackupOrd::ClientAbort: jam(); // fall through @@ -4422,113 +4202,61 @@ Backup::execABORT_BACKUP_ORD(Signal* signal) // fall through case AbortBackupOrd::FileOrScanError: jam(); - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Already aborting"); -#endif - jam(); - return; - } + ndbrequire(isCoordinator); ptr.p->setErrorCode(requestType); - ndbrequire(isCoordinator); // Sent from slave to coordinator - masterAbort(signal, ptr, false); + if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ) + { + /** + * Only scans are actively aborted + */ + abort_scan(signal, ptr); + } return; - - /** - * Info sent to slave - */ - - case AbortBackupOrd::OkToClean: - jam(); - cleanupMasterResources(ptr); - return; - + /** * Requests sent to slave */ - + case AbortBackupOrd::AbortScan: + jam(); + ptr.p->setErrorCode(requestType); + return; + case AbortBackupOrd::BackupComplete: jam(); - if (ptr.p->slaveState.getState() == CLEANING) { // TODO what if state is - // not CLEANING? - jam(); - cleanupSlaveResources(ptr); - }//if + cleanup(signal, ptr); return; - break; - case AbortBackupOrd::BackupFailureDueToNodeFail: - jam(); - ok = true; - if (ptr.p->errorCode != 0) - ptr.p->setErrorCode(requestType); - break; case AbortBackupOrd::BackupFailure: - jam(); - ok = true; - break; + case AbortBackupOrd::BackupFailureDueToNodeFail: + case AbortBackupOrd::OkToClean: + case AbortBackupOrd::IncompatibleVersions: +#ifndef VM_TRACE + default: +#endif + ptr.p->setErrorCode(requestType); + ok= true; } ndbrequire(ok); - /** - * Slave abort - */ - slaveAbort(signal, ptr); -} - -void -Backup::slaveAbort(Signal* signal, BackupRecordPtr ptr) -{ - if(ptr.p->slaveState.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Slave already aborting"); -#endif - jam(); - return; + Uint32 ref= ptr.p->masterRef; + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + + if(ref == reference()) + { + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); } -#ifdef DEBUG_ABORT - ndbout_c("************* slaveAbort"); -#endif - - State slaveState = ptr.p->slaveState.getState(); - ptr.p->slaveState.setState(ABORTING); - switch(slaveState) { - case DEFINING: - jam(); - return; -//------------------------------------------ -// Will watch for the abort at various places -// in the defining phase. 
-//------------------------------------------ - case ABORTING: - jam(); - //Fall through - case DEFINED: - jam(); - //Fall through - case STOPPING: - jam(); + else + { + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter.clearWaitingFor(); + ptr.p->masterData.sendCounter.setWaitingFor(getOwnNodeId()); closeFiles(signal, ptr); - return; - case STARTED: - jam(); - //Fall through - case SCANNING: - jam(); - BackupFilePtr filePtr; - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); - return; - case CLEANING: - jam(); - cleanupSlaveResources(ptr); - return; - case INITIAL: - jam(); - ndbrequire(false); - return; } } + void Backup::dumpUsedResources() { @@ -4576,12 +4304,8 @@ Backup::dumpUsedResources() } void -Backup::cleanupMasterResources(BackupRecordPtr ptr) +Backup::cleanup(Signal* signal, BackupRecordPtr ptr) { -#ifdef DEBUG_ABORT - ndbout_c("******** Cleanup Master Resources *********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif TablePtr tabPtr; for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL;ptr.p->tables.next(tabPtr)) @@ -4601,20 +4325,6 @@ Backup::cleanupMasterResources(BackupRecordPtr ptr) tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID; }//for }//for - ptr.p->tables.release(); - ptr.p->triggers.release(); - ptr.p->okToCleanMaster = true; - - cleanupFinalResources(ptr); -} - -void -Backup::cleanupSlaveResources(BackupRecordPtr ptr) -{ -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Slave Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); @@ -4626,35 +4336,65 @@ Backup::cleanupSlaveResources(BackupRecordPtr ptr) ndbrequire(filePtr.p->scanRunning == 0); filePtr.p->pages.release(); }//for - ptr.p->files.release(); - cleanupFinalResources(ptr); + ptr.p->files.release(); + ptr.p->tables.release(); + ptr.p->triggers.release(); + + ptr.p->tables.release(); + ptr.p->triggers.release(); + ptr.p->pages.release(); + ptr.p->backupId = ~0; + + if(ptr.p->checkError()) + removeBackup(signal, ptr); + else + c_backups.release(ptr); +} + + +void +Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) +{ + jam(); + + FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); + req->userReference = reference(); + req->userPointer = ptr.i; + req->directory = 1; + req->ownDirectory = 1; + FsOpenReq::setVersion(req->fileNumber, 2); + FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); + FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); + FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); + sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, + FsRemoveReq::SignalLength, JBA); } void -Backup::cleanupFinalResources(BackupRecordPtr ptr) +Backup::execFSREMOVEREF(Signal* signal) { -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Final Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif + jamEntry(); + FsRef * ref = (FsRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->userPointer; - // if (!ptr.p->tables.empty() || !ptr.p->files.empty()) { - if (!ptr.p->okToCleanMaster || !ptr.p->files.empty()) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("******** Waiting to do final cleanup"); -#endif - return; - } - ptr.p->pages.release(); - ptr.p->masterData.state.setState(INITIAL); - ptr.p->slaveState.setState(INITIAL); - ptr.p->backupId = 0; - - ptr.p->closingFiles = false; - ptr.p->okToCleanMaster = true; - - c_backups.release(ptr); - // 
ndbrequire(false); + FsConf * conf = (FsConf*)signal->getDataPtr(); + conf->userPointer = ptrI; + execFSREMOVECONF(signal); } + +void +Backup::execFSREMOVECONF(Signal* signal){ + jamEntry(); + + FsConf * conf = (FsConf*)signal->getDataPtr(); + const Uint32 ptrI = conf->userPointer; + + /** + * Get backup record + */ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); + c_backups.release(ptr); +} + diff --git a/ndb/src/kernel/blocks/backup/Backup.hpp b/ndb/src/kernel/blocks/backup/Backup.hpp index 1a5d6c7a925..7bcea5655b4 100644 --- a/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/ndb/src/kernel/blocks/backup/Backup.hpp @@ -232,6 +232,7 @@ public: */ bool newScan(); bool scanConf(Uint32 noOfOps, Uint32 opLen); + bool closeScan(); /** * Per record */ @@ -330,7 +331,7 @@ public: Uint8 fileOpened; Uint8 fileRunning; - Uint8 fileDone; + Uint8 fileClosing; Uint8 scanRunning; }; typedef Ptr<BackupFile> BackupFilePtr; @@ -403,13 +404,11 @@ public: ArrayPool<TriggerRecord> & trp) : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1) , tables(tp), triggers(trp), files(bp), pages(pp) - , masterData(b, validMasterTransitions, validMasterTransitionsCount) - , backup(b) - { - closingFiles = false; - okToCleanMaster = true; - } + , masterData(b), backup(b) + { + } + Uint32 m_gsn; CompoundState slaveState; Uint32 clientRef; @@ -420,9 +419,6 @@ public: Uint32 errorCode; NdbNodeBitmask nodes; - bool okToCleanMaster; - bool closingFiles; - Uint64 noOfBytes; Uint64 noOfRecords; Uint64 noOfLogBytes; @@ -444,15 +440,13 @@ public: SimpleProperties props;// Used for (un)packing backup request struct MasterData { - MasterData(Backup & b, const State valid[], Uint32 count) - : state(b, valid, count, 0) - { - } + MasterData(Backup & b) + { + } MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex; MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex; Uint32 gsn; - CompoundState state; SignalCounter sendCounter; Uint32 errorCode; struct { @@ -557,7 +551,8 @@ public: void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId); void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0); - + void backupFragmentRef(Signal * signal, BackupFilePtr filePtr); + void nextFragment(Signal*, BackupRecordPtr); void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr); @@ -578,14 +573,14 @@ public: void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); - void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort); + void masterAbort(Signal*, BackupRecordPtr ptr); void masterSendAbortBackup(Signal*, BackupRecordPtr ptr); void slaveAbort(Signal*, BackupRecordPtr ptr); void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr); void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone); - bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask); + bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask); bool checkAbort(BackupRecordPtr ptr); void checkNodeFail(Signal* signal, BackupRecordPtr ptr, @@ -603,9 +598,8 @@ public: void sendBackupRef(BlockReference ref, Signal *signal, Uint32 senderData, Uint32 errorCode); void dumpUsedResources(); - void cleanupMasterResources(BackupRecordPtr ptr); - void cleanupSlaveResources(BackupRecordPtr ptr); - void cleanupFinalResources(BackupRecordPtr ptr); + void cleanup(Signal*, BackupRecordPtr ptr); + void abort_scan(Signal*, BackupRecordPtr ptr); + void removeBackup(Signal*, BackupRecordPtr ptr); void sendSTTORRY(Signal*); diff --git
a/ndb/src/kernel/blocks/backup/Backup.txt b/ndb/src/kernel/blocks/backup/Backup.txt index ee5e02bb549..73942c6ebdc 100644 --- a/ndb/src/kernel/blocks/backup/Backup.txt +++ b/ndb/src/kernel/blocks/backup/Backup.txt @@ -341,3 +341,28 @@ start backup (ERROR_INSERTED(10022))) { if (ERROR_INSERTED(10029)) { if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { + +----- XXX --- + +DEFINE_BACKUP_REF -> + ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies has arrived + +START_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies has arrived + +BACKUP_FRAGMENT_REF + ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT + + When all nodes has replied BACKUP_FRAGMENT + ABORT_BACKUP_ORD(no reply) + +STOP_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies has arrived + +NF_COMPLETE_REP + slave dies + master sends OUTSTANDING_REF to self + slave does nothing + + master dies + slave elects self as master and sets only itself as participant diff --git a/ndb/src/kernel/blocks/backup/BackupInit.cpp b/ndb/src/kernel/blocks/backup/BackupInit.cpp index 08fa089a9c0..eae72f43db5 100644 --- a/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -175,7 +175,7 @@ Backup::Backup(const Configuration & conf) : addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ); - //addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); + addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF); addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ); diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index dfae180ae71..0274ef4af3e 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -126,6 +126,7 @@ Cmvmi::Cmvmi(const Configuration & conf) : } setNodeInfo(getOwnNodeId()).m_connected = true; + setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion(); } Cmvmi::~Cmvmi() diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index 68106c4689d..a8931fb32ea 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -1565,9 +1565,9 @@ ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed, { // start backup can take some time, set timeout high Uint64 old_timeout= handle->read_timeout; if (wait_completed == 2) - handle->read_timeout= 30*60*1000; // 30 minutes + handle->read_timeout= 48*60*60*1000; // 48 hours else if (wait_completed == 1) - handle->read_timeout= 5*60*1000; // 5 minutes + handle->read_timeout= 10*60*1000; // 10 minutes reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args); handle->read_timeout= old_timeout; } diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index fb05e57e138..ceaedc9955b 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -791,7 +791,7 @@ MgmtSrvr::restartNode(int processId, bool nostart, result = sendSignal(processId, NO_WAIT, signal, true); } - if (result == -1) { + if (result == -1 && theWaitState != WAIT_NODEFAILURE) { m_stopRec.inUse = false; return SEND_OR_RECEIVE_FAILED; } @@ -1920,6 +1920,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", aNodeId); #endif + theWaitNode= aNodeId; NdbApiSignal aSignal(_ownReference); BackupReq* req = 
CAST_PTR(BackupReq, aSignal.getDataPtrSend()); aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ, @@ -1947,6 +1948,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) event.Event = BackupEvent::BackupAborted; event.Aborted.Reason = rep->reason; event.Aborted.BackupId = rep->backupId; + event.Aborted.ErrorCode = rep->reason; backupCallback(event); } break; @@ -2076,6 +2078,13 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete) handleStopReply(nodeId, 0); DBUG_VOID_RETURN; } + + if(theWaitNode == nodeId && + theWaitState != NO_WAIT && theWaitState != WAIT_STOP) + { + theWaitState = WAIT_NODEFAILURE; + NdbCondition_Signal(theMgmtWaitForResponseCondPtr); + } } eventReport(_ownNodeId, theData); @@ -2427,7 +2436,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) int result; if (waitCompleted == 2) { result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED, - signal, true, 30*60*1000 /*30 secs*/); + signal, true, 48*60*60*1000 /* 48 hours */); } else if (waitCompleted == 1) { result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED, @@ -2456,22 +2465,6 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) return -1; break; } - } else { - switch(m_lastBackupEvent.Event){ - case BackupEvent::BackupCompleted: - backupId = m_lastBackupEvent.Completed.BackupId; - break; - case BackupEvent::BackupStarted: - backupId = m_lastBackupEvent.Started.BackupId; - break; - case BackupEvent::BackupFailedToStart: - return m_lastBackupEvent.FailedToStart.ErrorCode; - case BackupEvent::BackupAborted: - return m_lastBackupEvent.Aborted.ErrorCode; - default: - return -1; - break; - } } return 0; diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index a05b29b7f31..ce78983b3c3 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -611,7 +611,8 @@ private: WAIT_STOP, WAIT_BACKUP_STARTED, WAIT_BACKUP_COMPLETED, - WAIT_VERSION + WAIT_VERSION, + WAIT_NODEFAILURE }; /** @@ -695,6 +696,7 @@ private: NdbApiSignal* theSignalIdleList; // List of unused signals + Uint32 theWaitNode; WaitSignalType theWaitState; // State denoting a set of signals we accept to recieve. 
diff --git a/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp b/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp index 2126c9d358d..f93948abc75 100644 --- a/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp +++ b/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp @@ -108,6 +108,7 @@ MgmtSrvr::sendRecSignal(Uint16 aNodeId, return -1; } theWaitState = aWaitState; + theWaitNode = aNodeId; return receiveOptimisedResponse(waitTime); } @@ -119,11 +120,12 @@ MgmtSrvr::receiveOptimisedResponse(int waitTime) theFacade->checkForceSend(_blockNumber); NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime; - while (theWaitState != NO_WAIT && waitTime > 0) { + while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE + && waitTime > 0) { NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr, theFacade->theMutexPtr, waitTime); - if(theWaitState == NO_WAIT) + if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE) break; waitTime = (maxTime - NdbTick_CurrentMillisecond()); }//while diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index d4ad9cd6f1c..484e91f2977 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -345,7 +345,7 @@ ErrorBundle ErrorCodes[] = { { 1325, IE, "File or scan error" }, { 1326, IE, "Backup abortet due to node failure" }, { 1327, IE, "1327" }, - + { 1340, IE, "Backup undefined error" }, { 1342, AE, "Backup failed to allocate buffers (check configuration)" }, { 1343, AE, "Backup failed to setup fs buffers (check configuration)" }, @@ -355,7 +355,8 @@ ErrorBundle ErrorCodes[] = { { 1347, AE, "Backup failed to allocate table memory (check configuration)" }, { 1348, AE, "Backup failed to allocate file record (check configuration)" }, { 1349, AE, "Backup failed to allocate attribute record (check configuration)" }, - + { 1329, AE, "Backup during software upgrade not supported" }, + /** * Still uncategorized */ diff --git a/ndb/test/ndbapi/testBackup.cpp b/ndb/test/ndbapi/testBackup.cpp index 77b9d0a4baa..bea5d5307e2 100644 --- a/ndb/test/ndbapi/testBackup.cpp +++ b/ndb/test/ndbapi/testBackup.cpp @@ -74,20 +74,20 @@ int runAbort(NDBT_Context* ctx, NDBT_Step* step){ if (testMaster) { if (testSlave) { - if (backup.NFMasterAsSlave(restarter) == -1){ + if (backup.NFMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.NFMaster(restarter) == -1){ + if (backup.NFMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.NFSlave(restarter) == -1){ + if (backup.NFSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } - + return NDBT_OK; } @@ -108,16 +108,16 @@ int runFail(NDBT_Context* ctx, NDBT_Step* step){ if (testMaster) { if (testSlave) { - if (backup.FailMasterAsSlave(restarter) == -1){ + if (backup.FailMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.FailMaster(restarter) == -1){ + if (backup.FailMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.FailSlave(restarter) == -1){ + if (backup.FailSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 453fe1ad7ae..2d7c435e8b4 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -2,6 +2,30 @@ max-time: 3600 cmd: atrt-mysql-test-run args: --force +max-time: 600 +cmd: atrt-testBackup +args: -n NFMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFSlave T1 + +max-time: 
600 +cmd: atrt-testBackup +args: -n FailMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailSlave T1 + max-time: 600 cmd: atrt-testBackup args: -n BackupOne T1 T6 T3 I3 diff --git a/ndb/test/src/NdbBackup.cpp b/ndb/test/src/NdbBackup.cpp index 5e22468692e..7c5d1405f6b 100644 --- a/ndb/test/src/NdbBackup.cpp +++ b/ndb/test/src/NdbBackup.cpp @@ -245,6 +245,10 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){ int NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){ { + int nNodes = _restarter.getNumDbNodes(); + if(nNodes == 1) + return NDBT_OK; + int nodeId = _restarter.getMasterNodeId(); CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, @@ -255,15 +259,11 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); - - NdbSleep_SecSleep(10); } - + CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); - - int nNodes = _restarter.getNumDbNodes(); - + myRandom48Init(NdbTick_CurrentMillisecond()); for(int i = 0; i Date: Fri, 22 Apr 2005 09:40:44 +0200 Subject: [PATCH 6/7] bug#9969 - ndb missleading error message --- ndb/src/ndbapi/ndberror.c | 6 +++--- ndb/test/src/NdbBackup.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 484e91f2977..98cce88e2a7 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -140,10 +140,10 @@ ErrorBundle ErrorCodes[] = { { 4008, UR, "Receive from NDB failed" }, { 4009, UR, "Cluster Failure" }, { 4012, UR, - "Time-out, most likely caused by simple read or cluster failure" }, + "Request ndbd time-out, maybe due to high load or communication problems"}, { 4024, UR, - "Time-out, most likely caused by simple read or cluster failure" }, - + "Time-out, most likely caused by simple read or cluster failure" }, + /** * TemporaryResourceError */ diff --git a/ndb/test/src/NdbBackup.cpp b/ndb/test/src/NdbBackup.cpp index 7c5d1405f6b..28724323bd7 100644 --- a/ndb/test/src/NdbBackup.cpp +++ b/ndb/test/src/NdbBackup.cpp @@ -244,8 +244,8 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){ int NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){ + int nNodes = _restarter.getNumDbNodes(); { - int nNodes = _restarter.getNumDbNodes(); if(nNodes == 1) return NDBT_OK; From 0bfc92467255010583aa4eee6166d69ea7cfff76 Mon Sep 17 00:00:00 2001 From: "joreland@mysql.com" <> Date: Fri, 22 Apr 2005 11:04:26 +0200 Subject: [PATCH 7/7] bug#9724 - ndb restart if file already open occur print files... 
--- ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp index b944bb5485b..0fee687f1bc 100644 --- a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp +++ b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp @@ -82,8 +82,14 @@ inline bool OpenFiles::insert(AsyncFile* file, Uint16 id){ continue; if(strcmp(m_files[i].m_file->theFileName.c_str(), - file->theFileName.c_str()) == 0){ - ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN,"","OpenFiles::insert()"); + file->theFileName.c_str()) == 0) + { + BaseString names; + names.assfmt("open: >%s< existing: >%s<", + file->theFileName.c_str(), + m_files[i].m_file->theFileName.c_str()); + ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN, names.c_str(), + "OpenFiles::insert()"); } }
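For context on the final hunk above: the duplicate-open case is still fatal, but the error text now names both the file being opened and the file already registered instead of passing an empty string to ERROR_SET. The following standalone sketch is an illustration only, not part of the patch or of the NDB sources; the function and variable names are invented, while OpenFiles::insert, BaseString::assfmt and ERROR_SET are the real identifiers used in the hunk.

#include <cstdio>
#include <string>
#include <vector>

// Sketch: detect a duplicate open and report both names, mirroring the
// message format the patched OpenFiles::insert() builds with assfmt().
bool sketch_insert_open_file(std::vector<std::string>& open_files,
                             const std::string& name)
{
  for (const std::string& existing : open_files) {
    if (existing == name) {
      // The real kernel code raises a fatal error here (ERROR_SET); the fix
      // is that the message now carries both file names for debugging.
      std::fprintf(stderr, "open: >%s< existing: >%s<\n",
                   name.c_str(), existing.c_str());
      return false;
    }
  }
  open_files.push_back(name);
  return true;
}

Calling this twice with the same name prints the two-name diagnostic and rejects the second insert, which is the behaviour the improved error string makes visible in the real block.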