From 6ac6b08c41beb47cb38aa2d19a86157997f5cda0 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Mar 2006 10:09:35 +0100 Subject: [PATCH 01/16] ndb - bug#18298 8 repeated nr with table wo/ logging cause crash Dont create crashed replica for temporary tables ndb/src/kernel/blocks/dbdih/Dbdih.hpp: Dont create crashed replica for temporary tables ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Dont create crashed replica for temporary tables --- ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 3 ++- ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index 0c107e35603..f74c0f36c4d 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -1038,7 +1038,8 @@ private: void prepareReplicas(FragmentstorePtr regFragptr); void removeNodeFromStored(Uint32 nodeId, FragmentstorePtr regFragptr, - ReplicaRecordPtr replicaPtr); + ReplicaRecordPtr replicaPtr, + bool temporary); void removeOldStoredReplica(FragmentstorePtr regFragptr, ReplicaRecordPtr replicaPtr); void removeStoredReplica(FragmentstorePtr regFragptr, diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 776e59ea495..fab428aadef 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -5212,6 +5212,7 @@ void Dbdih::removeNodeFromTable(Signal* signal, //const Uint32 lcpId = SYSFILE->latestLCP_ID; const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE); + const bool temporary = !tabPtr.p->storedTable; FragmentstorePtr fragPtr; for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){ @@ -5232,7 +5233,7 @@ void Dbdih::removeNodeFromTable(Signal* signal, jam(); found = true; noOfRemovedReplicas++; - removeNodeFromStored(nodeId, fragPtr, replicaPtr); + removeNodeFromStored(nodeId, fragPtr, replicaPtr, temporary); 
if(replicaPtr.p->lcpOngoingFlag){ jam(); /** @@ -12051,9 +12052,18 @@ void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr) /*---------------------------------------------------------------*/ void Dbdih::removeNodeFromStored(Uint32 nodeId, FragmentstorePtr fragPtr, - ReplicaRecordPtr replicatePtr) + ReplicaRecordPtr replicatePtr, + bool temporary) { - newCrashedReplica(nodeId, replicatePtr); + if (!temporary) + { + jam(); + newCrashedReplica(nodeId, replicatePtr); + } + else + { + jam(); + } removeStoredReplica(fragPtr, replicatePtr); linkOldStoredReplica(fragPtr, replicatePtr); ndbrequire(fragPtr.p->storedReplicas != RNIL); From 3bfaf33392901b90d420e37450164d7a0db8e3ed Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Mar 2006 10:55:02 +0100 Subject: [PATCH 02/16] ndb - bug#16772 dont't allow node to join cluster until all nodes has completed failure handling ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: When getting CM_ADD for node that I haven't completed failure handling for do _not_ just override. 
But instead set state...and send CM_ACK_ADD on execCONNECT_REP (much...later) ndb/test/ndbapi/testNodeRestart.cpp: testcase for bug#16772 ndb/test/run-test/daily-basic-tests.txt: Run test in basic suite --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 103 ++++++++++++++++++++---- ndb/test/ndbapi/testNodeRestart.cpp | 50 ++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 + 3 files changed, 142 insertions(+), 15 deletions(-) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 6095895e7c2..70084e6b171 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -257,6 +257,7 @@ void Qmgr::setArbitTimeout(UintR aArbitTimeout) void Qmgr::execCONNECT_REP(Signal* signal) { + jamEntry(); const Uint32 nodeId = signal->theData[0]; c_connectedNodes.set(nodeId); NodeRecPtr nodePtr; @@ -264,9 +265,13 @@ void Qmgr::execCONNECT_REP(Signal* signal) ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); switch(nodePtr.p->phase){ case ZSTARTING: - jam(); - break; case ZRUNNING: + jam(); + if(!c_start.m_nodes.isWaitingFor(nodeId)){ + jam(); + return; + } + break; case ZPREPARE_FAIL: case ZFAIL_CLOSING: jam(); @@ -277,21 +282,28 @@ void Qmgr::execCONNECT_REP(Signal* signal) case ZAPI_INACTIVE: return; } - - if(!c_start.m_nodes.isWaitingFor(nodeId)){ - jam(); - return; - } - + switch(c_start.m_gsn){ case GSN_CM_REGREQ: jam(); sendCmRegReq(signal, nodeId); return; - case GSN_CM_NODEINFOREQ:{ + case GSN_CM_NODEINFOREQ: jam(); sendCmNodeInfoReq(signal, nodeId, nodePtr.p); return; + case GSN_CM_ADD:{ + jam(); + + ndbrequire(getOwnNodeId() != cpresident); + c_start.m_nodes.clearWaitingFor(nodeId); + c_start.m_gsn = RNIL; + + NodeRecPtr addNodePtr; + addNodePtr.i = nodeId; + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); + cmAddPrepare(signal, addNodePtr, nodePtr.p); + return; } default: return; @@ -924,15 +936,27 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){ return; case 
ZFAIL_CLOSING: jam(); -#ifdef VM_TRACE - ndbout_c("Enabling communication to CM_ADD node state=%d", - nodePtr.p->phase); -#endif + +#if 1 + warningEvent("Recieved request to incorperate node %u, " + "while error handling has not yet completed", + nodePtr.i); + + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD); + c_start.m_nodes.clearWaitingFor(); + c_start.m_nodes.setWaitingFor(nodePtr.i); + c_start.m_gsn = GSN_CM_ADD; +#else + warningEvent("Enabling communication to CM_ADD node %u state=%d", + nodePtr.i, + nodePtr.p->phase); nodePtr.p->phase = ZSTARTING; nodePtr.p->failState = NORMAL; signal->theData[0] = 0; signal->theData[1] = nodePtr.i; sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA); +#endif return; case ZSTARTING: break; @@ -1766,11 +1790,27 @@ void Qmgr::execNDB_FAILCONF(Signal* signal) jamEntry(); failedNodePtr.i = signal->theData[0]; + + if (ERROR_INSERTED(930)) + { + CLEAR_ERROR_INSERT_VALUE; + infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i); + return; + } + ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){ failedNodePtr.p->failState = NORMAL; } else { jam(); + + char buf[100]; + BaseString::snprintf(buf, 100, + "Received NDB_FAILCONF for node %u with state: %d %d", + failedNodePtr.i, + failedNodePtr.p->phase, + failedNodePtr.p->failState); + progError(__LINE__, 0, buf); systemErrorLab(signal, __LINE__); }//if if (cpresident == getOwnNodeId()) { @@ -2077,10 +2117,42 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); if (failedNodePtr.i == getOwnNodeId()) { jam(); - systemErrorLab(signal, __LINE__); + + const char * msg = 0; + switch(aFailCause){ + case FailRep::ZOWN_FAILURE: + msg = "Own failure"; + break; + case FailRep::ZOTHER_NODE_WHEN_WE_START: + case FailRep::ZOTHERNODE_FAILED_DURING_START: + msg = "Other node died during start"; + break; + case 
FailRep::ZIN_PREP_FAIL_REQ: + msg = "Prep fail"; + break; + case FailRep::ZSTART_IN_REGREQ: + msg = "Start timeout"; + break; + case FailRep::ZHEARTBEAT_FAILURE: + msg = "Hearbeat failure"; + break; + case FailRep::ZLINK_FAILURE: + msg = "Connection failure"; + break; + } + + char buf[100]; + BaseString::snprintf(buf, 100, + "We(%u) have been declared dead by %u reason: %s(%u)", + getOwnNodeId(), + refToNode(signal->getSendersBlockRef()), + msg ? msg : "", /* consumed by %s: textual reason */ + aFailCause); /* consumed by %u: numeric fail cause */ + + progError(__LINE__, 0, buf); return; }//if - + myNodePtr.i = getOwnNodeId(); ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); if (myNodePtr.p->phase != ZRUNNING) { @@ -2791,6 +2863,7 @@ void Qmgr::failReport(Signal* signal, cfailureNr = cprepareFailureNr; ctoFailureNr = 0; ctoStatus = Q_ACTIVE; + c_start.reset(); // Don't take over nodes being started if (cnoCommitFailedNodes > 0) { jam(); /**----------------------------------------------------------------- diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index a741e6233d9..eebd631af94 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -535,6 +535,52 @@ err: return NDBT_FAILED; } +int +runBug16772(NDBT_Context* ctx, NDBT_Step* step){ + + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + int aliveNodeId = restarter.getRandomNotMasterNodeId(rand()); + int deadNodeId = aliveNodeId; + while (deadNodeId == aliveNodeId) + deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); + + if (restarter.insertErrorInNode(aliveNodeId, 930)) + return NDBT_FAILED; + + if (restarter.restartOneDbNode(deadNodeId, + /** initial */ false, + /** nostart */ true, + /** abort */ true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(&deadNodeId, 1)) + return NDBT_FAILED; + + if (restarter.startNodes(&deadNodeId, 1)) + return NDBT_FAILED; + + // It should now be hanging since we throw away NDB_FAILCONF + 
int ret = restarter.waitNodesStartPhase(&deadNodeId, 1, 3, 10); + // So this should fail...i.e it should not reach startphase 3 + + // Now send a NDB_FAILCONF for deadNo + int dump[] = { 7020, 323, 252, 0 }; + dump[3] = deadNodeId; + if (restarter.dumpStateOneNode(aliveNodeId, dump, 4)) + return NDBT_FAILED; + + if (restarter.waitNodesStarted(&deadNodeId, 1)) + return NDBT_FAILED; + + return ret ? NDBT_OK : NDBT_FAILED; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -820,6 +866,10 @@ TESTCASE("Bug15685", STEP(runBug15685); FINALIZER(runClearTable); } +TESTCASE("Bug16772", + "Test bug with restarting before NF handling is complete"){ + STEP(runBug16772); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 6378b4a06d3..169daae6d7f 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -446,6 +446,10 @@ max-time: 500 cmd: testNodeRestart args: -n Bug15685 T1 +max-time: 500 +cmd: testNodeRestart +args: -n Bug16772 T1 + # OLD FLEX max-time: 500 cmd: flexBench From 37230a2a8867a2cc6066dac51ddc775688cb1cba Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Mar 2006 11:29:58 +0100 Subject: [PATCH 03/16] ndb - wl2610 Actively abort transactions (that are affected) during NF This removes a lot of bugs that can otherwise occur when using a high value for TransactionDeadlockTimeout ndb/include/kernel/signaldata/TcContinueB.hpp: New continueb for active transaction abort on nf ndb/src/kernel/blocks/dbtc/Dbtc.hpp: Add bitmask of participating nodes to transaction record Add bitmask of node fail steps, so that NF_CompleteRep is not sent until all steps have completed ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Active transaction abortion --- ndb/include/kernel/signaldata/TcContinueB.hpp | 3 +- ndb/src/kernel/blocks/dbtc/Dbtc.hpp | 18 +- ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 192 +++++++++++++----- 3 
files changed, 164 insertions(+), 49 deletions(-) diff --git a/ndb/include/kernel/signaldata/TcContinueB.hpp b/ndb/include/kernel/signaldata/TcContinueB.hpp index 85213791b2a..b87b982e49b 100644 --- a/ndb/include/kernel/signaldata/TcContinueB.hpp +++ b/ndb/include/kernel/signaldata/TcContinueB.hpp @@ -44,7 +44,8 @@ private: CHECK_WAIT_DROP_TAB_FAILED_LQH = 16, TRIGGER_PENDING = 17, - DelayTCKEYCONF = 18 + DelayTCKEYCONF = 18, + ZNF_CHECK_TRANSACTIONS = 19 }; }; diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 61afef30b43..23c5a7d08eb 100644 --- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -636,6 +636,7 @@ public: ConnectionState apiConnectstate; UintR transid[2]; UintR firstTcConnect; + NdbNodeBitmask m_transaction_nodes; //--------------------------------------------------- // Second 16 byte cache line. Hot variables. @@ -941,6 +942,17 @@ public: UintR noOfWordsTCINDXCONF; UintR packedWordsTCINDXCONF[30]; BlockReference hostLqhBlockRef; + + enum NodeFailBits + { + NF_TAKEOVER = 0x1, + NF_CHECK_SCAN = 0x2, + NF_CHECK_TRANSACTION = 0x4, + NF_CHECK_DROP_TAB = 0x8, + NF_NODE_FAIL_BITS = 0xF // All bits... 
+ }; + Uint32 m_nf_bits; + NdbNodeBitmask m_lqh_trans_conf; }; /* p2c: size = 128 bytes */ typedef Ptr<HostRecord> HostRecordPtr; @@ -1578,7 +1590,7 @@ private: void wrongSchemaVersionErrorLab(Signal* signal); void noFreeConnectionErrorLab(Signal* signal); void tckeyreq050Lab(Signal* signal); - void timeOutFoundLab(Signal* signal, UintR anAdd); + void timeOutFoundLab(Signal* signal, UintR anAdd, Uint32 errCode); void completeTransAtTakeOverLab(Signal* signal, UintR TtakeOverInd); void completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd); void completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd); @@ -1600,6 +1612,9 @@ private: void checkScanFragList(Signal*, Uint32 failedNodeId, ScanRecord * scanP, LocalDLList::Head&); + void nodeFailCheckTransactions(Signal*,Uint32 transPtrI,Uint32 failedNodeId); + void checkNodeFailComplete(Signal* signal, Uint32 failedNodeId, Uint32 bit); + // Initialisation void initData(); void initRecords(); @@ -1626,6 +1641,7 @@ private: HostRecord *hostRecord; HostRecordPtr hostptr; UintR chostFilesize; + NdbNodeBitmask c_alive_nodes; GcpRecord *gcpRecord; GcpRecordPtr gcpPtr; diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index d9d1f01b213..4750a8c388a 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -262,6 +262,10 @@ void Dbtc::execCONTINUEB(Signal* signal) jam(); checkScanActiveInFailedLqh(signal, Tdata0, Tdata1); return; + case TcContinueB::ZNF_CHECK_TRANSACTIONS: + jam(); + nodeFailCheckTransactions(signal, Tdata0, Tdata1); + return; case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH: jam(); checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1); @@ -301,6 +305,7 @@ void Dbtc::execINCL_NODEREQ(Signal* signal) hostptr.p->hostStatus = HS_ALIVE; hostptr.p->takeOverStatus = TOS_IDLE; signal->theData[0] = cownref; + c_alive_nodes.set(hostptr.i); sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB); } @@ -487,6 +492,7 @@ 
Dbtc::checkWaitDropTabFailedLqh(Signal* signal, Uint32 nodeId, Uint32 tableId) * Finished */ jam(); + checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB); return; } @@ -859,6 +865,7 @@ void Dbtc::execREAD_NODESCONF(Signal* signal) jam(); con_lineNodes++; hostptr.p->hostStatus = HS_ALIVE; + c_alive_nodes.set(i); }//if }//if }//for @@ -2314,6 +2321,7 @@ void Dbtc::initApiConnectRec(Signal* signal, regApiPtr->commitAckMarker = RNIL; regApiPtr->buddyPtr = RNIL; regApiPtr->currSavePointId = 0; + regApiPtr->m_transaction_nodes.clear(); // Trigger data releaseFiredTriggerData(&regApiPtr->theFiredTriggers), // Index data @@ -2921,6 +2929,10 @@ void Dbtc::tckeyreq050Lab(Signal* signal) signal->theData[0] = TdihConnectptr; signal->theData[1] = Ttableref; signal->theData[2] = TdistrHashValue; + signal->theData[3] = 0; + signal->theData[4] = 0; + signal->theData[5] = 0; + signal->theData[6] = 0; /*-------------------------------------------------------------*/ /* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */ @@ -3098,6 +3110,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal, TcConnectRecord * const regTcPtr = tcConnectptr.p; ApiConnectRecord * const regApiPtr = apiConnectptr.p; CacheRecord * const regCachePtr = cachePtr.p; + UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6; #ifdef ERROR_INSERT if (ERROR_INSERTED(8002)) { systemErrorLab(signal); @@ -3135,6 +3148,9 @@ void Dbtc::sendlqhkeyreq(Signal* signal, LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd); Tdata10 = 0; + sig0 = regCachePtr->opSimple; + sig1 = regTcPtr->operation; + bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE); LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen); LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo); LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock); @@ -3144,8 +3160,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal, LqhKeyReq::setApplicationAddressFlag(Tdata10, 1); LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp); 
LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec); - LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple); - LqhKeyReq::setOperation(Tdata10, regTcPtr->operation); + LqhKeyReq::setSimpleFlag(Tdata10, sig0); + LqhKeyReq::setOperation(Tdata10, sig1); /* ----------------------------------------------------------------------- * Sequential Number of first LQH = 0, bit 22-23 * IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ, @@ -3158,18 +3174,16 @@ void Dbtc::sendlqhkeyreq(Signal* signal, * ----------------------------------------------------------------------- */ //LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo); Uint32 commitAckMarker = regTcPtr->commitAckMarker; + const Uint32 noOfLqhs = regTcPtr->noOfNodes; if(commitAckMarker != RNIL){ jam(); - LqhKeyReq::setMarkerFlag(Tdata10, 1); - CommitAckMarker * tmp; - tmp = m_commitAckMarkerHash.getPtr(commitAckMarker); + CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker); /** * Populate LQH array */ - const Uint32 noOfLqhs = regTcPtr->noOfNodes; tmp->noOfLqhs = noOfLqhs; for(Uint32 i = 0; i<noOfLqhs; i++){ tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i]; @@ -3180,7 +3194,6 @@ void Dbtc::sendlqhkeyreq(Signal* signal, /* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT */ /* IS SENT TO A PRIMARY NODE. */ /* ************************************************************> */ - UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6; LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend(); @@ -3204,6 +3217,14 @@ void Dbtc::sendlqhkeyreq(Signal* signal, sig5 = regTcPtr->clientData; sig6 = regCachePtr->scanInfo; + if (! 
simpleRead) + { + regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]); + regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]); + regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]); + regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]); + } + lqhKeyReq->tableSchemaVersion = sig0; lqhKeyReq->fragmentData = sig1; lqhKeyReq->transId1 = sig2; @@ -4587,6 +4608,7 @@ void Dbtc::copyApi(Signal* signal) UintR TgcpPointer = regTmpApiPtr->gcpPointer; UintR TgcpFilesize = cgcpFilesize; UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker; + NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes; GcpRecord *localGcpRecord = gcpRecord; regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref; @@ -4597,6 +4619,7 @@ void Dbtc::copyApi(Signal* signal) regApiPtr->transid[1] = Ttransid2; regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec; regApiPtr->commitAckMarker = TcommitAckMarker; + regApiPtr->m_transaction_nodes = Tnodes; gcpPtr.i = TgcpPointer; ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord); @@ -4607,6 +4630,7 @@ void Dbtc::copyApi(Signal* signal) regTmpApiPtr->commitAckMarker = RNIL; regTmpApiPtr->firstTcConnect = RNIL; regTmpApiPtr->lastTcConnect = RNIL; + regTmpApiPtr->m_transaction_nodes.clear(); releaseAllSeizedIndexOperations(regTmpApiPtr); }//Dbtc::copyApi() @@ -4865,7 +4889,7 @@ void Dbtc::releaseTransResources(Signal* signal) TcConnectRecordPtr localTcConnectptr; UintR TtcConnectFilesize = ctcConnectFilesize; TcConnectRecord *localTcConnectRecord = tcConnectRecord; - + apiConnectptr.p->m_transaction_nodes.clear(); localTcConnectptr.i = apiConnectptr.p->firstTcConnect; do { jam(); @@ -5269,7 +5293,8 @@ void Dbtc::execTC_COMMITREQ(Signal* signal) break; case CS_ABORTING: jam(); - errorCode = ZABORTINPROGRESS; + errorCode = regApiPtr->returncode ? 
+ regApiPtr->returncode : ZABORTINPROGRESS; break; case CS_START_SCAN: jam(); @@ -5808,9 +5833,9 @@ void Dbtc::abort010Lab(Signal* signal) if (transP->firstTcConnect == RNIL) { jam(); - /*-----------------------------------------------------------------------*/ - /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */ - /*-----------------------------------------------------------------------*/ + /*--------------------------------------------------------------------*/ + /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */ + /*--------------------------------------------------------------------*/ releaseAbortResources(signal); return; }//if @@ -6087,10 +6112,12 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr) if (api_timer != 0) { time_out_value= time_out_param + (api_con_ptr & mask_value); time_passed= tc_timer - api_timer; - if (time_passed > time_out_value) { + if (time_passed > time_out_value) + { jam(); - timeOutFoundLab(signal, api_con_ptr); - return; + timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR); + api_con_ptr++; + break; } } } @@ -6110,10 +6137,8 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr) return; }//Dbtc::timeOutLoopStartLab() -void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) +void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) { - sendContinueTimeOutControl(signal, TapiConPtr + 1); - apiConnectptr.i = TapiConPtr; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); /*------------------------------------------------------------------*/ @@ -6126,7 +6151,8 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) << "Time-out in state = " << apiConnectptr.p->apiConnectstate << " apiConnectptr.i = " << apiConnectptr.i << " - exec: " << apiConnectptr.p->m_exec_flag - << " - place: " << c_apiConTimer_line[apiConnectptr.i]); + << " - place: " << c_apiConTimer_line[apiConnectptr.i] + << " code: " << errCode); switch (apiConnectptr.p->apiConnectstate) { 
case CS_STARTED: if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){ @@ -6143,7 +6169,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) }//if } apiConnectptr.p->returnsignal = RS_TCROLLBACKREP; - apiConnectptr.p->returncode = ZTIME_OUT_ERROR; + apiConnectptr.p->returncode = errCode; abort010Lab(signal); return; case CS_RECEIVING: @@ -6156,7 +6182,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) /* START ABORTING THE TRANSACTION. ALSO START CHECKING THE */ /* REMAINING TRANSACTIONS. */ /*------------------------------------------------------------------*/ - terrorCode = ZTIME_OUT_ERROR; + terrorCode = errCode; abortErrorLab(signal); return; case CS_COMMITTING: @@ -6820,6 +6846,8 @@ void Dbtc::execNODE_FAILREP(Signal* signal) /* FAILED. */ /*------------------------------------------------------------*/ hostptr.p->hostStatus = HS_DEAD; + hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS; + c_alive_nodes.clear(hostptr.i); if (hostptr.p->takeOverStatus == TOS_COMPLETED) { jam(); @@ -6832,14 +6860,7 @@ void Dbtc::execNODE_FAILREP(Signal* signal) /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */ /* USED THEM IS COMPLETED. */ /*------------------------------------------------------------*/ - { - NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0]; - nfRep->blockNo = DBTC; - nfRep->nodeId = cownNodeid; - nfRep->failedNodeId = hostptr.i; - } - sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, - NFCompleteRep::SignalLength, JBB); + hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER; } else { ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE); hostptr.p->takeOverStatus = TOS_NODE_FAILED; @@ -6892,16 +6913,9 @@ void Dbtc::execNODE_FAILREP(Signal* signal) /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */ /* CRASHED NODE HAVE ALREADY RECOVERED. 
*/ /*------------------------------------------------------------*/ - for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) { - jam(); - ptrAss(tmpHostptr, hostRecord); - if (tmpHostptr.p->hostStatus == HS_ALIVE) { - jam(); - tblockref = calcTcBlockRef(tmpHostptr.i); - signal->theData[0] = hostptr.i; - sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB); - }//if - }//for + NodeReceiverGroup rg(DBTC, c_alive_nodes); + signal->theData[0] = hostptr.i; + sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB); }//if }//if }//for @@ -6939,10 +6953,30 @@ void Dbtc::execNODE_FAILREP(Signal* signal) /*------------------------------------------------------------*/ checkScanActiveInFailedLqh(signal, 0, hostptr.i); checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid + nodeFailCheckTransactions(signal, 0, hostptr.i); }//for }//Dbtc::execNODE_FAILREP() +void +Dbtc::checkNodeFailComplete(Signal* signal, + Uint32 failedNodeId, + Uint32 bit) +{ + hostptr.i = failedNodeId; + ptrCheckGuard(hostptr, chostFilesize, hostRecord); + hostptr.p->m_nf_bits &= ~bit; + if (hostptr.p->m_nf_bits == 0) + { + NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0]; + nfRep->blockNo = DBTC; + nfRep->nodeId = cownNodeid; + nfRep->failedNodeId = hostptr.i; + sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, + NFCompleteRep::SignalLength, JBB); + } +} + void Dbtc::checkScanActiveInFailedLqh(Signal* signal, Uint32 scanPtrI, Uint32 failedNodeId){ @@ -6984,8 +7018,44 @@ void Dbtc::checkScanActiveInFailedLqh(Signal* signal, sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); return; }//for + + checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN); } +void +Dbtc::nodeFailCheckTransactions(Signal* signal, + Uint32 transPtrI, + Uint32 failedNodeId) +{ + jam(); + Ptr<ApiConnectRecord> transPtr; + for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) + { + ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); + if 
(transPtr.p->m_transaction_nodes.get(failedNodeId)) + { + jam(); + // Force timeout regardless of state + Uint32 save = c_appl_timeout_value; + c_appl_timeout_value = 1; + setApiConTimer(transPtr.i, 0, __LINE__); + timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); + c_appl_timeout_value = save; + } + + // Send CONTINUEB to continue later + signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS; + signal->theData[1] = transPtr.i + 1; // Check next + signal->theData[2] = failedNodeId; + sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + return; + } + + checkNodeFailComplete(signal, failedNodeId, + HostRecord::NF_CHECK_TRANSACTION); +} + + void Dbtc::checkScanFragList(Signal* signal, Uint32 failedNodeId, @@ -7025,14 +7095,7 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) /* USED THEM IS COMPLETED. */ /*------------------------------------------------------------*/ hostptr.p->takeOverStatus = TOS_COMPLETED; - { - NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0]; - nfRep->blockNo = DBTC; - nfRep->nodeId = cownNodeid; - nfRep->failedNodeId = hostptr.i; - } - sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, - NFCompleteRep::SignalLength, JBB); + checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); break; case TOS_COMPLETED: jam(); @@ -7979,6 +8042,7 @@ void Dbtc::initApiConnectFail(Signal* signal) apiConnectptr.p->ndbapiBlockref = 0; apiConnectptr.p->ndbapiConnect = 0; apiConnectptr.p->buddyPtr = RNIL; + apiConnectptr.p->m_transaction_nodes.clear(); setApiConTimer(apiConnectptr.i, 0, __LINE__); switch(ttransStatus){ case LqhTransConf::Committed: @@ -9756,6 +9820,7 @@ void Dbtc::initApiConnect(Signal* signal) apiConnectptr.p->executingIndexOp = RNIL; apiConnectptr.p->buddyPtr = RNIL; apiConnectptr.p->currSavePointId = 0; + apiConnectptr.p->m_transaction_nodes.clear(); }//for apiConnectptr.i = tiacTmp - 1; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); @@ -9783,6 +9848,7 @@ void 
Dbtc::initApiConnect(Signal* signal) apiConnectptr.p->executingIndexOp = RNIL; apiConnectptr.p->buddyPtr = RNIL; apiConnectptr.p->currSavePointId = 0; + apiConnectptr.p->m_transaction_nodes.clear(); }//for apiConnectptr.i = (2 * tiacTmp) - 1; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); @@ -9810,6 +9876,7 @@ void Dbtc::initApiConnect(Signal* signal) apiConnectptr.p->executingIndexOp = RNIL; apiConnectptr.p->buddyPtr = RNIL; apiConnectptr.p->currSavePointId = 0; + apiConnectptr.p->m_transaction_nodes.clear(); }//for apiConnectptr.i = (3 * tiacTmp) - 1; ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord); @@ -9877,6 +9944,7 @@ void Dbtc::inithost(Signal* signal) hostptr.p->noOfPackedWordsLqh = 0; hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i); }//for + c_alive_nodes.clear(); }//Dbtc::inithost() void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0, @@ -10126,6 +10194,7 @@ void Dbtc::releaseAbortResources(Signal* signal) }//while apiConnectptr.p->firstTcConnect = RNIL; apiConnectptr.p->lastTcConnect = RNIL; + apiConnectptr.p->m_transaction_nodes.clear(); // MASV let state be CS_ABORTING until all // signals in the "air" have been received. 
Reset to CS_CONNECTED @@ -10199,6 +10268,7 @@ void Dbtc::releaseApiCon(Signal* signal, UintR TapiConnectPtr) cfirstfreeApiConnect = TlocalApiConnectptr.i; setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__); TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED; + ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear()); ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL); TlocalApiConnectptr.p->ndbapiBlockref = 0; }//Dbtc::releaseApiCon() @@ -10734,6 +10804,34 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal) c_theIndexOperationPool.getSize(), c_theIndexOperationPool.getNoOfFree()); } + + if (dumpState->args[0] == 2514) + { + if (signal->getLength() == 2) + { + dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec; + execDUMP_STATE_ORD(signal); + } + + NodeReceiverGroup rg(CMVMI, c_alive_nodes); + dumpState->args[0] = 15; + sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB); + + signal->theData[0] = 2515; + sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1); + return; + } + + if (dumpState->args[0] == 2515) + { + NdbNodeBitmask mask = c_alive_nodes; + mask.clear(getOwnNodeId()); + NodeReceiverGroup rg(NDBCNTR, mask); + + sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB); + sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1); + return; + } }//Dbtc::execDUMP_STATE_ORD() void Dbtc::execSET_VAR_REQ(Signal* signal) From 51a093f18762d299899c7c9e5cb0a2a639631720 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Mar 2006 14:49:46 +0100 Subject: [PATCH 04/16] ndb - bug#18352 Use variable waitfor_response_timeout (depending on TransactionDeadLockTimeout) When getting 4012, set NeedAbort and ReleaseOnClose ndb/src/ndbapi/NdbConnection.cpp: Use variable for WAITFOR_RESPONSE_TIMEOUT ndb/src/ndbapi/Ndbif.cpp: Use variable timeout for waitfor, when receiving 4012, set NeedAbort and ReleaseOnClose ndb/src/ndbapi/TransporterFacade.cpp: Init wait_for_response_timoue as max TRANSACTION_DEADLOCK_TIMEOUT ndb/src/ndbapi/TransporterFacade.hpp: Init 
wait_for_response_timoue as max TRANSACTION_DEADLOCK_TIMEOUT ndb/test/ndbapi/testTimeout.cpp: Add testcase for 4012 ndb/test/run-test/daily-basic-tests.txt: Add testcase for 4012 --- ndb/src/ndbapi/NdbConnection.cpp | 4 +- ndb/src/ndbapi/Ndbif.cpp | 12 +-- ndb/src/ndbapi/TransporterFacade.cpp | 14 ++++ ndb/src/ndbapi/TransporterFacade.hpp | 1 + ndb/test/ndbapi/testTimeout.cpp | 101 ++++++++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 + 6 files changed, 129 insertions(+), 7 deletions(-) diff --git a/ndb/src/ndbapi/NdbConnection.cpp b/ndb/src/ndbapi/NdbConnection.cpp index c9e26f8ccaf..9cd7d6ed42e 100644 --- a/ndb/src/ndbapi/NdbConnection.cpp +++ b/ndb/src/ndbapi/NdbConnection.cpp @@ -450,12 +450,12 @@ NdbConnection::executeNoBlobs(ExecType aTypeOfExec, //------------------------------------------------------------------------ Ndb* tNdb = theNdb; + Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout; m_waitForReply = false; executeAsynchPrepare(aTypeOfExec, NULL, NULL, abortOption); if (m_waitForReply){ while (1) { - int noOfComp = tNdb->sendPollNdb((3 * WAITFOR_RESPONSE_TIMEOUT), - 1, forceSend); + int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend); if (noOfComp == 0) { /** * This timeout situation can occur if NDB crashes. 
diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp index 3ebba7e1c4a..d753117aa9a 100644 --- a/ndb/src/ndbapi/Ndbif.cpp +++ b/ndb/src/ndbapi/Ndbif.cpp @@ -954,23 +954,25 @@ Ndb::pollCompleted(NdbConnection** aCopyArray) void Ndb::check_send_timeout() { + Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout; NDB_TICKS current_time = NdbTick_CurrentMillisecond(); if (current_time - the_last_check_time > 1000) { the_last_check_time = current_time; Uint32 no_of_sent = theNoOfSentTransactions; for (Uint32 i = 0; i < no_of_sent; i++) { NdbConnection* a_con = theSentTransactionsArray[i]; - if ((current_time - a_con->theStartTransTime) > - WAITFOR_RESPONSE_TIMEOUT) { + if ((current_time - a_con->theStartTransTime) > timeout) + { #ifdef VM_TRACE a_con->printState(); Uint32 t1 = a_con->theTransactionId; Uint32 t2 = a_con->theTransactionId >> 32; - ndbout_c("[%.8x %.8x]", t1, t2); - abort(); + ndbout_c("4012 [%.8x %.8x]", t1, t2); + //abort(); #endif + a_con->theReleaseOnClose = true; a_con->setOperationErrorCodeAbort(4012); - a_con->theCommitStatus = NdbConnection::Aborted; + a_con->theCommitStatus = NdbConnection::NeedAbort; a_con->theCompletionStatus = NdbConnection::CompletedFailure; a_con->handleExecuteCompletion(); remove_sent_list(i); diff --git a/ndb/src/ndbapi/TransporterFacade.cpp b/ndb/src/ndbapi/TransporterFacade.cpp index b6fb2d6cded..5e9147304eb 100644 --- a/ndb/src/ndbapi/TransporterFacade.cpp +++ b/ndb/src/ndbapi/TransporterFacade.cpp @@ -567,6 +567,20 @@ TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props) } #endif + Uint32 timeout = 120000; + iter.first(); + for (iter.first(); iter.valid(); iter.next()) + { + Uint32 tmp1 = 0, tmp2 = 0; + iter.get(CFG_DB_TRANSACTION_CHECK_INTERVAL, &tmp1); + iter.get(CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, &tmp2); + tmp1 += tmp2; + if (tmp1 > timeout) + timeout = tmp1; + } + m_waitfor_timeout = timeout; + ndbout_c("Using waitfor: %d", timeout); + if 
(!theTransporterRegistry->start_service(m_socket_server)){ ndbout_c("Unable to start theTransporterRegistry->start_service"); DBUG_RETURN(false); diff --git a/ndb/src/ndbapi/TransporterFacade.hpp b/ndb/src/ndbapi/TransporterFacade.hpp index 99edea846c1..1e7377a3b4d 100644 --- a/ndb/src/ndbapi/TransporterFacade.hpp +++ b/ndb/src/ndbapi/TransporterFacade.hpp @@ -172,6 +172,7 @@ private: */ public: STATIC_CONST( MAX_NO_THREADS = 4711 ); + Uint32 m_waitfor_timeout; // in milli seconds... private: struct ThreadData { diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp index 71c11b25859..25392698642 100644 --- a/ndb/test/ndbapi/testTimeout.cpp +++ b/ndb/test/ndbapi/testTimeout.cpp @@ -24,6 +24,7 @@ #define TIMEOUT (Uint32)3000 Uint32 g_org_timeout = 3000; +Uint32 g_org_deadlock = 3000; int setTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){ @@ -59,6 +60,60 @@ resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){ return NDBT_OK; } +int +setDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){ + NdbRestarter restarter; + int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT); + + NdbConfig conf(GETNDB(step)->getNodeId()+1); + unsigned int nodeId = conf.getMasterNodeId(); + if (!conf.getProperty(nodeId, + NODE_TYPE_DB, + CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, + &g_org_deadlock)) + return NDBT_FAILED; + + g_err << "Setting timeout: " << timeout << endl; + int val[] = { DumpStateOrd::TcSetTransactionTimeout, timeout }; + if(restarter.dumpStateAllNodes(val, 2) != 0){ + return NDBT_FAILED; + } + + return NDBT_OK; +} + +int +getDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){ + NdbRestarter restarter; + + Uint32 val = 0; + NdbConfig conf(GETNDB(step)->getNodeId()+1); + unsigned int nodeId = conf.getMasterNodeId(); + if (!conf.getProperty(nodeId, + NODE_TYPE_DB, + CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, + &val)) + return NDBT_FAILED; + + if (val < 120000) + val = 120000; + ctx->setProperty("TransactionDeadlockTimeout", 4*val); 
+ + return NDBT_OK; +} + +int +resetDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){ + NdbRestarter restarter; + + int val[] = { DumpStateOrd::TcSetTransactionTimeout, g_org_deadlock }; + if(restarter.dumpStateAllNodes(val, 2) != 0){ + return NDBT_FAILED; + } + + return NDBT_OK; +} + int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -374,6 +429,43 @@ int runBuddyTransNoTimeout(NDBT_Context* ctx, NDBT_Step* step){ return result; } +int +runError4012(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int stepNo = step->getStepNo(); + + int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT); + + HugoOperations hugoOps(*ctx->getTab()); + Ndb* pNdb = GETNDB(step); + + do{ + // Commit transaction + CHECK(hugoOps.startTransaction(pNdb) == 0); + CHECK(hugoOps.pkUpdateRecord(pNdb, 0) == 0); + int ret = hugoOps.execute_NoCommit(pNdb); + if (ret == 0) + { + int sleep = timeout; + ndbout << "Sleeping for " << sleep << " milliseconds" << endl; + NdbSleep_MilliSleep(sleep); + + // Expect that transaction has NOT timed-out + CHECK(hugoOps.execute_Commit(pNdb) == 0); + } + else + { + CHECK(ret == 4012); + } + } while(false); + + hugoOps.closeTransaction(pNdb); + + return result; +} + + NDBT_TESTSUITE(testTimeout); TESTCASE("DontTimeoutTransaction", "Test that the transaction does not timeout "\ @@ -465,6 +557,15 @@ TESTCASE("BuddyTransNoTimeout5", FINALIZER(resetTransactionTimeout); FINALIZER(runClearTable); } +TESTCASE("Error4012", ""){ + TC_PROPERTY("TransactionDeadlockTimeout", 120000); + INITIALIZER(runLoadTable); + INITIALIZER(getDeadlockTimeout); + INITIALIZER(setDeadlockTimeout); + STEPS(runError4012, 2); + FINALIZER(runClearTable); +} + NDBT_TESTSUITE_END(testTimeout); int main(int argc, const char** argv){ diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 169daae6d7f..70518f7881d 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ 
b/ndb/test/run-test/daily-basic-tests.txt @@ -236,6 +236,10 @@ max-time: 500 cmd: testTimeout args: -n TimeoutRandTransaction T1 +max-time: 600 +cmd: testTimeout +args: -n Error4012 T1 + # SCAN TESTS # max-time: 500 From d230d0e1e6c7aa92bd6afabee378746d9d46c340 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Mar 2006 14:53:29 +0100 Subject: [PATCH 05/16] ndb - wl2610, bug#18352 Remove useless and tricky state fiddleing in TC to syncronize NF_CompleteRep as code is already present in DIH aswell Keep broadcast of TAKEOVER_TCCONF for online upgrade ndb/src/kernel/blocks/dblqh/DblqhMain.cpp: Add clever dump for showing active operations ndb/src/kernel/blocks/dbtc/Dbtc.hpp: Remove useless and tricky state fiddleing in TC to syncronize NF_CompleteRep as code is already present in DIH aswell Keep broadcast of TAKEOVER_TCCONF for online upgrade ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Remove useless and tricky state fiddleing in TC to syncronize NF_CompleteRep as code is already present in DIH aswell Keep broadcast of TAKEOVER_TCCONF for online upgrade --- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 166 +++++++++++++++++++ ndb/src/kernel/blocks/dbtc/Dbtc.hpp | 9 -- ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 186 +++++----------------- 3 files changed, 208 insertions(+), 153 deletions(-) diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index ff7e3c32924..0aeeaccd55e 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -18448,6 +18448,172 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) c_error_insert_table_id = dumpState->args[1]; SET_ERROR_INSERT_VALUE(5042); } + + TcConnectionrec *regTcConnectionrec = tcConnectionrec; + Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize; + Uint32 arg = dumpState->args[0]; + if(arg == 2306) + { + for(Uint32 i = 0; i<1024; i++) + { + TcConnectionrecPtr tcRec; + tcRec.i = ctransidHash[i]; + while(tcRec.i != RNIL) + { + ptrCheckGuard(tcRec, 
ttcConnectrecFileSize, regTcConnectionrec); + ndbout << "TcConnectionrec " << tcRec.i; + signal->theData[0] = 2307; + signal->theData[1] = tcRec.i; + execDUMP_STATE_ORD(signal); + tcRec.i = tcRec.p->nextHashRec; + } + } + } + + if(arg == 2307 || arg == 2308) + { + TcConnectionrecPtr tcRec; + tcRec.i = signal->theData[1]; + ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec); + + ndbout << " transactionState = " << tcRec.p->transactionState<theData[1]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); hostptr.p->hostStatus = HS_ALIVE; - hostptr.p->takeOverStatus = TOS_IDLE; signal->theData[0] = cownref; c_alive_nodes.set(hostptr.i); sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB); @@ -856,8 +855,6 @@ void Dbtc::execREAD_NODESCONF(Signal* signal) hostptr.i = i; ptrCheckGuard(hostptr, chostFilesize, hostRecord); - hostptr.p->takeOverStatus = TOS_IDLE; - if (NodeBitmask::get(readNodes->inactiveNodes, i)) { jam(); hostptr.p->hostStatus = HS_DEAD; @@ -6826,21 +6823,27 @@ void Dbtc::execNODE_FAILREP(Signal* signal) const Uint32 tnewMasterId = nodeFail->masterNodeId; arrGuard(tnoOfNodes, MAX_NDB_NODES); + Uint32 i; int index = 0; - for (unsigned i = 1; i< MAX_NDB_NODES; i++) { - if(NodeBitmask::get(nodeFail->theNodes, i)){ + for (i = 1; i< MAX_NDB_NODES; i++) + { + if(NodeBitmask::get(nodeFail->theNodes, i)) + { cdata[index] = i; index++; }//if }//for + cmasterNodeId = tnewMasterId; + tcNodeFailptr.i = 0; ptrAss(tcNodeFailptr, tcFailRecord); - Uint32 tindex; - for (tindex = 0; tindex < tnoOfNodes; tindex++) { + for (i = 0; i < tnoOfNodes; i++) + { jam(); - hostptr.i = cdata[tindex]; + hostptr.i = cdata[i]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); + /*------------------------------------------------------------*/ /* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */ /* FAILED. 
*/ @@ -6849,30 +6852,15 @@ void Dbtc::execNODE_FAILREP(Signal* signal) hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS; c_alive_nodes.clear(hostptr.i); - if (hostptr.p->takeOverStatus == TOS_COMPLETED) { - jam(); - /*------------------------------------------------------------*/ - /* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/ - /* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */ - /* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */ - /*------------------------------------------------------------*/ - /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */ - /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */ - /* USED THEM IS COMPLETED. */ - /*------------------------------------------------------------*/ - hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER; - } else { - ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE); - hostptr.p->takeOverStatus = TOS_NODE_FAILED; - }//if - - if (tcNodeFailptr.p->failStatus == FS_LISTENING) { + if (tcNodeFailptr.p->failStatus == FS_LISTENING) + { jam(); /*------------------------------------------------------------*/ /* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */ /* FAILURE. 
*/ /*------------------------------------------------------------*/ - if (hostptr.p->lqhTransStatus == LTS_ACTIVE) { + if (hostptr.p->lqhTransStatus == LTS_ACTIVE) + { jam(); /*------------------------------------------------------------*/ /* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */ @@ -6884,78 +6872,25 @@ void Dbtc::execNODE_FAILREP(Signal* signal) }//if }//if - }//for - - const bool masterFailed = (cmasterNodeId != tnewMasterId); - cmasterNodeId = tnewMasterId; - - if(getOwnNodeId() == cmasterNodeId && masterFailed){ - /** - * Master has failed and I'm the new master - */ - jam(); - - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { + if (getOwnNodeId() != tnewMasterId) + { jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus != HS_ALIVE) { - jam(); - if (hostptr.p->takeOverStatus == TOS_COMPLETED) { - jam(); - /*------------------------------------------------------------*/ - /* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */ - /* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */ - /* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */ - /* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */ - /* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */ - /* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */ - /* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */ - /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */ - /* CRASHED NODE HAVE ALREADY RECOVERED. 
*/ - /*------------------------------------------------------------*/ - NodeReceiverGroup rg(DBTC, c_alive_nodes); - signal->theData[0] = hostptr.i; - sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB); - }//if - }//if - }//for - } - - if(getOwnNodeId() == cmasterNodeId){ - jam(); - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { + /** + * Only master does takeover currently + */ + hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER; + } + else + { jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus != HS_ALIVE) { - jam(); - if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) { - jam(); - /*------------------------------------------------------------*/ - /* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */ - /* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/ - /* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_ACTIVE; - signal->theData[0] = hostptr.i; - sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB); - }//if - }//if - }//for - }//if - for (tindex = 0; tindex < tnoOfNodes; tindex++) { - jam(); - hostptr.i = cdata[tindex]; - ptrCheckGuard(hostptr, chostFilesize, hostRecord); - /*------------------------------------------------------------*/ - /* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */ - /* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */ - /* NODE'S LQH */ - /*------------------------------------------------------------*/ + signal->theData[0] = hostptr.i; + sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB); + } + checkScanActiveInFailedLqh(signal, 0, hostptr.i); checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid nodeFailCheckTransactions(signal, 0, hostptr.i); - }//for - + } }//Dbtc::execNODE_FAILREP() void @@ -7071,47 +7006,17 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) tfailedNodeId = signal->theData[0]; hostptr.i = tfailedNodeId; ptrCheckGuard(hostptr, chostFilesize, 
hostRecord); - switch (hostptr.p->takeOverStatus) { - case TOS_IDLE: + + ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)", + tfailedNodeId, signal->getSendersBlockRef(), reference()); + if (signal->getSendersBlockRef() != reference()) + { jam(); - /*------------------------------------------------------------*/ - /* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */ - /* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */ - /* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */ - /* FOR THE NODE_FAILREP MESSAGE. */ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_COMPLETED; - break; - case TOS_NODE_FAILED: - case TOS_ACTIVE: - jam(); - /*------------------------------------------------------------*/ - /* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */ - /* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */ - /* WE SET THE STATE TO TAKE_OVER_COMPLETED. */ - /*------------------------------------------------------------*/ - /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */ - /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */ - /* USED THEM IS COMPLETED. */ - /*------------------------------------------------------------*/ - hostptr.p->takeOverStatus = TOS_COMPLETED; - checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); - break; - case TOS_COMPLETED: - jam(); - /*------------------------------------------------------------*/ - /* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */ - /* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */ - /* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */ - /* WE SIMPLY IGNORE THE MESSAGE. 
*/ - /*------------------------------------------------------------*/ - /*empty*/; - break; - default: - jam(); - systemErrorLab(signal); return; - }//switch + } + + + checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER); }//Dbtc::execTAKE_OVERTCCONF() void Dbtc::execTAKE_OVERTCREQ(Signal* signal) @@ -7351,16 +7256,10 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd) /* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */ /* NODES THAT ARE ALIVE. */ /*------------------------------------------------------------*/ - for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) { - jam(); - ptrAss(hostptr, hostRecord); - if (hostptr.p->hostStatus == HS_ALIVE) { - jam(); - tblockref = calcTcBlockRef(hostptr.i); - signal->theData[0] = tcNodeFailptr.p->takeOverNode; - sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB); - }//if - }//for + NodeReceiverGroup rg(DBTC, c_alive_nodes); + signal->theData[0] = tcNodeFailptr.p->takeOverNode; + sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB); + if (tcNodeFailptr.p->queueIndex > 0) { jam(); /*------------------------------------------------------------*/ @@ -9937,7 +9836,6 @@ void Dbtc::inithost(Signal* signal) ptrAss(hostptr, hostRecord); hostptr.p->hostStatus = HS_DEAD; hostptr.p->inPackedList = false; - hostptr.p->takeOverStatus = TOS_NOT_DEFINED; hostptr.p->lqhTransStatus = LTS_IDLE; hostptr.p->noOfWordsTCKEYCONF = 0; hostptr.p->noOfWordsTCINDXCONF = 0; From ad6dcfb1277b3b0a8692c3bfd802ba48cc3fe537 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Mar 2006 14:55:14 +0100 Subject: [PATCH 06/16] ndb - bug#18352 remove debug prinout --- ndb/src/ndbapi/TransporterFacade.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ndb/src/ndbapi/TransporterFacade.cpp b/ndb/src/ndbapi/TransporterFacade.cpp index 5e9147304eb..30d0eec1e4a 100644 --- a/ndb/src/ndbapi/TransporterFacade.cpp +++ b/ndb/src/ndbapi/TransporterFacade.cpp @@ -579,7 +579,6 @@ TransporterFacade::init(Uint32 nodeId, 
const ndb_mgm_configuration* props) timeout = tmp1; } m_waitfor_timeout = timeout; - ndbout_c("Using waitfor: %d", timeout); if (!theTransporterRegistry->start_service(m_socket_server)){ ndbout_c("Unable to start theTransporterRegistry->start_service"); From 8ed36cb667b675244f55072cefa15fb65ec89ee7 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 21 Mar 2006 14:47:10 +0100 Subject: [PATCH 07/16] ndb - bug#18385 Partial system restart, can not try to start with higher GCI that own even if knowing about a higher number ndb/include/kernel/signaldata/DumpStateOrd.hpp: Add new dump for setting time between gcp ndb/include/kernel/signaldata/StartPerm.hpp: Move error codes into StartPerm + Add new error code ndb/src/kernel/blocks/ERROR_codes.txt: Add new error insert ndb/src/kernel/blocks/dbdih/Dbdih.hpp: Move error codes into StartPerm + Add new error code ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Fix so that we don't try to restart to a too new GCI when doing a partial start Add new error code when this node later tries to join ndb/test/include/NdbRestarter.hpp: Add new method for selecting random node ndb/test/ndbapi/testSystemRestart.cpp: Add new testcase for bug#18385 ndb/test/run-test/daily-basic-tests.txt: Run test in daily-basic ndb/test/src/NdbRestarter.cpp: Add new method for selecting random node --- .../kernel/signaldata/DumpStateOrd.hpp | 1 + ndb/include/kernel/signaldata/StartPerm.hpp | 6 ++ ndb/src/kernel/blocks/ERROR_codes.txt | 2 + ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 1 - ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 99 ++++++++++++++----- ndb/test/include/NdbRestarter.hpp | 1 + ndb/test/ndbapi/testSystemRestart.cpp | 53 ++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 + ndb/test/src/NdbRestarter.cpp | 33 +++++++ 9 files changed, 177 insertions(+), 23 deletions(-) diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp index 4dd22cf5092..2c824670cef 100644 --- 
a/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -127,6 +127,7 @@ public: DihMinTimeBetweenLCP = 7017, DihMaxTimeBetweenLCP = 7018, EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP + DihSetTimeBetweenGcp = 7090, DihStartLcpImmediately = 7099, // 8000 Suma // 12000 Tux diff --git a/ndb/include/kernel/signaldata/StartPerm.hpp b/ndb/include/kernel/signaldata/StartPerm.hpp index 38be72835a3..63e01ed3868 100644 --- a/ndb/include/kernel/signaldata/StartPerm.hpp +++ b/ndb/include/kernel/signaldata/StartPerm.hpp @@ -64,5 +64,11 @@ private: Uint32 startingNodeId; Uint32 errorCode; + + enum ErrorCode + { + ZNODE_ALREADY_STARTING_ERROR = 305, + InitialStartRequired = 320 + }; }; #endif diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index 62481837c14..e5576450846 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -303,6 +303,8 @@ Test Crashes in handling node restarts 7131: Crash when receiving START_COPYREQ in master node 7132: Crash when receiving START_COPYCONF in starting node +7170: Crash when receiving START_PERMREF (InitialStartRequired) + DICT: 6000 Crash during NR when receiving DICTSTARTREQ 6001 Crash during NR when receiving SCHEMA_INFO diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp index f74c0f36c4d..78acf1ffd19 100644 --- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp @@ -81,7 +81,6 @@ #define ZWRONG_FAILURE_NUMBER_ERROR 302 #define ZWRONG_START_NODE_ERROR 303 #define ZNO_REPLICA_FOUND_ERROR 304 -#define ZNODE_ALREADY_STARTING_ERROR 305 #define ZNODE_START_DISALLOWED_ERROR 309 // -------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index fab428aadef..eb4ae61a3e4 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ 
b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -1420,6 +1420,33 @@ void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref) return; } + NodeRecordPtr nodePtr; + Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()]; + for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) + { + jam(); + ptrAss(nodePtr, nodeRecord); + if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) + { + jam(); + /** + * Since we're starting(is master) and there + * there are other nodes with higher GCI... + * there gci's must be invalidated... + * and they _must_ do an initial start + * indicate this by setting lastCompletedGCI = 0 + */ + SYSFILE->lastCompletedGCI[nodePtr.i] = 0; + ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE); + warningEvent("Making filesystem for node %d unusable", + nodePtr.i); + } + } + /** + * This set which GCI we will try to restart to + */ + SYSFILE->newestRestorableGCI = gci; + ndbrequire(isMaster()); copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file! }//Dbdih::ndbStartReqLab() @@ -1557,7 +1584,7 @@ void Dbdih::execSTART_PERMREF(Signal* signal) { jamEntry(); Uint32 errorCode = signal->theData[1]; - if (errorCode == ZNODE_ALREADY_STARTING_ERROR) { + if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR) { jam(); /*-----------------------------------------------------------------------*/ // The master was busy adding another node. 
We will wait for a second and @@ -1567,6 +1594,20 @@ void Dbdih::execSTART_PERMREF(Signal* signal) sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); return; }//if + + if (errorCode == StartPermRef::InitialStartRequired) + { + CRASH_INSERTION(7170); + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Cluster requires this node to be started " + " with --initial as partial start has been performed" + " and this filesystem is unusable"); + progError(__LINE__, + ERR_SR_RESTARTCONFLICT, + buf); + ndbrequire(false); + } /*------------------------------------------------------------------------*/ // Some node process in another node involving our node was still active. We // will recover from this by crashing here. @@ -1657,7 +1698,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) (c_nodeStartMaster.wait != ZFALSE)) { jam(); signal->theData[0] = nodeId; - signal->theData[1] = ZNODE_ALREADY_STARTING_ERROR; + signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB); return; }//if @@ -1667,6 +1708,16 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) ndbrequire(false); }//if + if (SYSFILE->lastCompletedGCI[nodeId] == 0 && + typeStart != NodeState::ST_INITIAL_NODE_RESTART) + { + jam(); + signal->theData[0] = nodeId; + signal->theData[1] = StartPermRef::InitialStartRequired; + sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB); + return; + } + /*---------------------------------------------------------------------- * WE START THE INCLUSION PROCEDURE * ---------------------------------------------------------------------*/ @@ -3515,24 +3566,12 @@ void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr) /* ------------------------------------------------------------------------- */ void Dbdih::selectMasterCandidateAndSend(Signal* signal) { - Uint32 gci = 0; - Uint32 masterCandidateId = 0; - NodeRecordPtr nodePtr; - for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { - 
jam(); - ptrAss(nodePtr, nodeRecord); - if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) { - jam(); - masterCandidateId = nodePtr.i; - gci = SYSFILE->lastCompletedGCI[nodePtr.i]; - }//if - }//for - ndbrequire(masterCandidateId != 0); setNodeGroups(); - signal->theData[0] = masterCandidateId; - signal->theData[1] = gci; + signal->theData[0] = getOwnNodeId(); + signal->theData[1] = SYSFILE->lastCompletedGCI[getOwnNodeId()]; sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB); - + + NodeRecordPtr nodePtr; Uint32 node_groups[MAX_NDB_NODES]; memset(node_groups, 0, sizeof(node_groups)); for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { @@ -3550,10 +3589,10 @@ void Dbdih::selectMasterCandidateAndSend(Signal* signal) if(count != 0 && count != cnoReplicas){ char buf[255]; BaseString::snprintf(buf, sizeof(buf), - "Illegal configuration change." - " Initial start needs to be performed " - " when changing no of replicas (%d != %d)", - node_groups[nodePtr.i], cnoReplicas); + "Illegal configuration change." 
+ " Initial start needs to be performed " + " when changing no of replicas (%d != %d)", + node_groups[nodePtr.i], cnoReplicas); progError(__LINE__, ERR_INVALID_CONFIG, buf); @@ -13359,6 +13398,22 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) c_lcpState.ctimer += (1 << c_lcpState.clcpDelay); return; } + + if (dumpState->args[0] == DumpStateOrd::DihSetTimeBetweenGcp) + { + if (signal->getLength() == 1) + { + const ndb_mgm_configuration_iterator * p = + theConfiguration.getOwnConfigIterator(); + ndbrequire(p != 0); + ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay); + } + else + { + cgcpDelay = signal->theData[1]; + } + ndbout_c("Setting time between gcp : %d", cgcpDelay); + } }//Dbdih::execDUMP_STATE_ORD() void diff --git a/ndb/test/include/NdbRestarter.hpp b/ndb/test/include/NdbRestarter.hpp index 19a88b4f8ad..3ec92ae786e 100644 --- a/ndb/test/include/NdbRestarter.hpp +++ b/ndb/test/include/NdbRestarter.hpp @@ -62,6 +62,7 @@ public: int dumpStateAllNodes(int * _args, int _num_args); int getMasterNodeId(); + int getRandomNodeSameNodeGroup(int nodeId, int randomNumber); int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber); int getRandomNotMasterNodeId(int randomNumber); diff --git a/ndb/test/ndbapi/testSystemRestart.cpp b/ndb/test/ndbapi/testSystemRestart.cpp index 35016896495..30f7aca9b06 100644 --- a/ndb/test/ndbapi/testSystemRestart.cpp +++ b/ndb/test/ndbapi/testSystemRestart.cpp @@ -1051,6 +1051,52 @@ int runSystemRestart9(NDBT_Context* ctx, NDBT_Step* step){ return result; } +int runBug18385(NDBT_Context* ctx, NDBT_Step* step){ + NdbRestarter restarter; + const Uint32 nodeCount = restarter.getNumDbNodes(); + if(nodeCount < 2){ + g_info << "Bug18385 - Needs atleast 2 nodes to test" << endl; + return NDBT_OK; + } + + int node1 = restarter.getDbNodeId(rand() % nodeCount); + int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + if (node1 == -1 || node2 == -1) + return NDBT_OK; + + int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 
300 }; + + int result = NDBT_OK; + do { + CHECK(restarter.dumpStateAllNodes(dump, 2) == 0); + CHECK(restarter.restartOneDbNode(node1, false, true, false) == 0); + NdbSleep_SecSleep(3); + CHECK(restarter.restartAll(false, true, false) == 0); + + Uint32 cnt = 0; + int nodes[128]; + for(Uint32 i = 0; i Date: Tue, 21 Mar 2006 15:13:41 +0100 Subject: [PATCH 08/16] ndb - bug#18118 timeslice DUMP(7015) ndb/include/kernel/signaldata/DumpStateOrd.hpp: doc... ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: timeslice DUMP(7015) --- .../kernel/signaldata/DumpStateOrd.hpp | 3 + ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 126 ++++++++++-------- 2 files changed, 77 insertions(+), 52 deletions(-) diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp index 2c824670cef..b42b930711c 100644 --- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -126,6 +126,9 @@ public: DihAllAllowNodeStart = 7016, DihMinTimeBetweenLCP = 7017, DihMaxTimeBetweenLCP = 7018, + // 7019 + // 7020 + // 7021 EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP DihSetTimeBetweenGcp = 7090, DihStartLcpImmediately = 7099, diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index eb4ae61a3e4..a8633af2529 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -5983,6 +5983,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) execDUMP_STATE_ORD(signal); signal->theData[0] = 7015; + signal->theData[1] = 0; execDUMP_STATE_ORD(signal); c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__); @@ -13036,7 +13037,8 @@ void Dbdih::execDUMP_STATE_ORD(Signal* signal) { DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if (dumpState->args[0] == DumpStateOrd::DihDumpNodeRestartInfo) { + Uint32 arg = dumpState->args[0]; + if (arg == DumpStateOrd::DihDumpNodeRestartInfo) { 
infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d", c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait); infoEvent("cstartGcpNow = %d, cgcpStatus = %d", @@ -13046,7 +13048,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) infoEvent("cgcpOrderBlocked = %d, cgcpStartCounter = %d", cgcpOrderBlocked, cgcpStartCounter); }//if - if (dumpState->args[0] == DumpStateOrd::DihDumpNodeStatusInfo) { + if (arg == DumpStateOrd::DihDumpNodeStatusInfo) { NodeRecordPtr localNodePtr; infoEvent("Printing nodeStatus of all nodes"); for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) { @@ -13058,7 +13060,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) }//for }//if - if (dumpState->args[0] == DumpStateOrd::DihPrintFragmentation){ + if (arg == DumpStateOrd::DihPrintFragmentation){ infoEvent("Printing fragmentation of all tables --"); for(Uint32 i = 0; iargs[0] == 7019 && signal->getLength() == 2) + if(arg == 7019 && signal->getLength() == 2) { char buf2[8+1]; NodeRecordPtr nodePtr; @@ -13251,7 +13253,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) nodePtr.p->m_nodefailSteps.getText(buf2)); } - if(dumpState->args[0] == 7020 && signal->getLength() > 3) + if(arg == 7020 && signal->getLength() > 3) { Uint32 gsn= signal->theData[1]; Uint32 block= signal->theData[2]; @@ -13275,7 +13277,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) gsn, getBlockName(block, "UNKNOWN"), length, buf); } - if(dumpState->args[0] == DumpStateOrd::DihDumpLCPState){ + if(arg == DumpStateOrd::DihDumpLCPState){ infoEvent("-- Node %d LCP STATE --", getOwnNodeId()); infoEvent("lcpStatus = %d (update place = %d) ", c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace); @@ -13291,7 +13293,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) infoEvent("-- Node %d LCP STATE --", getOwnNodeId()); } - if(dumpState->args[0] == DumpStateOrd::DihDumpLCPMasterTakeOver){ + if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){ infoEvent("-- 
Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId()); infoEvent ("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d", @@ -13306,52 +13308,25 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId()); } - if (signal->theData[0] == 7015){ - for(Uint32 i = 0; itabStatus != TabRecord::TS_ACTIVE) - continue; - - infoEvent - ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d", - tabPtr.i, - tabPtr.p->tabCopyStatus, - tabPtr.p->tabUpdateState, - tabPtr.p->tabLcpStatus); + if (signal->theData[0] == 7015) + { + if (signal->getLength() == 1) + { + signal->theData[1] = 0; + } - FragmentstorePtr fragPtr; - for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) { - jam(); - getFragstore(tabPtr.p, fid, fragPtr); - - char buf[100], buf2[100]; - BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ", - fid, fragPtr.p->noLcpReplicas); - - Uint32 num=0; - ReplicaRecordPtr replicaPtr; - replicaPtr.i = fragPtr.p->storedReplicas; - do { - ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); - BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)", - buf, num, - replicaPtr.p->procNode, - replicaPtr.p->lcpIdStarted, - replicaPtr.p->lcpOngoingFlag ? 
"Ongoing" : "Idle"); - BaseString::snprintf(buf, sizeof(buf), "%s", buf2); - - num++; - replicaPtr.i = replicaPtr.p->nextReplica; - } while (replicaPtr.i != RNIL); - infoEvent(buf); - } + Uint32 tableId = signal->theData[1]; + if (tableId < ctabFileSize) + { + signal->theData[0] = 7021; + execDUMP_STATE_ORD(signal); + signal->theData[0] = 7015; + signal->theData[1] = tableId + 1; + sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB); } } - if(dumpState->args[0] == DumpStateOrd::EnableUndoDelayDataWrite){ + if(arg == DumpStateOrd::EnableUndoDelayDataWrite){ ndbout << "Dbdih:: delay write of datapages for table = " << dumpState->args[1]<< endl; // Send this dump to ACC and TUP @@ -13381,7 +13356,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) return; } - if(dumpState->args[0] == 7098){ + if(arg == 7098){ if(signal->length() == 3){ jam(); infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)", @@ -13394,12 +13369,12 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) } } - if(dumpState->args[0] == DumpStateOrd::DihStartLcpImmediately){ + if(arg == DumpStateOrd::DihStartLcpImmediately){ c_lcpState.ctimer += (1 << c_lcpState.clcpDelay); return; } - if (dumpState->args[0] == DumpStateOrd::DihSetTimeBetweenGcp) + if (arg == DumpStateOrd::DihSetTimeBetweenGcp) { if (signal->getLength() == 1) { @@ -13414,6 +13389,53 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) } ndbout_c("Setting time between gcp : %d", cgcpDelay); } + + if (arg == 7021 && signal->getLength() == 2) + { + TabRecordPtr tabPtr; + tabPtr.i = signal->theData[1]; + if (tabPtr.i >= ctabFileSize) + return; + + ptrCheckGuard(tabPtr, ctabFileSize, tabRecord); + + if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) + return; + + infoEvent + ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d", + tabPtr.i, + tabPtr.p->tabCopyStatus, + tabPtr.p->tabUpdateState, + tabPtr.p->tabLcpStatus); + + FragmentstorePtr fragPtr; + for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) { + jam(); + 
getFragstore(tabPtr.p, fid, fragPtr); + + char buf[100], buf2[100]; + BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ", + fid, fragPtr.p->noLcpReplicas); + + Uint32 num=0; + ReplicaRecordPtr replicaPtr; + replicaPtr.i = fragPtr.p->storedReplicas; + do { + ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); + BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)", + buf, num, + replicaPtr.p->procNode, + replicaPtr.p->lcpIdStarted, + replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle"); + BaseString::snprintf(buf, sizeof(buf), "%s", buf2); + + num++; + replicaPtr.i = replicaPtr.p->nextReplica; + } while (replicaPtr.i != RNIL); + infoEvent(buf); + } + } }//Dbdih::execDUMP_STATE_ORD() void From 19340f2242443ec54101d7fd518be47211ed0f15 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Mar 2006 11:44:31 +0100 Subject: [PATCH 09/16] ndb - bug#18414 Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding ndb/src/kernel/blocks/ERROR_codes.txt: New error code ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: remove dumping of LCP info during NF ndb/src/kernel/blocks/dbtc/DbtcMain.cpp: Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding ndb/test/ndbapi/testNodeRestart.cpp: Add testcase for bug18414 ndb/test/ndbapi/testTimeout.cpp: Fix error code checking ndb/test/run-test/daily-basic-tests.txt: Add testcase for bug18414 --- ndb/src/kernel/blocks/ERROR_codes.txt | 2 + ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 4 -- ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 52 +++++++++++++--- ndb/test/ndbapi/testNodeRestart.cpp | 73 +++++++++++++++++++++++ ndb/test/ndbapi/testTimeout.cpp | 7 ++- ndb/test/run-test/daily-basic-tests.txt | 4 ++ 6 files changed, 128 insertions(+), 14 deletions(-) diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index e5576450846..b4c5d1b1d7e 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -226,6 +226,8 @@ Delay 
execution of COMPLETECONF signal 2 seconds to generate time-out. 8045: (ABORTCONF only as part of take-over) Delay execution of ABORTCONF signal 2 seconds to generate time-out. +8050: Send ZABORT_TIMEOUT_BREAK delayed + ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index a8633af2529..de35ce5c275 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) signal->theData[0] = 7012; execDUMP_STATE_ORD(signal); - signal->theData[0] = 7015; - signal->theData[1] = 0; - execDUMP_STATE_ORD(signal); - c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__); checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER); diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index ff9b279592c..4ca13bf433b 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) return; } + bool found = false; OperationState tmp[16]; Uint32 TloopCount = 0; @@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) jam(); if (tcConnectptr.i == RNIL) { jam(); - if (Tcheck == 0) { + +#ifdef VM_TRACE + ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d", + found, Tcheck, apiConnectptr.p->counter); +#endif + if (found || apiConnectptr.p->counter) + { + jam(); + /** + * We sent atleast one ABORT/ABORTED + * or ZABORT_TIMEOUT_BREAK is in job buffer + * wait for reception... 
+ */ + return; + } + + if (Tcheck == 1) + { + jam(); + releaseAbortResources(signal); + return; + } + + if (Tcheck == 0) + { jam(); /*------------------------------------------------------------------ * All nodes had already reported ABORTED for all tcConnect records. @@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) *------------------------------------------------------------------*/ char buf[96]; buf[0] = 0; char buf2[96]; - BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:", - __LINE__, apiConnectptr.i); - for(Uint32 i = 0; icounter); + for(Uint32 i = 0; itheData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK; signal->theData[1] = tcConnectptr.i; signal->theData[2] = apiConnectptr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + if (ERROR_INSERTED(8050)) + { + ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", + Tcheck, apiConnectptr.p->counter); + sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3); + } + else + { + sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + } return; }//if ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); @@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) jam(); if (tcConnectptr.p->tcNodedata[Ti] != 0) { TloopCount += 31; - Tcheck = 1; + found = true; hostptr.i = tcConnectptr.p->tcNodedata[Ti]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == HS_ALIVE) { @@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) hostptr.i = tfailedNodeId; ptrCheckGuard(hostptr, chostFilesize, hostRecord); - ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)", - tfailedNodeId, signal->getSendersBlockRef(), reference()); if (signal->getSendersBlockRef() != reference()) { jam(); diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index eebd631af94..cc2998ff73a 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -581,6 
+581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){ return ret ? NDBT_OK : NDBT_FAILED; } +int +runBug18414(NDBT_Context* ctx, NDBT_Step* step){ + + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Ndb* pNdb = GETNDB(step); + HugoOperations hugoOps(*ctx->getTab()); + HugoTransactions hugoTrans(*ctx->getTab()); + int loop = 0; + do + { + if(hugoOps.startTransaction(pNdb) != 0) + goto err; + + if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0) + goto err; + + if(hugoOps.execute_NoCommit(pNdb) != 0) + goto err; + + int node1 = hugoOps.getTransaction()->getConnectedNodeId(); + int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + if (node1 == -1 || node2 == -1) + break; + + if (loop & 1) + { + if (restarter.insertErrorInNode(node1, 8050)) + goto err; + } + + if (restarter.insertErrorInNode(node2, 5003)) + goto err; + + int res= hugoOps.execute_Rollback(pNdb); + + if (restarter.waitNodesNoStart(&node2, 1) != 0) + goto err; + + if (restarter.insertErrorInAllNodes(0)) + goto err; + + if (restarter.startNodes(&node2, 1) != 0) + goto err; + + if (restarter.waitClusterStarted() != 0) + goto err; + + if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0) + goto err; + + hugoOps.closeTransaction(pNdb); + + } while(++loop < 5); + + return NDBT_OK; + +err: + hugoOps.closeTransaction(pNdb); + return NDBT_FAILED; +} NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -870,6 +937,12 @@ TESTCASE("Bug16772", "Test bug with restarting before NF handling is complete"){ STEP(runBug16772); } +TESTCASE("Bug18414", + "Test bug with NF during NR"){ + INITIALIZER(runLoadTable); + STEP(runBug18414); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp index 25392698642..957fcd1d1e7 100644 --- a/ndb/test/ndbapi/testTimeout.cpp +++ b/ndb/test/ndbapi/testTimeout.cpp @@ -173,8 
+173,11 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){ NdbSleep_MilliSleep(sleep); // Expect that transaction has timed-out - CHECK(hugoOps.execute_Commit(pNdb) == 237); - + int ret = hugoOps.execute_Commit(pNdb); + CHECK(ret != 0); + NdbError err = pNdb->getNdbError(ret); + CHECK(err.classification == NdbError::TimeoutExpired); + } while(false); hugoOps.closeTransaction(pNdb); diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 0533d585a41..b11e4479a57 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -458,6 +458,10 @@ max-time: 500 cmd: testSystemRestart args: -n Bug18385 T1 +max-time: 500 +cmd: testNodeRestart +args: -n Bug18414 T1 + # OLD FLEX max-time: 500 cmd: flexBench From ad911e8575e84fb336143b5463711ba8dfc7690b Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Mar 2006 12:11:51 +0100 Subject: [PATCH 10/16] ndb - minor update to ndb-autotest.sh and config files ndb/test/run-test/conf-daily-devel-ndbmaster.txt: Add SendBufferMemory to remove rare overruns ndb/test/run-test/conf-dl145a.txt: Add SendBufferMemory to remove rare overruns ndb/test/run-test/conf-ndbmaster.txt: Add SendBufferMemory to remove rare overruns ndb/test/run-test/conf-shark.txt: Add SendBufferMemory to remove rare overruns ndb/test/run-test/ndb-autotest.sh: Add support for conf per host --- ndb/test/run-test/conf-daily-devel-ndbmaster.txt | 3 +++ .../run-test/{conf-daily-basic-dl145a.txt => conf-dl145a.txt} | 3 +++ .../{conf-daily-basic-ndbmaster.txt => conf-ndbmaster.txt} | 3 +++ .../run-test/{conf-daily-basic-shark.txt => conf-shark.txt} | 3 +++ ndb/test/run-test/ndb-autotest.sh | 3 +++ 5 files changed, 15 insertions(+) rename ndb/test/run-test/{conf-daily-basic-dl145a.txt => conf-dl145a.txt} (91%) rename ndb/test/run-test/{conf-daily-basic-ndbmaster.txt => conf-ndbmaster.txt} (91%) rename ndb/test/run-test/{conf-daily-basic-shark.txt => conf-shark.txt} (91%) diff --git 
a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt index 8b340e6a39d..51c171a6357 100644 --- a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt +++ b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt @@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run PortNumber: 16000 ArbitrationRank: 1 DataDir: . + +[TCP DEFAULT] +SendBufferMemory: 2M diff --git a/ndb/test/run-test/conf-daily-basic-dl145a.txt b/ndb/test/run-test/conf-dl145a.txt similarity index 91% rename from ndb/test/run-test/conf-daily-basic-dl145a.txt rename to ndb/test/run-test/conf-dl145a.txt index d8cf8d34d82..d0a240f09d1 100644 --- a/ndb/test/run-test/conf-daily-basic-dl145a.txt +++ b/ndb/test/run-test/conf-dl145a.txt @@ -17,3 +17,6 @@ FileSystemPath: /home/ndbdev/autotest/run PortNumber: 14000 ArbitrationRank: 1 DataDir: . + +[TCP DEFAULT] +SendBufferMemory: 2M diff --git a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt b/ndb/test/run-test/conf-ndbmaster.txt similarity index 91% rename from ndb/test/run-test/conf-daily-basic-ndbmaster.txt rename to ndb/test/run-test/conf-ndbmaster.txt index bcd809593f3..89b41850ec0 100644 --- a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt +++ b/ndb/test/run-test/conf-ndbmaster.txt @@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run PortNumber: 14000 ArbitrationRank: 1 DataDir: . + +[TCP DEFAULT] +SendBufferMemory: 2M diff --git a/ndb/test/run-test/conf-daily-basic-shark.txt b/ndb/test/run-test/conf-shark.txt similarity index 91% rename from ndb/test/run-test/conf-daily-basic-shark.txt rename to ndb/test/run-test/conf-shark.txt index 6d1f8b64f44..d66d0280d8a 100644 --- a/ndb/test/run-test/conf-daily-basic-shark.txt +++ b/ndb/test/run-test/conf-shark.txt @@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run PortNumber: 14000 ArbitrationRank: 1 DataDir: . 
+ +[TCP DEFAULT] +SendBufferMemory: 2M diff --git a/ndb/test/run-test/ndb-autotest.sh b/ndb/test/run-test/ndb-autotest.sh index 4228d2354d3..459f0cd6233 100755 --- a/ndb/test/run-test/ndb-autotest.sh +++ b/ndb/test/run-test/ndb-autotest.sh @@ -299,9 +299,12 @@ choose_conf(){ elif [ -f $test_dir/conf-$1.txt ] then echo "$test_dir/conf-$1.txt" + elif [ -f $test_dir/conf-$HOST.txt ] + echo "$test_dir/conf-$HOST.txt" else echo "Unable to find conf file looked for" 1>&2 echo "$test_dir/conf-$1-$HOST.txt and" 1>&2 + echo "$test_dir/conf-$HOST.txt" 1>&2 echo "$test_dir/conf-$1.txt" 1>&2 exit fi From 4fb98ee6b87a63374381788e2c70bc17e61bd455 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Mar 2006 12:18:07 +0100 Subject: [PATCH 11/16] ndb - some more ndb-autotest updates (previously uncommitted...but in use) ndb/test/run-test/ndb-autotest.sh: More autotest updates --- ndb/test/run-test/ndb-autotest.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ndb/test/run-test/ndb-autotest.sh b/ndb/test/run-test/ndb-autotest.sh index 459f0cd6233..544897a2aa2 100755 --- a/ndb/test/run-test/ndb-autotest.sh +++ b/ndb/test/run-test/ndb-autotest.sh @@ -13,7 +13,7 @@ save_args=$* VERSION="ndb-autotest.sh version 1.04" DATE=`date '+%Y-%m-%d'` -HOST=`hostname` +HOST=`hostname -s` export DATE HOST set -e @@ -35,6 +35,7 @@ report=yes clone=5.0-ndb RUN="daily-basic daily-devel" conf=autotest.conf +LOCK=$HOME/.autotest-lock ############################ # Read command line entries# @@ -66,7 +67,7 @@ done if [ -f $conf ] then - . ./$conf + . 
$conf else echo "Can't find config file: $conf" exit @@ -105,7 +106,6 @@ fi # Setup the clone source location # #################################### -LOCK=$HOME/.autotest-lock src_clone=$src_clone_base-$clone ####################################### @@ -389,7 +389,8 @@ do awk '{for(i=1;i<='$count';i++)print $i;}'` echo $run_hosts >> /tmp/filter_hosts.$$ - choose $conf $run_hosts > d.tmp + choose $conf $run_hosts > d.tmp.$$ + sed -e s,CHOOSE_dir,"$install_dir",g < d.tmp.$$ > d.tmp $mkconfig d.tmp fi From e74b313c115b6eec1e96a33e16d117f33c788ce8 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Mar 2006 13:38:03 +0100 Subject: [PATCH 12/16] ndb - autotest Update makefile for removed files ndb/test/run-test/Makefile.am: Update makefile for removed files --- ndb/test/run-test/Makefile.am | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ndb/test/run-test/Makefile.am b/ndb/test/run-test/Makefile.am index cf08542ae97..8aced6e91b3 100644 --- a/ndb/test/run-test/Makefile.am +++ b/ndb/test/run-test/Makefile.am @@ -7,11 +7,10 @@ include $(top_srcdir)/ndb/config/type_mgmapiclient.mk.am test_PROGRAMS = atrt test_DATA=daily-basic-tests.txt daily-devel-tests.txt \ - conf-daily-basic-ndbmaster.txt \ - conf-daily-basic-shark.txt \ - conf-daily-devel-ndbmaster.txt \ - conf-daily-sql-ndbmaster.txt \ - conf-daily-basic-dl145a.txt + conf-ndbmaster.txt \ + conf-shark.txt \ + conf-dl145a.txt + test_SCRIPTS=atrt-analyze-result.sh atrt-gather-result.sh atrt-setup.sh \ atrt-clear-result.sh make-config.sh make-index.sh make-html-reports.sh From 2279f08af421311fb7b22474942dc7fe2cfd3bc6 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Mar 2006 15:06:44 +0100 Subject: [PATCH 13/16] ndb - Add per partition info (optionally to ndb_desc) ndb/tools/desc.cpp: Add per partition info (optionally to ndb_desc) --- ndb/tools/desc.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/ndb/tools/desc.cpp b/ndb/tools/desc.cpp 
index aac47c9042c..e5371b9b458 100644 --- a/ndb/tools/desc.cpp +++ b/ndb/tools/desc.cpp @@ -23,6 +23,7 @@ NDB_STD_OPTS_VARS; static const char* _dbname = "TEST_DB"; static int _unqualified = 0; +static int _partinfo = 0; static struct my_option my_long_options[] = { NDB_STD_OPTS("ndb_desc"), @@ -32,6 +33,9 @@ static struct my_option my_long_options[] = { "unqualified", 'u', "Use unqualified table names", (gptr*) &_unqualified, (gptr*) &_unqualified, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "extra-partition-info", 'p', "Print more info per partition", + (gptr*) &_partinfo, (gptr*) &_partinfo, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void usage() @@ -52,6 +56,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), "d:t:O,/tmp/ndb_desc.trace"); } +static void print_part_info(Ndb* pNdb, NDBT_Table* pTab); + int main(int argc, char** argv){ NDB_INIT(argv[0]); const char *load_default_groups[]= { "mysql_cluster",0 }; @@ -106,7 +112,11 @@ int main(int argc, char** argv){ ndbout << (*pIdx) << endl; } + ndbout << endl; + + if (_partinfo) + print_part_info(pMyNdb, pTab); } else ndbout << argv[i] << ": " << dict->getNdbError() << endl; @@ -115,3 +125,70 @@ int main(int argc, char** argv){ delete pMyNdb; return NDBT_ProgramExit(NDBT_OK); } + +struct InfoInfo +{ + const char * m_title; + NdbRecAttr* m_rec_attr; + const NdbDictionary::Column* m_column; +}; + + +static +void print_part_info(Ndb* pNdb, NDBT_Table* pTab) +{ + InfoInfo g_part_info[] = { + { "Partition", 0, NdbDictionary::Column::FRAGMENT }, + { "Row count", 0, NdbDictionary::Column::ROW_COUNT }, + { "Commit count", 0, NdbDictionary::Column::COMMIT_COUNT }, + { 0, 0, 0 } + }; + + ndbout << "-- Per partition info -- " << endl; + + NdbConnection* pTrans = pNdb->startTransaction(); + if (pTrans == 0) + return; + + do + { + NdbScanOperation* pOp= pTrans->getNdbScanOperation(pTab->getName()); + if (pOp == NULL) + break; + 
+ NdbResultSet* rs= pOp->readTuples(NdbOperation::LM_CommittedRead); + if (rs == 0) + break; + + if (pOp->interpret_exit_last_row() != 0) + break; + + Uint32 i = 0; + for(i = 0; g_part_info[i].m_title != 0; i++) + { + if ((g_part_info[i].m_rec_attr = pOp->getValue(g_part_info[i].m_column)) == 0) + break; + } + + if (g_part_info[i].m_title != 0) + break; + + if (pTrans->execute(NoCommit) != 0) + break; + + for (i = 0; g_part_info[i].m_title != 0; i++) + ndbout << g_part_info[i].m_title << "\t"; + ndbout << endl; + + while(rs->nextResult() == 0) + { + for(i = 0; g_part_info[i].m_title != 0; i++) + { + ndbout << *g_part_info[i].m_rec_attr << "\t"; + } + ndbout << endl; + } + } while(0); + + pTrans->close(); +} From fde02a804367149ccd24718044ca3de82cc30de5 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 23 Mar 2006 11:53:54 +0100 Subject: [PATCH 14/16] ndb - minor fixes in test programs ndb/src/kernel/blocks/ERROR_codes.txt: Fix conflicting error codes ndb/src/kernel/blocks/dblqh/DblqhMain.cpp: Fix conflicting error codes ndb/test/ndbapi/testNodeRestart.cpp: Fix test program --- ndb/src/kernel/blocks/ERROR_codes.txt | 2 +- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 2 +- ndb/test/ndbapi/testNodeRestart.cpp | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index b4c5d1b1d7e..4887b6a7ea5 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -316,7 +316,7 @@ LQH: 5026 Crash when receiving COPY_ACTIVEREQ 5027 Crash when receiving STAT_RECREQ -5042 Crash starting node, when scan is finished on primary replica +5043 Crash starting node, when scan is finished on primary replica Test Crashes in handling take over ---------------------------------- diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 0aeeaccd55e..3540fc79dff 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -9221,7 +9221,7 @@ void Dblqh::nextScanConfCopyLab(Signal* signal) /*---------------------------------------------------------------------------*/ scanptr.p->scanCompletedStatus = ZTRUE; scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY; - if (ERROR_INSERTED(5042)) + if (ERROR_INSERTED(5043)) { CLEAR_ERROR_INSERT_VALUE; tcConnectptr.p->copyCountWords = ~0; diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index cc2998ff73a..5a7510be9bd 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -439,6 +439,14 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ if (restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; + restarter.waitNodesStartPhase(&nodeId, 1, 3); + + if (restarter.waitNodesNoStart(&nodeId, 1)) + return NDBT_FAILED; + + if (restarter.startNodes(&nodeId, 1)) + return NDBT_FAILED; + if (restarter.waitNodesStarted(&nodeId, 1)) return NDBT_FAILED; From deb4d310909b8589368adf561a2663007dde5cbe Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 23 Mar 2006 15:33:40 +0100 Subject: [PATCH 15/16] ndb - remove bug#18385 from autotest as it only works on 2 node clusters ndb/test/run-test/daily-basic-tests.txt: remove bug#18385 as it only works on 2 node clusters --- ndb/test/run-test/daily-basic-tests.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index b11e4479a57..ce5462d11c9 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -454,10 +454,10 @@ max-time: 500 cmd: testNodeRestart args: -n Bug16772 T1 -max-time: 500 -cmd: testSystemRestart -args: -n Bug18385 T1 - +#max-time: 500 +#cmd: testSystemRestart +#args: -n Bug18385 T1 +# max-time: 500 cmd: testNodeRestart args: -n Bug18414 T1 From 2a00c51673dd11230daa9d3843b8076a15e1f874 Mon Sep 17 00:00:00 2001 From: unknown Date: 
Mon, 27 Mar 2006 10:18:48 +0200 Subject: [PATCH 16/16] ndb - autotest Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert() Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args) ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp: Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args) ndb/test/ndbapi/testNodeRestart.cpp: Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args) ndb/test/src/NdbBackup.cpp: Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args) ndb/test/src/NdbRestarts.cpp: Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args) --- ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp | 17 +++++++++++++++-- ndb/test/ndbapi/testNodeRestart.cpp | 12 ++++++++++++ ndb/test/src/NdbBackup.cpp | 4 ++-- ndb/test/src/NdbRestarts.cpp | 16 ++++++++-------- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 7659ee1145d..04761cb67a8 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -1049,11 +1049,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) g_sectionSegmentPool.getNoOfFree()); } - if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert){ + if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) + { if(signal->getLength() == 1) - theConfig.setRestartOnErrorInsert((int)NRT_NoStart_Restart); + { + Uint32 val = (Uint32)NRT_NoStart_Restart; + const ndb_mgm_configuration_iterator * p = + theConfig.getOwnConfigIterator(); + ndbrequire(p != 0); + + 
if(!ndb_mgm_get_int_parameter(p, CFG_DB_STOP_ON_ERROR_INSERT, &val)) + { + theConfig.setRestartOnErrorInsert(val); + } + } else + { theConfig.setRestartOnErrorInsert(signal->theData[1]); + } } if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 5a7510be9bd..365d6e3ed6e 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -433,6 +433,11 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (restarter.dumpStateOneNode(nodeId, val2, 2)) + return NDBT_FAILED; + if (restarter.dumpStateOneNode(nodeId, dump, 2)) return NDBT_FAILED; @@ -444,6 +449,9 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){ if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; + if (restarter.dumpStateOneNode(nodeId, val2, 1)) + return NDBT_FAILED; + if (restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; @@ -626,6 +634,10 @@ runBug18414(NDBT_Context* ctx, NDBT_Step* step){ goto err; } + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + if (restarter.dumpStateOneNode(node2, val2, 2)) + goto err; + if (restarter.insertErrorInNode(node2, 5003)) goto err; diff --git a/ndb/test/src/NdbBackup.cpp b/ndb/test/src/NdbBackup.cpp index 9f65fe6b3bc..a9c71120d80 100644 --- a/ndb/test/src/NdbBackup.cpp +++ b/ndb/test/src/NdbBackup.cpp @@ -292,8 +292,8 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, << masterNodeId << endl; - int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert; - CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0, + int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, "failed 
to set error insert"); diff --git a/ndb/test/src/NdbRestarts.cpp b/ndb/test/src/NdbRestarts.cpp index c0f31af84ce..eea4af437c4 100644 --- a/ndb/test/src/NdbRestarts.cpp +++ b/ndb/test/src/NdbRestarts.cpp @@ -641,8 +641,8 @@ int restartNFDuringNR(NdbRestarter& _restarter, CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); - int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert; - CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0, + int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ; + CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, @@ -698,8 +698,8 @@ int restartNFDuringNR(NdbRestarter& _restarter, CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); - int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert; - CHECK(_restarter.dumpStateOneNode(crashNodeId, &val, 2) == 0, + int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + CHECK(_restarter.dumpStateOneNode(crashNodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(crashNodeId, error) == 0, @@ -771,8 +771,8 @@ int restartNodeDuringLCP(NdbRestarter& _restarter, << " error code = " << error << endl; { - int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert; - CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0, + int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + CHECK(_restarter.dumpStateAllNodes(val, 2) == 0, "failed to set RestartOnErrorInsert"); } @@ -812,8 +812,8 @@ int restartNodeDuringLCP(NdbRestarter& _restarter, ndbout << _restart->m_name << " restarting non-master node = " << nodeId << " error code = " << error << endl; - int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert; - CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0, + int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + CHECK(_restarter.dumpStateAllNodes(val, 2) == 0, "failed to set 
RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,