From 1a6c83f04d12db2e50a4e6d82bd72f97b36f66e5 Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 1 Jun 2006 08:24:05 +0200 Subject: [PATCH] ndb - bug#20185 Dont be too aggressive in Dbtc::nodeFailCheckTransaction let it timeout by 1, so that it does not assert that it has waited too long old impl. set timeotu value to 0, making timeout = (ctcTimer - 0) which could be quite big. --- ndb/src/kernel/blocks/ERROR_codes.txt | 3 ++ ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 24 ++++++++++ ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 8 ++-- ndb/test/ndbapi/testNodeRestart.cpp | 56 +++++++++++++++++++++++ ndb/test/run-test/daily-basic-tests.txt | 4 ++ 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt index 4887b6a7ea5..20f03e7ea69 100644 --- a/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/ndb/src/kernel/blocks/ERROR_codes.txt @@ -63,6 +63,9 @@ Delay GCP_SAVEREQ by 10 secs 7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE +7030: Delay in GCP_PREPARE until node has completed a node failure +7031: Delay in GCP_PREPARE and die 3s later + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 3bbf1c76644..65c864bd853 100644 --- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -5417,6 +5417,12 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, return; } + if (ERROR_INSERTED(7030)) + { + ndbout_c("Reenable GCP_PREPARE"); + CLEAR_ERROR_INSERT_VALUE; + } + NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0]; nf->blockNo = DBDIH; nf->nodeId = cownNodeId; @@ -7459,6 +7465,16 @@ void Dbdih::execGCP_PREPARE(Signal* signal) { jamEntry(); CRASH_INSERTION(7005); + + if (ERROR_INSERTED(7030)) + { + cgckptflag = true; + ndbout_c("Delayed GCP_PREPARE 5s"); + sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, + signal->getLength()); + return; + } + Uint32 masterNodeId = signal->theData[0]; Uint32 gci = signal->theData[1]; BlockReference retRef = calcDihBlockRef(masterNodeId); @@ -7471,6 +7487,14 @@ void Dbdih::execGCP_PREPARE(Signal* signal) cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED; cnewgcp = gci; + if (ERROR_INSERTED(7031)) + { + ndbout_c("Crashing delayed in GCP_PREPARE 3s"); + signal->theData[0] = 9999; + sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); + return; + } + signal->theData[0] = cownNodeId; signal->theData[1] = gci; sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA); diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 7982fe82e45..2bd61296554 100644 --- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -7002,18 +7002,20 @@ Dbtc::nodeFailCheckTransactions(Signal* signal, { jam(); Ptr transPtr; + Uint32 TtcTimer = ctcTimer; + Uint32 TapplTimeout = c_appl_timeout_value; for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) { ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); if (transPtr.p->m_transaction_nodes.get(failedNodeId)) { jam(); + // Force timeout regardless of state - Uint32 save = c_appl_timeout_value; c_appl_timeout_value = 1; - setApiConTimer(transPtr.i, 0, __LINE__); + setApiConTimer(transPtr.i, TtcTimer - 2, __LINE__); timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); - c_appl_timeout_value = save; + c_appl_timeout_value = TapplTimeout; } // Send CONTINUEB to continue later diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp index 7017aac0ade..68f101442c5 100644 --- a/ndb/test/ndbapi/testNodeRestart.cpp +++ b/ndb/test/ndbapi/testNodeRestart.cpp @@ -868,6 +868,56 @@ runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ return NDBT_OK; } +int runBug20185(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter restarter; + HugoOperations hugoOps(*ctx->getTab()); + Ndb* pNdb = GETNDB(step); + + int dump[] = { 7090, 20 } ; + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + if(hugoOps.startTransaction(pNdb) != 0) + return NDBT_FAILED; + + if(hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0) + return NDBT_FAILED; + + if (hugoOps.execute_NoCommit(pNdb) != 0) + return NDBT_FAILED; + + int nodeId; + const int node = hugoOps.getTransaction()->getConnectedNodeId(); + do { + nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); + } while (nodeId == node); + + if (restarter.insertErrorInAllNodes(7030)) + return NDBT_FAILED; + + if (restarter.insertErrorInNode(nodeId, 7031)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(500); + + if (hugoOps.execute_Commit(pNdb) == 0) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + restarter.waitClusterStarted(); + + if (restarter.dumpStateAllNodes(dump, 1)) + return NDBT_FAILED; + + return NDBT_OK; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -1175,6 +1225,12 @@ TESTCASE("Bug18612SR", STEP(runBug18612SR); FINALIZER(runClearTable); } +TESTCASE("Bug20185", + ""){ + INITIALIZER(runLoadTable); + STEP(runBug20185); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 508bf4c3d1e..e70dcafb249 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -470,6 +470,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug18612SR T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug20185 T1 + # OLD FLEX max-time: 500 cmd: flexBench