From 6ac6b08c41beb47cb38aa2d19a86157997f5cda0 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Fri, 17 Mar 2006 10:09:35 +0100
Subject: [PATCH 01/16] ndb - bug#18298   8 repeated nr with table wo/ logging
 cause crash   Don't create crashed replica for temporary tables

ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Don't create crashed replica for temporary tables
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Don't create crashed replica for temporary tables
---
 ndb/src/kernel/blocks/dbdih/Dbdih.hpp     |  3 ++-
 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
index 0c107e35603..f74c0f36c4d 100644
--- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
@@ -1038,7 +1038,8 @@ private:
   void prepareReplicas(FragmentstorePtr regFragptr);
   void removeNodeFromStored(Uint32 nodeId,
                             FragmentstorePtr regFragptr,
-                            ReplicaRecordPtr replicaPtr);
+                            ReplicaRecordPtr replicaPtr,
+			    bool temporary);
   void removeOldStoredReplica(FragmentstorePtr regFragptr,
                               ReplicaRecordPtr replicaPtr);
   void removeStoredReplica(FragmentstorePtr regFragptr,
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index 776e59ea495..fab428aadef 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -5212,6 +5212,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
 
   //const Uint32 lcpId = SYSFILE->latestLCP_ID;
   const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
+  const bool temporary = !tabPtr.p->storedTable;
   
   FragmentstorePtr fragPtr;
   for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
@@ -5232,7 +5233,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
         jam();
 	found = true;
 	noOfRemovedReplicas++;
-	removeNodeFromStored(nodeId, fragPtr, replicaPtr);
+	removeNodeFromStored(nodeId, fragPtr, replicaPtr, temporary);
 	if(replicaPtr.p->lcpOngoingFlag){
 	  jam();
 	  /**
@@ -12051,9 +12052,18 @@ void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
 /*---------------------------------------------------------------*/
 void Dbdih::removeNodeFromStored(Uint32 nodeId,
                                  FragmentstorePtr fragPtr,
-                                 ReplicaRecordPtr replicatePtr)
+                                 ReplicaRecordPtr replicatePtr,
+				 bool temporary)
 {
-  newCrashedReplica(nodeId, replicatePtr);
+  if (!temporary)
+  {
+    jam();
+    newCrashedReplica(nodeId, replicatePtr);
+  }
+  else
+  {
+    jam();
+  }
   removeStoredReplica(fragPtr, replicatePtr);
   linkOldStoredReplica(fragPtr, replicatePtr);
   ndbrequire(fragPtr.p->storedReplicas != RNIL);

From 3bfaf33392901b90d420e37450164d7a0db8e3ed Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Fri, 17 Mar 2006 10:55:02 +0100
Subject: [PATCH 02/16] ndb - bug#16772   don't allow node to join cluster
 until all nodes have completed failure handling

ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  When getting CM_ADD for a node that I haven't completed failure handling for, do _not_ just override.
  Instead set state...and send CM_ACK_ADD on execCONNECT_REP (much...later)
ndb/test/ndbapi/testNodeRestart.cpp:
  testcase for bug#16772
ndb/test/run-test/daily-basic-tests.txt:
  Run test in basic suite
---
 ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 103 ++++++++++++++++++++----
 ndb/test/ndbapi/testNodeRestart.cpp     |  50 ++++++++++++
 ndb/test/run-test/daily-basic-tests.txt |   4 +
 3 files changed, 142 insertions(+), 15 deletions(-)

diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
index 6095895e7c2..70084e6b171 100644
--- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
@@ -257,6 +257,7 @@ void Qmgr::setArbitTimeout(UintR aArbitTimeout)
 
 void Qmgr::execCONNECT_REP(Signal* signal)
 {
+  jamEntry();
   const Uint32 nodeId = signal->theData[0];
   c_connectedNodes.set(nodeId);
   NodeRecPtr nodePtr;
@@ -264,9 +265,13 @@ void Qmgr::execCONNECT_REP(Signal* signal)
   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
   switch(nodePtr.p->phase){
   case ZSTARTING:
-    jam();
-    break;
   case ZRUNNING:
+    jam();
+    if(!c_start.m_nodes.isWaitingFor(nodeId)){
+      jam();
+      return;
+    }
+    break;
   case ZPREPARE_FAIL:
   case ZFAIL_CLOSING:
     jam();
@@ -277,21 +282,28 @@ void Qmgr::execCONNECT_REP(Signal* signal)
   case ZAPI_INACTIVE:
     return;
   }
-
-  if(!c_start.m_nodes.isWaitingFor(nodeId)){
-    jam();
-    return;
-  }
-
+  
   switch(c_start.m_gsn){
   case GSN_CM_REGREQ:
     jam();
     sendCmRegReq(signal, nodeId);
     return;
-  case GSN_CM_NODEINFOREQ:{
+  case GSN_CM_NODEINFOREQ:
     jam();
     sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
     return;
+  case GSN_CM_ADD:{
+    jam();
+
+    ndbrequire(getOwnNodeId() != cpresident);
+    c_start.m_nodes.clearWaitingFor(nodeId);
+    c_start.m_gsn = RNIL;
+    
+    NodeRecPtr addNodePtr;
+    addNodePtr.i = nodeId;
+    ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
+    cmAddPrepare(signal, addNodePtr, nodePtr.p);
+    return;
   }
   default:
     return;
@@ -924,15 +936,27 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
     return;
   case ZFAIL_CLOSING:
     jam();
-#ifdef VM_TRACE
-    ndbout_c("Enabling communication to CM_ADD node state=%d", 
-	     nodePtr.p->phase);
-#endif
+    
+#if 1
+    warningEvent("Received request to incorporate node %u, "
+		 "while error handling has not yet completed",
+		 nodePtr.i); // node is still ZFAIL_CLOSING: the CM_ADD is parked, not applied
+    
+    ndbrequire(getOwnNodeId() != cpresident);
+    ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
+    c_start.m_nodes.clearWaitingFor();
+    c_start.m_nodes.setWaitingFor(nodePtr.i);
+    c_start.m_gsn = GSN_CM_ADD;
+#else
+    warningEvent("Enabling communication to CM_ADD node %u state=%d", 
+		 nodePtr.i,
+		 nodePtr.p->phase);
     nodePtr.p->phase = ZSTARTING;
     nodePtr.p->failState = NORMAL;
     signal->theData[0] = 0;
     signal->theData[1] = nodePtr.i;
     sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
+#endif
     return;
   case ZSTARTING:
     break;
@@ -1766,11 +1790,27 @@ void Qmgr::execNDB_FAILCONF(Signal* signal)
 
   jamEntry();
   failedNodePtr.i = signal->theData[0];  
+
+  if (ERROR_INSERTED(930))
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
+    return;
+  }
+  
   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
   if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
     failedNodePtr.p->failState = NORMAL;
   } else {
     jam();
+
+    char buf[100];
+    BaseString::snprintf(buf, 100, 
+			 "Received NDB_FAILCONF for node %u with state: %d %d",
+			 failedNodePtr.i,
+			 failedNodePtr.p->phase,
+			 failedNodePtr.p->failState);
+    progError(__LINE__, 0, buf);
     systemErrorLab(signal, __LINE__);
   }//if
   if (cpresident == getOwnNodeId()) {
@@ -2077,10 +2117,42 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
   if (failedNodePtr.i == getOwnNodeId()) {
     jam();
-    systemErrorLab(signal, __LINE__);
+    // We are the node reported as failed: build a readable reason for progError.
+    const char * msg = 0;
+    switch(aFailCause){
+    case FailRep::ZOWN_FAILURE:
+      msg = "Own failure";
+      break;
+    case FailRep::ZOTHER_NODE_WHEN_WE_START:
+    case FailRep::ZOTHERNODE_FAILED_DURING_START:
+      msg = "Other node died during start";
+      break;
+    case FailRep::ZIN_PREP_FAIL_REQ:
+      msg = "Prep fail";
+      break;
+    case FailRep::ZSTART_IN_REGREQ:
+      msg = "Start timeout";
+      break;
+    case FailRep::ZHEARTBEAT_FAILURE:
+      msg = "Heartbeat failure";
+      break;
+    case FailRep::ZLINK_FAILURE:
+      msg = "Connection failure";
+      break;
+    }
+    // A cause without a case above leaves msg == 0 and is printed as "<Unknown>".
+    char buf[100];
+    BaseString::snprintf(buf, sizeof(buf),
+			 "We(%u) have been declared dead by %u reason: %s(%u)",
+			 getOwnNodeId(),
+			 refToNode(signal->getSendersBlockRef()),
+			 msg ? msg : "<Unknown>", // %s: textual reason (must precede the code)
+			 aFailCause);             // %u: numeric fail cause
+
+    progError(__LINE__, 0, buf);
     return;
   }//if
-
+  
   myNodePtr.i = getOwnNodeId();
   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
   if (myNodePtr.p->phase != ZRUNNING) {
@@ -2791,6 +2863,7 @@ void Qmgr::failReport(Signal* signal,
         cfailureNr = cprepareFailureNr;
         ctoFailureNr = 0;
         ctoStatus = Q_ACTIVE;
+	c_start.reset(); // Don't take over nodes being started
         if (cnoCommitFailedNodes > 0) {
           jam();
 	  /**-----------------------------------------------------------------
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index a741e6233d9..eebd631af94 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -535,6 +535,52 @@ err:
   return NDBT_FAILED;
 }
 
+int 
+runBug16772(NDBT_Context* ctx, NDBT_Step* step){
+  // Bug#16772: a restarted node must not join before NF handling has completed.
+  NdbRestarter restarter;
+  if (restarter.getNumDbNodes() < 2)
+  {
+    ctx->stopTest();
+    return NDBT_OK; // needs one surviving node plus one node to restart
+  }
+  // Pick a surviving node (not the master, per the helper's name) and another to restart.
+  int aliveNodeId = restarter.getRandomNotMasterNodeId(rand());
+  int deadNodeId = aliveNodeId;
+  while (deadNodeId == aliveNodeId)
+    deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
+  // Error insert 930 makes QMGR on the surviving node discard one NDB_FAILCONF.
+  if (restarter.insertErrorInNode(aliveNodeId, 930))
+    return NDBT_FAILED;
+
+  if (restarter.restartOneDbNode(deadNodeId,
+				 /** initial */ false, 
+				 /** nostart */ true,
+				 /** abort   */ true))
+    return NDBT_FAILED;
+  
+  if (restarter.waitNodesNoStart(&deadNodeId, 1))
+    return NDBT_FAILED;
+
+  if (restarter.startNodes(&deadNodeId, 1))
+    return NDBT_FAILED;
+
+  // The restart should now hang, since the surviving node threw away NDB_FAILCONF
+  int ret = restarter.waitNodesStartPhase(&deadNodeId, 1, 3, 10);
+  // So this wait is expected to fail (non-zero ret): start phase 3 must not be reached
+  // Now deliver the missing NDB_FAILCONF for deadNodeId through a dump command.
+  // NOTE(review): 7020, 323, 252 are presumably dump code, signal number, block - confirm
+  int dump[] = { 7020, 323, 252, 0 };
+  dump[3] = deadNodeId;
+  if (restarter.dumpStateOneNode(aliveNodeId, dump, 4))
+    return NDBT_FAILED;
+  
+  if (restarter.waitNodesStarted(&deadNodeId, 1))
+    return NDBT_FAILED;
+
+  return ret ? NDBT_OK : NDBT_FAILED; // pass only if the earlier start-phase wait failed
+}
+
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -820,6 +866,10 @@ TESTCASE("Bug15685",
   STEP(runBug15685);
   FINALIZER(runClearTable);
 }
+TESTCASE("Bug16772",
+	 "Test bug with restarting before NF handling is complete"){
+  STEP(runBug16772); // single step, no FINALIZER: the step only restarts nodes, no table data
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 6378b4a06d3..169daae6d7f 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -446,6 +446,10 @@ max-time: 500
 cmd: testNodeRestart
 args: -n Bug15685 T1
 
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug16772 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

From 37230a2a8867a2cc6066dac51ddc775688cb1cba Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Mon, 20 Mar 2006 11:29:58 +0100
Subject: [PATCH 03/16] ndb - wl2610   Actively abort transactions (that are
 affected) during NF   This removes a lot of bugs that can otherwise occur when
 using     a high value for TransactionDeadLockTimeout

ndb/include/kernel/signaldata/TcContinueB.hpp:
  New continueb for active transaction abort on nf
ndb/src/kernel/blocks/dbtc/Dbtc.hpp:
  Add bitmask of participating nodes to transaction record
  Add bitmask of node fail steps, so that NF_CompleteRep is not sent until all steps have completed
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Active transaction abortion
---
 ndb/include/kernel/signaldata/TcContinueB.hpp |   3 +-
 ndb/src/kernel/blocks/dbtc/Dbtc.hpp           |  18 +-
 ndb/src/kernel/blocks/dbtc/DbtcMain.cpp       | 192 +++++++++++++-----
 3 files changed, 164 insertions(+), 49 deletions(-)

diff --git a/ndb/include/kernel/signaldata/TcContinueB.hpp b/ndb/include/kernel/signaldata/TcContinueB.hpp
index 85213791b2a..b87b982e49b 100644
--- a/ndb/include/kernel/signaldata/TcContinueB.hpp
+++ b/ndb/include/kernel/signaldata/TcContinueB.hpp
@@ -44,7 +44,8 @@ private:
     CHECK_WAIT_DROP_TAB_FAILED_LQH         = 16,
     TRIGGER_PENDING                        = 17,
     
-    DelayTCKEYCONF = 18
+    DelayTCKEYCONF = 18,
+    ZNF_CHECK_TRANSACTIONS = 19
   };
 };
 
diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
index 61afef30b43..23c5a7d08eb 100644
--- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
+++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
@@ -636,6 +636,7 @@ public:
     ConnectionState apiConnectstate;
     UintR transid[2];
     UintR firstTcConnect;
+    NdbNodeBitmask m_transaction_nodes; 
     
     //---------------------------------------------------
     // Second 16 byte cache line. Hot variables.
@@ -941,6 +942,17 @@ public:
     UintR noOfWordsTCINDXCONF;
     UintR packedWordsTCINDXCONF[30];
     BlockReference hostLqhBlockRef;
+
+    enum NodeFailBits // node-failure handling steps still pending for this host
+    {
+      NF_TAKEOVER          = 0x1, // cleared once TC take-over is done (TAKE_OVERTCCONF path)
+      NF_CHECK_SCAN        = 0x2, // cleared at the end of checkScanActiveInFailedLqh
+      NF_CHECK_TRANSACTION = 0x4, // cleared at the end of nodeFailCheckTransactions
+      NF_CHECK_DROP_TAB    = 0x8, // cleared when checkWaitDropTabFailedLqh finishes
+      NF_NODE_FAIL_BITS    = 0xF // All bits; assigned to m_nf_bits in execNODE_FAILREP
+    };
+    Uint32 m_nf_bits; // NodeFailBits mask; NF_COMPLETEREP is sent when it reaches 0
+    NdbNodeBitmask m_lqh_trans_conf;
   }; /* p2c: size = 128 bytes */
   
   typedef Ptr<HostRecord> HostRecordPtr;
@@ -1578,7 +1590,7 @@ private:
   void wrongSchemaVersionErrorLab(Signal* signal);
   void noFreeConnectionErrorLab(Signal* signal);
   void tckeyreq050Lab(Signal* signal);
-  void timeOutFoundLab(Signal* signal, UintR anAdd);
+  void timeOutFoundLab(Signal* signal, UintR anAdd, Uint32 errCode);
   void completeTransAtTakeOverLab(Signal* signal, UintR TtakeOverInd);
   void completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd);
   void completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd);
@@ -1600,6 +1612,9 @@ private:
   void checkScanFragList(Signal*, Uint32 failedNodeId, ScanRecord * scanP, 
 			 LocalDLList<ScanFragRec>::Head&);
 
+  void nodeFailCheckTransactions(Signal*,Uint32 transPtrI,Uint32 failedNodeId);
+  void checkNodeFailComplete(Signal* signal, Uint32 failedNodeId, Uint32 bit);
+  
   // Initialisation
   void initData();
   void initRecords();
@@ -1626,6 +1641,7 @@ private:
   HostRecord *hostRecord;
   HostRecordPtr hostptr;
   UintR chostFilesize;
+  NdbNodeBitmask c_alive_nodes;
 
   GcpRecord *gcpRecord;
   GcpRecordPtr gcpPtr;
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index d9d1f01b213..4750a8c388a 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -262,6 +262,10 @@ void Dbtc::execCONTINUEB(Signal* signal)
     jam();
     checkScanActiveInFailedLqh(signal, Tdata0, Tdata1);
     return;
+  case TcContinueB::ZNF_CHECK_TRANSACTIONS:
+    jam();
+    nodeFailCheckTransactions(signal, Tdata0, Tdata1);
+    return;
   case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:
     jam();
     checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1);
@@ -301,6 +305,7 @@ void Dbtc::execINCL_NODEREQ(Signal* signal)
   hostptr.p->hostStatus = HS_ALIVE;
   hostptr.p->takeOverStatus = TOS_IDLE;
   signal->theData[0] = cownref;
+  c_alive_nodes.set(hostptr.i);
   sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
 }
 
@@ -487,6 +492,7 @@ Dbtc::checkWaitDropTabFailedLqh(Signal* signal, Uint32 nodeId, Uint32 tableId)
      * Finished
      */
     jam();
+    checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB);
     return;
   }
   
@@ -859,6 +865,7 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
         jam();
         con_lineNodes++;
         hostptr.p->hostStatus = HS_ALIVE;
+	c_alive_nodes.set(i);
       }//if
     }//if
   }//for
@@ -2314,6 +2321,7 @@ void Dbtc::initApiConnectRec(Signal* signal,
   regApiPtr->commitAckMarker = RNIL;
   regApiPtr->buddyPtr = RNIL;
   regApiPtr->currSavePointId = 0;
+  regApiPtr->m_transaction_nodes.clear();
   // Trigger data
   releaseFiredTriggerData(&regApiPtr->theFiredTriggers),
   // Index data
@@ -2921,6 +2929,10 @@ void Dbtc::tckeyreq050Lab(Signal* signal)
   signal->theData[0] = TdihConnectptr;
   signal->theData[1] = Ttableref;
   signal->theData[2] = TdistrHashValue;
+  signal->theData[3] = 0;
+  signal->theData[4] = 0;
+  signal->theData[5] = 0;
+  signal->theData[6] = 0;
 
   /*-------------------------------------------------------------*/
   /* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */
@@ -3098,6 +3110,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   TcConnectRecord * const regTcPtr = tcConnectptr.p;
   ApiConnectRecord * const regApiPtr = apiConnectptr.p;
   CacheRecord * const regCachePtr = cachePtr.p;
+  UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
 #ifdef ERROR_INSERT
   if (ERROR_INSERTED(8002)) {
     systemErrorLab(signal);
@@ -3135,6 +3148,9 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd);
 
   Tdata10 = 0;
+  sig0 = regCachePtr->opSimple;
+  sig1 = regTcPtr->operation;
+  bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
   LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
   LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
   LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
@@ -3144,8 +3160,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   LqhKeyReq::setApplicationAddressFlag(Tdata10, 1);
   LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp);
   LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
-  LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple);
-  LqhKeyReq::setOperation(Tdata10, regTcPtr->operation);
+  LqhKeyReq::setSimpleFlag(Tdata10, sig0);
+  LqhKeyReq::setOperation(Tdata10, sig1);
   /* ----------------------------------------------------------------------- 
    * Sequential Number of first LQH = 0, bit 22-23                           
    * IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@@ -3158,18 +3174,16 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
    * ----------------------------------------------------------------------- */
   //LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo);
   Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+  const Uint32 noOfLqhs = regTcPtr->noOfNodes;
   if(commitAckMarker != RNIL){
     jam();
-    
     LqhKeyReq::setMarkerFlag(Tdata10, 1);
 
-    CommitAckMarker * tmp;
-    tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
+    CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
     
     /**
      * Populate LQH array
      */
-    const Uint32 noOfLqhs = regTcPtr->noOfNodes;
     tmp->noOfLqhs = noOfLqhs;
     for(Uint32 i = 0; i<noOfLqhs; i++){
       tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i];
@@ -3180,7 +3194,6 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   /* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT    */
   /* IS SENT TO A PRIMARY NODE.                                    */
   /* ************************************************************> */
-  UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
 
   LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend();
 
@@ -3204,6 +3217,14 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   sig5 = regTcPtr->clientData;
   sig6 = regCachePtr->scanInfo;
 
+  if (! simpleRead)
+  {
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]);  
+  }
+  
   lqhKeyReq->tableSchemaVersion = sig0;
   lqhKeyReq->fragmentData = sig1;
   lqhKeyReq->transId1 = sig2;
@@ -4587,6 +4608,7 @@ void Dbtc::copyApi(Signal* signal)
   UintR TgcpPointer = regTmpApiPtr->gcpPointer;
   UintR TgcpFilesize = cgcpFilesize;
   UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker;
+  NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes;
   GcpRecord *localGcpRecord = gcpRecord;
 
   regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref;
@@ -4597,6 +4619,7 @@ void Dbtc::copyApi(Signal* signal)
   regApiPtr->transid[1] = Ttransid2;
   regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec;
   regApiPtr->commitAckMarker = TcommitAckMarker;
+  regApiPtr->m_transaction_nodes = Tnodes;
 
   gcpPtr.i = TgcpPointer;
   ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord);
@@ -4607,6 +4630,7 @@ void Dbtc::copyApi(Signal* signal)
   regTmpApiPtr->commitAckMarker = RNIL;
   regTmpApiPtr->firstTcConnect = RNIL;
   regTmpApiPtr->lastTcConnect = RNIL;
+  regTmpApiPtr->m_transaction_nodes.clear();
   releaseAllSeizedIndexOperations(regTmpApiPtr);
 }//Dbtc::copyApi()
 
@@ -4865,7 +4889,7 @@ void Dbtc::releaseTransResources(Signal* signal)
   TcConnectRecordPtr localTcConnectptr;
   UintR TtcConnectFilesize = ctcConnectFilesize;
   TcConnectRecord *localTcConnectRecord = tcConnectRecord;
-
+  apiConnectptr.p->m_transaction_nodes.clear();
   localTcConnectptr.i = apiConnectptr.p->firstTcConnect;
   do {
     jam();
@@ -5269,7 +5293,8 @@ void Dbtc::execTC_COMMITREQ(Signal* signal)
       break;
     case CS_ABORTING:
       jam();
-      errorCode = ZABORTINPROGRESS;
+      errorCode = regApiPtr->returncode ? 
+	regApiPtr->returncode : ZABORTINPROGRESS;
       break;
     case CS_START_SCAN:
       jam();
@@ -5808,9 +5833,9 @@ void Dbtc::abort010Lab(Signal* signal)
 
   if (transP->firstTcConnect == RNIL) {
     jam();
-    /*-----------------------------------------------------------------------*/
-    /*    WE HAVE NO PARTICIPANTS IN THE TRANSACTION.                        */
-    /*-----------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------*/
+    /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION.                        */
+    /*--------------------------------------------------------------------*/
     releaseAbortResources(signal);
     return;
   }//if
@@ -6087,10 +6112,12 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
     if (api_timer != 0) {
       time_out_value= time_out_param + (api_con_ptr & mask_value);
       time_passed= tc_timer - api_timer;
-      if (time_passed > time_out_value) {
+      if (time_passed > time_out_value) 
+      {
         jam();
-        timeOutFoundLab(signal, api_con_ptr);
-        return;
+        timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR);
+	api_con_ptr++;
+	break;
       }
     }
   }
@@ -6110,10 +6137,8 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
   return;
 }//Dbtc::timeOutLoopStartLab()
 
-void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) 
+void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) 
 {
-  sendContinueTimeOutControl(signal, TapiConPtr + 1);
-  
   apiConnectptr.i = TapiConPtr;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
   /*------------------------------------------------------------------*/
@@ -6126,7 +6151,8 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
 	<< "Time-out in state = " << apiConnectptr.p->apiConnectstate
 	<< " apiConnectptr.i = " << apiConnectptr.i 
 	<< " - exec: " << apiConnectptr.p->m_exec_flag
-	<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
+	<< " - place: " << c_apiConTimer_line[apiConnectptr.i]
+	<< " code: " << errCode);
   switch (apiConnectptr.p->apiConnectstate) {
   case CS_STARTED:
     if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
@@ -6143,7 +6169,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
       }//if
     }
     apiConnectptr.p->returnsignal = RS_TCROLLBACKREP;      
-    apiConnectptr.p->returncode = ZTIME_OUT_ERROR;
+    apiConnectptr.p->returncode = errCode;
     abort010Lab(signal);
     return;
   case CS_RECEIVING:
@@ -6156,7 +6182,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
     /*       START ABORTING THE TRANSACTION. ALSO START CHECKING THE    */
     /*       REMAINING TRANSACTIONS.                                    */
     /*------------------------------------------------------------------*/
-    terrorCode = ZTIME_OUT_ERROR;
+    terrorCode = errCode;
     abortErrorLab(signal);
     return;
   case CS_COMMITTING:
@@ -6820,6 +6846,8 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
     /*       FAILED.                                              */
     /*------------------------------------------------------------*/
     hostptr.p->hostStatus = HS_DEAD;
+    hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
+    c_alive_nodes.clear(hostptr.i);
 
     if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
       jam();
@@ -6832,14 +6860,7 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
       /*       REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
       /*       USED THEM IS COMPLETED.                              */
       /*------------------------------------------------------------*/
-      {
-	NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
-	nfRep->blockNo      = DBTC;
-	nfRep->nodeId       = cownNodeid;
-	nfRep->failedNodeId = hostptr.i;
-      }
-      sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
-		 NFCompleteRep::SignalLength, JBB);
+      hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
     } else {
       ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
       hostptr.p->takeOverStatus = TOS_NODE_FAILED;
@@ -6892,16 +6913,9 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
 	  /*       MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
 	  /*       CRASHED NODE HAVE ALREADY RECOVERED.                 */
 	  /*------------------------------------------------------------*/
-	  for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) {
-	    jam();
-	    ptrAss(tmpHostptr, hostRecord);
-	    if (tmpHostptr.p->hostStatus == HS_ALIVE) {
-	      jam();
-	      tblockref = calcTcBlockRef(tmpHostptr.i);
-	      signal->theData[0] = hostptr.i;
-	      sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
-	    }//if
-	  }//for
+	  NodeReceiverGroup rg(DBTC, c_alive_nodes);
+	  signal->theData[0] = hostptr.i;
+	  sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
 	}//if
       }//if
     }//for
@@ -6939,10 +6953,30 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
     /*------------------------------------------------------------*/
     checkScanActiveInFailedLqh(signal, 0, hostptr.i);
     checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
+    nodeFailCheckTransactions(signal, 0, hostptr.i);
   }//for
 
 }//Dbtc::execNODE_FAILREP()
 
+void
+Dbtc::checkNodeFailComplete(Signal* signal, 
+			    Uint32 failedNodeId, // node whose failure handling is tracked
+			    Uint32 bit)          // HostRecord::NodeFailBits step that just finished
+{ // Marks one node-failure step as done; reports to DIH once no step remains.
+  hostptr.i = failedNodeId; // note: repositions the member hostptr, not a local
+  ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+  hostptr.p->m_nf_bits &= ~bit;
+  if (hostptr.p->m_nf_bits == 0) // every step for this node has completed
+  {
+    NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
+    nfRep->blockNo      = DBTC;
+    nfRep->nodeId       = cownNodeid;
+    nfRep->failedNodeId = hostptr.i;
+    sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
+	       NFCompleteRep::SignalLength, JBB);
+  }
+}
+
 void Dbtc::checkScanActiveInFailedLqh(Signal* signal, 
 				      Uint32 scanPtrI, 
 				      Uint32 failedNodeId){
@@ -6984,8 +7018,44 @@ void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
     sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
     return;
   }//for
+
+  checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN);
 }
 
+void
+Dbtc::nodeFailCheckTransactions(Signal* signal, 
+				Uint32 transPtrI, 
+				Uint32 failedNodeId)
+{ // Checks ONE ApiConnectRecord (index transPtrI) per invocation for failedNodeId.
+  jam();
+  Ptr<ApiConnectRecord> transPtr;
+  for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++)
+  { // runs at most once: the body always returns after scheduling the next index
+    ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); 
+    if (transPtr.p->m_transaction_nodes.get(failedNodeId))
+    {
+      jam();
+      // Transaction involves the failed node: force timeout regardless of state
+      Uint32 save = c_appl_timeout_value;
+      c_appl_timeout_value = 1; // temporary override, restored right after the call
+      setApiConTimer(transPtr.i, 0, __LINE__);
+      timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT);
+      c_appl_timeout_value = save;
+    }
+    // One record per signal; the rest of the array is handled by later CONTINUEBs.
+    // Send CONTINUEB to continue later
+    signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS;
+    signal->theData[1] = transPtr.i + 1; // Check next
+    signal->theData[2] = failedNodeId;
+    sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+    return;
+  }
+  // Reached only when transPtrI is past the last record: this NF step is done.
+  checkNodeFailComplete(signal, failedNodeId, 
+			HostRecord::NF_CHECK_TRANSACTION);
+}
+
+
 void
 Dbtc::checkScanFragList(Signal* signal,
 			Uint32 failedNodeId,
@@ -7025,14 +7095,7 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
     /*       USED THEM IS COMPLETED.                              */
     /*------------------------------------------------------------*/
     hostptr.p->takeOverStatus = TOS_COMPLETED;
-    {
-      NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
-      nfRep->blockNo      = DBTC;
-      nfRep->nodeId       = cownNodeid;
-      nfRep->failedNodeId = hostptr.i;
-    }
-    sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
-               NFCompleteRep::SignalLength, JBB);
+    checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
     break;
   case TOS_COMPLETED:
     jam();
@@ -7979,6 +8042,7 @@ void Dbtc::initApiConnectFail(Signal* signal)
   apiConnectptr.p->ndbapiBlockref = 0;
   apiConnectptr.p->ndbapiConnect = 0;
   apiConnectptr.p->buddyPtr = RNIL;
+  apiConnectptr.p->m_transaction_nodes.clear();
   setApiConTimer(apiConnectptr.i, 0, __LINE__);
   switch(ttransStatus){
   case LqhTransConf::Committed:
@@ -9756,6 +9820,7 @@ void Dbtc::initApiConnect(Signal* signal)
     apiConnectptr.p->executingIndexOp = RNIL;
     apiConnectptr.p->buddyPtr = RNIL;
     apiConnectptr.p->currSavePointId = 0;
+    apiConnectptr.p->m_transaction_nodes.clear();
   }//for
   apiConnectptr.i = tiacTmp - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9783,6 +9848,7 @@ void Dbtc::initApiConnect(Signal* signal)
       apiConnectptr.p->executingIndexOp = RNIL;
       apiConnectptr.p->buddyPtr = RNIL;
       apiConnectptr.p->currSavePointId = 0;
+      apiConnectptr.p->m_transaction_nodes.clear();
     }//for
   apiConnectptr.i = (2 * tiacTmp) - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9810,6 +9876,7 @@ void Dbtc::initApiConnect(Signal* signal)
     apiConnectptr.p->executingIndexOp = RNIL;
     apiConnectptr.p->buddyPtr = RNIL;
     apiConnectptr.p->currSavePointId = 0;
+    apiConnectptr.p->m_transaction_nodes.clear();
   }//for
   apiConnectptr.i = (3 * tiacTmp) - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9877,6 +9944,7 @@ void Dbtc::inithost(Signal* signal)
     hostptr.p->noOfPackedWordsLqh = 0;
     hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i);
   }//for
+  c_alive_nodes.clear();
 }//Dbtc::inithost()
 
 void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0, 
@@ -10126,6 +10194,7 @@ void Dbtc::releaseAbortResources(Signal* signal)
   }//while
   apiConnectptr.p->firstTcConnect = RNIL;
   apiConnectptr.p->lastTcConnect = RNIL;
+  apiConnectptr.p->m_transaction_nodes.clear();
 
   // MASV let state be CS_ABORTING until all 
   // signals in the "air" have been received. Reset to CS_CONNECTED
@@ -10199,6 +10268,7 @@ void Dbtc::releaseApiCon(Signal* signal, UintR TapiConnectPtr)
   cfirstfreeApiConnect = TlocalApiConnectptr.i;
   setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__);
   TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED;
+  ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear());
   ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL);
   TlocalApiConnectptr.p->ndbapiBlockref = 0;
 }//Dbtc::releaseApiCon()
@@ -10734,6 +10804,34 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
 	      c_theIndexOperationPool.getSize(),
 	      c_theIndexOperationPool.getNoOfFree());
   }
+
+  if (dumpState->args[0] == 2514)
+  {
+    if (signal->getLength() == 2)
+    {
+      dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec;
+      execDUMP_STATE_ORD(signal);
+    }
+
+    NodeReceiverGroup rg(CMVMI, c_alive_nodes);
+    dumpState->args[0] = 15;
+    sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+
+    signal->theData[0] = 2515;
+    sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1);    
+    return;
+  }
+
+  if (dumpState->args[0] == 2515)
+  {
+    NdbNodeBitmask mask = c_alive_nodes;
+    mask.clear(getOwnNodeId());
+    NodeReceiverGroup rg(NDBCNTR, mask);
+    
+    sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB);
+    sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1);    
+    return;
+  }
 }//Dbtc::execDUMP_STATE_ORD()
 
 void Dbtc::execSET_VAR_REQ(Signal* signal)

From 51a093f18762d299899c7c9e5cb0a2a639631720 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Mon, 20 Mar 2006 14:49:46 +0100
Subject: [PATCH 04/16] ndb - bug#18352   Use variable waitfor_response_timeout
 (depending on TransactionDeadLockTimeout)   When getting 4012, set NeedAbort
 and ReleaseOnClose

ndb/src/ndbapi/NdbConnection.cpp:
  Use variable for WAITFOR_RESPONSE_TIMEOUT
ndb/src/ndbapi/Ndbif.cpp:
  Use variable timeout for waitfor,
    when receiving 4012, set NeedAbort and ReleaseOnClose
ndb/src/ndbapi/TransporterFacade.cpp:
  Init wait_for_response_timeout as max TRANSACTION_DEADLOCK_TIMEOUT
ndb/src/ndbapi/TransporterFacade.hpp:
  Init wait_for_response_timeout as max TRANSACTION_DEADLOCK_TIMEOUT
ndb/test/ndbapi/testTimeout.cpp:
  Add testcase for 4012
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for 4012
---
 ndb/src/ndbapi/NdbConnection.cpp        |   4 +-
 ndb/src/ndbapi/Ndbif.cpp                |  12 +--
 ndb/src/ndbapi/TransporterFacade.cpp    |  14 ++++
 ndb/src/ndbapi/TransporterFacade.hpp    |   1 +
 ndb/test/ndbapi/testTimeout.cpp         | 101 ++++++++++++++++++++++++
 ndb/test/run-test/daily-basic-tests.txt |   4 +
 6 files changed, 129 insertions(+), 7 deletions(-)

diff --git a/ndb/src/ndbapi/NdbConnection.cpp b/ndb/src/ndbapi/NdbConnection.cpp
index c9e26f8ccaf..9cd7d6ed42e 100644
--- a/ndb/src/ndbapi/NdbConnection.cpp
+++ b/ndb/src/ndbapi/NdbConnection.cpp
@@ -450,12 +450,12 @@ NdbConnection::executeNoBlobs(ExecType aTypeOfExec,
 //------------------------------------------------------------------------
   Ndb* tNdb = theNdb;
 
+  Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
   m_waitForReply = false;
   executeAsynchPrepare(aTypeOfExec, NULL, NULL, abortOption);
   if (m_waitForReply){
     while (1) {
-      int noOfComp = tNdb->sendPollNdb((3 * WAITFOR_RESPONSE_TIMEOUT),
-                                       1, forceSend);
+      int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend);
       if (noOfComp == 0) {
         /** 
          * This timeout situation can occur if NDB crashes.
diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp
index 3ebba7e1c4a..d753117aa9a 100644
--- a/ndb/src/ndbapi/Ndbif.cpp
+++ b/ndb/src/ndbapi/Ndbif.cpp
@@ -954,23 +954,25 @@ Ndb::pollCompleted(NdbConnection** aCopyArray)
 void
 Ndb::check_send_timeout()
 {
+  Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
   NDB_TICKS current_time = NdbTick_CurrentMillisecond();
   if (current_time - the_last_check_time > 1000) {
     the_last_check_time = current_time;
     Uint32 no_of_sent = theNoOfSentTransactions;
     for (Uint32 i = 0; i < no_of_sent; i++) {
       NdbConnection* a_con = theSentTransactionsArray[i];
-      if ((current_time - a_con->theStartTransTime) >
-          WAITFOR_RESPONSE_TIMEOUT) {
+      if ((current_time - a_con->theStartTransTime) > timeout)
+      {
 #ifdef VM_TRACE
         a_con->printState();
 	Uint32 t1 = a_con->theTransactionId;
 	Uint32 t2 = a_con->theTransactionId >> 32;
-	ndbout_c("[%.8x %.8x]", t1, t2);
-	abort();
+	ndbout_c("4012 [%.8x %.8x]", t1, t2);
+	//abort();
 #endif
+        a_con->theReleaseOnClose = true;
         a_con->setOperationErrorCodeAbort(4012);
-        a_con->theCommitStatus = NdbConnection::Aborted;
+	a_con->theCommitStatus = NdbConnection::NeedAbort;
         a_con->theCompletionStatus = NdbConnection::CompletedFailure;
         a_con->handleExecuteCompletion();
         remove_sent_list(i);
diff --git a/ndb/src/ndbapi/TransporterFacade.cpp b/ndb/src/ndbapi/TransporterFacade.cpp
index b6fb2d6cded..5e9147304eb 100644
--- a/ndb/src/ndbapi/TransporterFacade.cpp
+++ b/ndb/src/ndbapi/TransporterFacade.cpp
@@ -567,6 +567,20 @@ TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props)
   }
 #endif
   
+  Uint32 timeout = 120000;
+  iter.first();
+  for (iter.first(); iter.valid(); iter.next())
+  {
+    Uint32 tmp1 = 0, tmp2 = 0;
+    iter.get(CFG_DB_TRANSACTION_CHECK_INTERVAL, &tmp1);
+    iter.get(CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, &tmp2);
+    tmp1 += tmp2;
+    if (tmp1 > timeout)
+      timeout = tmp1;
+  }
+  m_waitfor_timeout = timeout;
+  ndbout_c("Using waitfor: %d", timeout);
+  
   if (!theTransporterRegistry->start_service(m_socket_server)){
     ndbout_c("Unable to start theTransporterRegistry->start_service");
     DBUG_RETURN(false);
diff --git a/ndb/src/ndbapi/TransporterFacade.hpp b/ndb/src/ndbapi/TransporterFacade.hpp
index 99edea846c1..1e7377a3b4d 100644
--- a/ndb/src/ndbapi/TransporterFacade.hpp
+++ b/ndb/src/ndbapi/TransporterFacade.hpp
@@ -172,6 +172,7 @@ private:
    */
 public:
   STATIC_CONST( MAX_NO_THREADS = 4711 );
+  Uint32 m_waitfor_timeout; // in milli seconds...
 private:
 
   struct ThreadData {
diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp
index 71c11b25859..25392698642 100644
--- a/ndb/test/ndbapi/testTimeout.cpp
+++ b/ndb/test/ndbapi/testTimeout.cpp
@@ -24,6 +24,7 @@
 
 #define TIMEOUT (Uint32)3000
 Uint32 g_org_timeout = 3000;
+Uint32 g_org_deadlock = 3000;
 
 int
 setTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
@@ -59,6 +60,60 @@ resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
   return NDBT_OK;
 }
 
+int
+setDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);
+  
+  NdbConfig conf(GETNDB(step)->getNodeId()+1);
+  unsigned int nodeId = conf.getMasterNodeId();
+  if (!conf.getProperty(nodeId,
+			NODE_TYPE_DB, 
+			CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
+			&g_org_deadlock))
+    return NDBT_FAILED;
+  
+  g_err << "Setting timeout: " << timeout << endl;
+  int val[] = { DumpStateOrd::TcSetTransactionTimeout, timeout };
+  if(restarter.dumpStateAllNodes(val, 2) != 0){
+    return NDBT_FAILED;
+  }
+  
+  return NDBT_OK;
+}
+
+int
+getDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+  
+  Uint32 val = 0;
+  NdbConfig conf(GETNDB(step)->getNodeId()+1);
+  unsigned int nodeId = conf.getMasterNodeId();
+  if (!conf.getProperty(nodeId,
+			NODE_TYPE_DB, 
+			CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
+			&val))
+    return NDBT_FAILED;
+
+  if (val < 120000)
+    val = 120000;
+  ctx->setProperty("TransactionDeadlockTimeout", 4*val);
+  
+  return NDBT_OK;
+}
+
+int
+resetDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+  
+  int val[] = { DumpStateOrd::TcSetTransactionTimeout, g_org_deadlock };
+  if(restarter.dumpStateAllNodes(val, 2) != 0){
+    return NDBT_FAILED;
+  }
+  
+  return NDBT_OK;
+}
+
 
 int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
 
@@ -374,6 +429,43 @@ int runBuddyTransNoTimeout(NDBT_Context* ctx, NDBT_Step* step){
   return result;
 }
 
+int 
+runError4012(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int stepNo = step->getStepNo();
+  
+  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);
+
+  HugoOperations hugoOps(*ctx->getTab());
+  Ndb* pNdb = GETNDB(step);
+
+  do{
+    // Commit transaction
+    CHECK(hugoOps.startTransaction(pNdb) == 0);
+    CHECK(hugoOps.pkUpdateRecord(pNdb, 0) == 0);
+    int ret = hugoOps.execute_NoCommit(pNdb);
+    if (ret == 0)
+    {
+      int sleep = timeout;
+      ndbout << "Sleeping for " << sleep << " milliseconds" << endl;
+      NdbSleep_MilliSleep(sleep);
+      
+      // Expect that transaction has NOT timed-out
+      CHECK(hugoOps.execute_Commit(pNdb) == 0);
+    }
+    else
+    {
+      CHECK(ret == 4012);
+    }
+  } while(false);
+  
+  hugoOps.closeTransaction(pNdb);
+  
+  return result;
+}
+
+
 NDBT_TESTSUITE(testTimeout);
 TESTCASE("DontTimeoutTransaction", 
 	 "Test that the transaction does not timeout "\
@@ -465,6 +557,15 @@ TESTCASE("BuddyTransNoTimeout5",
   FINALIZER(resetTransactionTimeout);
   FINALIZER(runClearTable);
 }
+TESTCASE("Error4012", ""){
+  TC_PROPERTY("TransactionDeadlockTimeout", 120000);
+  INITIALIZER(runLoadTable);
+  INITIALIZER(getDeadlockTimeout);
+  INITIALIZER(setDeadlockTimeout);
+  STEPS(runError4012, 2);
+  FINALIZER(runClearTable);
+}
+
 NDBT_TESTSUITE_END(testTimeout);
 
 int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 169daae6d7f..70518f7881d 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -236,6 +236,10 @@ max-time: 500
 cmd: testTimeout
 args: -n TimeoutRandTransaction T1
 
+max-time: 600
+cmd: testTimeout
+args: -n Error4012 T1
+
 # SCAN TESTS
 #
 max-time: 500

From d230d0e1e6c7aa92bd6afabee378746d9d46c340 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Mon, 20 Mar 2006 14:53:29 +0100
Subject: [PATCH 05/16] ndb - wl2610, bug#18352   Remove useless and tricky
 state fiddling in TC     to synchronize NF_CompleteRep as code is already
 present in DIH as well   Keep broadcast of TAKEOVER_TCCONF for online upgrade

ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  Add clever dump for showing active operations
ndb/src/kernel/blocks/dbtc/Dbtc.hpp:
  Remove useless and tricky state fiddling in TC
    to synchronize NF_CompleteRep as code is already present in DIH as well
    Keep broadcast of TAKEOVER_TCCONF for online upgrade
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Remove useless and tricky state fiddling in TC
    to synchronize NF_CompleteRep as code is already present in DIH as well
    Keep broadcast of TAKEOVER_TCCONF for online upgrade
---
 ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 166 +++++++++++++++++++
 ndb/src/kernel/blocks/dbtc/Dbtc.hpp       |   9 --
 ndb/src/kernel/blocks/dbtc/DbtcMain.cpp   | 186 +++++-----------------
 3 files changed, 208 insertions(+), 153 deletions(-)

diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index ff7e3c32924..0aeeaccd55e 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -18448,6 +18448,172 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
     c_error_insert_table_id = dumpState->args[1];
     SET_ERROR_INSERT_VALUE(5042);
   }
+
+  TcConnectionrec *regTcConnectionrec = tcConnectionrec;
+  Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize;
+  Uint32 arg = dumpState->args[0];
+  if(arg == 2306)
+  {
+    for(Uint32 i = 0; i<1024; i++)
+    {
+      TcConnectionrecPtr tcRec;
+      tcRec.i = ctransidHash[i];
+      while(tcRec.i != RNIL)
+      {
+	ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+	ndbout << "TcConnectionrec " << tcRec.i;
+	signal->theData[0] = 2307;
+	signal->theData[1] = tcRec.i;
+	execDUMP_STATE_ORD(signal);
+	tcRec.i = tcRec.p->nextHashRec;
+      }
+    }
+  }
+
+  if(arg == 2307 || arg == 2308)
+  {
+    TcConnectionrecPtr tcRec;
+    tcRec.i = signal->theData[1];
+    ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+    
+    ndbout << " transactionState = " << tcRec.p->transactionState<<endl;
+    ndbout << " operation = " << tcRec.p->operation<<endl;
+    ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec
+	   << " seqNoReplica = " << tcRec.p->seqNoReplica
+	   << " simpleRead = " << tcRec.p->simpleRead
+	   << endl;
+    ndbout << " replicaType = " << tcRec.p->replicaType
+	   << " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey
+	   << " opExec = " << tcRec.p->opExec
+	   << endl;
+    ndbout << " opSimple = " << tcRec.p->opSimple
+	   << " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica
+	   << " lockType = " << tcRec.p->lockType
+	   << endl;
+    ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo
+	   << " indTakeOver = " << tcRec.p->indTakeOver
+	   << " dirtyOp = " << tcRec.p->dirtyOp
+	   << endl;
+    ndbout << " activeCreat = " << tcRec.p->activeCreat
+	   << " tcBlockref = " << hex << tcRec.p->tcBlockref
+	   << " reqBlockref = " << hex << tcRec.p->reqBlockref
+	   << " primKeyLen = " << tcRec.p->primKeyLen
+	   << endl;
+    ndbout << " nextReplica = " << tcRec.p->nextReplica
+	   << " tcBlockref = " << hex << tcRec.p->tcBlockref
+	   << " reqBlockref = " << hex << tcRec.p->reqBlockref
+	   << " primKeyLen = " << tcRec.p->primKeyLen
+	   << endl;
+    ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo
+	   << " logStartPageNo = " << tcRec.p->logStartPageNo
+	   << " logStartPageIndex = " << tcRec.p->logStartPageIndex
+	   << endl;
+    ndbout << " errorCode = " << tcRec.p->errorCode
+	   << " clientBlockref = " << hex << tcRec.p->clientBlockref
+	   << " applRef = " << hex << tcRec.p->applRef
+	   << " totSendlenAi = " << tcRec.p->totSendlenAi
+	   << endl;
+    ndbout << " totReclenAi = " << tcRec.p->totReclenAi
+	   << " tcScanRec = " << tcRec.p->tcScanRec
+	   << " tcScanInfo = " << tcRec.p->tcScanInfo
+	   << " tcOprec = " << hex << tcRec.p->tcOprec
+	   << endl;
+    ndbout << " tableref = " << tcRec.p->tableref
+	   << " simpleTcConnect = " << tcRec.p->simpleTcConnect
+	   << " storedProcId = " << tcRec.p->storedProcId
+	   << " schemaVersion = " << tcRec.p->schemaVersion
+	   << endl;
+    ndbout << " reqinfo = " << tcRec.p->reqinfo
+	   << " reqRef = " << tcRec.p->reqRef
+	   << " readlenAi = " << tcRec.p->readlenAi
+	   << " prevTc = " << tcRec.p->prevTc
+	   << endl;
+    ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec
+	   << " prevHashRec = " << tcRec.p->prevHashRec
+	   << " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0]
+	   << " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1]
+	   << endl;
+    ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec
+	   << " nextTc = " << tcRec.p->nextTc
+	   << " nextTcLogQueue = " << tcRec.p->nextTcLogQueue
+	   << " nextLogTcrec = " << tcRec.p->nextLogTcrec
+	   << endl;
+    ndbout << " nextHashRec = " << tcRec.p->nextHashRec
+	   << " logWriteState = " << tcRec.p->logWriteState
+	   << " logStartFileNo = " << tcRec.p->logStartFileNo
+	   << " listState = " << tcRec.p->listState
+	   << endl;
+    ndbout << " lastAttrinbuf = " << tcRec.p->lastAttrinbuf
+	   << " lastTupkeybuf = " << tcRec.p->lastTupkeybuf
+	   << " hashValue = " << tcRec.p->hashValue
+	   << endl;
+    ndbout << " gci = " << tcRec.p->gci
+	   << " fragmentptr = " << tcRec.p->fragmentptr
+	   << " fragmentid = " << tcRec.p->fragmentid
+	   << " firstTupkeybuf = " << tcRec.p->firstTupkeybuf
+	   << endl;
+    ndbout << " firstAttrinbuf = " << tcRec.p->firstAttrinbuf
+	   << " currTupAiLen = " << tcRec.p->currTupAiLen
+	   << " currReclenAi = " << tcRec.p->currReclenAi
+	   << endl;
+    ndbout << " tcTimer = " << tcRec.p->tcTimer
+	   << " clientConnectrec = " << tcRec.p->clientConnectrec
+	   << " applOprec = " << hex << tcRec.p->applOprec
+	   << " abortState = " << tcRec.p->abortState
+	   << endl;
+    ndbout << " transid0 = " << hex << tcRec.p->transid[0]
+	   << " transid1 = " << hex << tcRec.p->transid[1]
+	   << " tupkeyData0 = " << tcRec.p->tupkeyData[0]
+	   << " tupkeyData1 = " << tcRec.p->tupkeyData[1]
+	   << endl;
+    ndbout << " tupkeyData2 = " << tcRec.p->tupkeyData[2]
+	   << " tupkeyData3 = " << tcRec.p->tupkeyData[3]
+	   << endl;
+    switch (tcRec.p->transactionState) {
+	
+    case TcConnectionrec::SCAN_STATE_USED:
+      if (tcRec.p->tcScanRec < cscanrecFileSize){
+	ScanRecordPtr TscanPtr;
+	c_scanRecordPool.getPtr(TscanPtr, tcRec.p->tcScanRec);
+	ndbout << " scanState = " << TscanPtr.p->scanState << endl;
+	//TscanPtr.p->scanLocalref[2];
+	ndbout << " copyPtr="<<TscanPtr.p->copyPtr
+	       << " scanAccPtr="<<TscanPtr.p->scanAccPtr
+	       << " scanAiLength="<<TscanPtr.p->scanAiLength
+	       << endl;
+	ndbout << " m_curr_batch_size_rows="<<
+	  TscanPtr.p->m_curr_batch_size_rows
+	       << " m_max_batch_size_rows="<<
+	  TscanPtr.p->m_max_batch_size_rows
+	       << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter
+	       << endl;
+	ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion
+	       << "  scanStoredProcId="<<TscanPtr.p->scanStoredProcId
+	       << "  scanTcrec="<<TscanPtr.p->scanTcrec
+	       << endl;
+	ndbout << "  scanType="<<TscanPtr.p->scanType
+	       << "  scanApiBlockref="<<TscanPtr.p->scanApiBlockref
+	       << "  scanNodeId="<<TscanPtr.p->scanNodeId
+	       << "  scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus
+	       << endl;
+	ndbout << "  scanFlag="<<TscanPtr.p->scanFlag
+	       << "  scanLockHold="<<TscanPtr.p->scanLockHold
+	       << "  scanLockMode="<<TscanPtr.p->scanLockMode
+	       << "  scanNumber="<<TscanPtr.p->scanNumber
+	       << endl;
+	ndbout << "  scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter
+	       << "  scanTcWaiting="<<TscanPtr.p->scanTcWaiting
+	       << "  scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag
+	       << endl;
+      } else{
+	ndbout << "No connected scan record found" << endl;
+      }
+      break;
+    default:
+      break;
+    }
+    ndbrequire(arg != 2308);
+  }
   
 }//Dblqh::execDUMP_STATE_ORD()
 
diff --git a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
index 23c5a7d08eb..b1332a4fd0b 100644
--- a/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
+++ b/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
@@ -211,14 +211,6 @@ public:
     LTS_ACTIVE = 1
   };
 
-  enum TakeOverState {
-    TOS_NOT_DEFINED = 0,
-    TOS_IDLE = 1,
-    TOS_ACTIVE = 2,
-    TOS_COMPLETED = 3,
-    TOS_NODE_FAILED = 4
-  };
-
   enum FailState {
     FS_IDLE = 0,
     FS_LISTENING = 1,
@@ -933,7 +925,6 @@ public:
   struct HostRecord {
     HostState hostStatus;
     LqhTransState lqhTransStatus;
-    TakeOverState takeOverStatus;
     bool  inPackedList;
     UintR noOfPackedWordsLqh;
     UintR packedWordsLqh[26];
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index 4750a8c388a..ff9b279592c 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -303,7 +303,6 @@ void Dbtc::execINCL_NODEREQ(Signal* signal)
   hostptr.i = signal->theData[1];
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
   hostptr.p->hostStatus = HS_ALIVE;
-  hostptr.p->takeOverStatus = TOS_IDLE;
   signal->theData[0] = cownref;
   c_alive_nodes.set(hostptr.i);
   sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
@@ -856,8 +855,6 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
       hostptr.i = i;
       ptrCheckGuard(hostptr, chostFilesize, hostRecord);
 
-      hostptr.p->takeOverStatus = TOS_IDLE;
-      
       if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
         jam();
         hostptr.p->hostStatus = HS_DEAD;
@@ -6826,21 +6823,27 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
   const Uint32 tnewMasterId = nodeFail->masterNodeId;
   
   arrGuard(tnoOfNodes, MAX_NDB_NODES);
+  Uint32 i;
   int index = 0;
-  for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
-    if(NodeBitmask::get(nodeFail->theNodes, i)){
+  for (i = 1; i< MAX_NDB_NODES; i++) 
+  {
+    if(NodeBitmask::get(nodeFail->theNodes, i))
+    {
       cdata[index] = i;
       index++;
     }//if
   }//for
 
+  cmasterNodeId = tnewMasterId;
+  
   tcNodeFailptr.i = 0;
   ptrAss(tcNodeFailptr, tcFailRecord);
-  Uint32 tindex;
-  for (tindex = 0; tindex < tnoOfNodes; tindex++) {
+  for (i = 0; i < tnoOfNodes; i++) 
+  {
     jam();
-    hostptr.i = cdata[tindex];
+    hostptr.i = cdata[i];
     ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+    
     /*------------------------------------------------------------*/
     /*       SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS   */
     /*       FAILED.                                              */
@@ -6849,30 +6852,15 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
     hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
     c_alive_nodes.clear(hostptr.i);
 
-    if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
-      jam();
-      /*------------------------------------------------------------*/
-      /*       A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
-      /*       EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT.  */
-      /*       HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE.          */
-      /*------------------------------------------------------------*/
-      /*       RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE    */
-      /*       REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
-      /*       USED THEM IS COMPLETED.                              */
-      /*------------------------------------------------------------*/
-      hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
-    } else {
-      ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
-      hostptr.p->takeOverStatus = TOS_NODE_FAILED;
-    }//if
-    
-    if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
+    if (tcNodeFailptr.p->failStatus == FS_LISTENING) 
+    {
       jam();
       /*------------------------------------------------------------*/
       /*       THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE   */
       /*       FAILURE.                                             */
       /*------------------------------------------------------------*/
-      if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
+      if (hostptr.p->lqhTransStatus == LTS_ACTIVE) 
+      {
 	jam();
 	/*------------------------------------------------------------*/
 	/*       WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@@ -6884,78 +6872,25 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
       }//if
     }//if
     
-  }//for
-
-  const bool masterFailed = (cmasterNodeId != tnewMasterId);
-  cmasterNodeId = tnewMasterId;
-
-  if(getOwnNodeId() == cmasterNodeId && masterFailed){
-    /**
-     * Master has failed and I'm the new master
-     */
-    jam();
-    
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+    if (getOwnNodeId() != tnewMasterId)
+    {
       jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus != HS_ALIVE) {
-	jam();
-	if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
-	  jam();
-	  /*------------------------------------------------------------*/
-	  /*       SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF    */
-	  /*       TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE  */
-	  /*       THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE   */
-	  /*       OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF  */
-	  /*       MASTER FAILS AFTER SENDING CONFIRMATION TO NEW       */
-	  /*       MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE    */
-	  /*       WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES     */
-	  /*       MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
-	  /*       CRASHED NODE HAVE ALREADY RECOVERED.                 */
-	  /*------------------------------------------------------------*/
-	  NodeReceiverGroup rg(DBTC, c_alive_nodes);
-	  signal->theData[0] = hostptr.i;
-	  sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
-	}//if
-      }//if
-    }//for
-  }
-
-  if(getOwnNodeId() == cmasterNodeId){
-    jam();
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+      /**
+       * Only master does takeover currently
+       */
+      hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
+    }
+    else
+    {
       jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus != HS_ALIVE) {
-        jam();
-        if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
-          jam();
-	  /*------------------------------------------------------------*/
-	  /*       CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL    */
-	  /*       SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
-	  /*       BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
-	  /*------------------------------------------------------------*/
-          hostptr.p->takeOverStatus = TOS_ACTIVE;
-          signal->theData[0] = hostptr.i;
-          sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
-        }//if
-      }//if
-    }//for
-  }//if
-  for (tindex = 0; tindex < tnoOfNodes; tindex++) {
-    jam();
-    hostptr.i = cdata[tindex];
-    ptrCheckGuard(hostptr, chostFilesize, hostRecord);
-    /*------------------------------------------------------------*/
-    /*       LOOP THROUGH AND ABORT ALL SCANS THAT WHERE          */
-    /*       CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED       */
-    /*       NODE'S LQH                                           */
-    /*------------------------------------------------------------*/
+      signal->theData[0] = hostptr.i;
+      sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
+    }
+
     checkScanActiveInFailedLqh(signal, 0, hostptr.i);
     checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
     nodeFailCheckTransactions(signal, 0, hostptr.i);
-  }//for
-
+  }
 }//Dbtc::execNODE_FAILREP()
 
 void
@@ -7071,47 +7006,17 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
   tfailedNodeId = signal->theData[0];
   hostptr.i = tfailedNodeId;
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
-  switch (hostptr.p->takeOverStatus) {
-  case TOS_IDLE:
+
+  ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
+	   tfailedNodeId, signal->getSendersBlockRef(), reference());
+  if (signal->getSendersBlockRef() != reference())
+  {
     jam();
-    /*------------------------------------------------------------*/
-    /*       THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP    */
-    /*       MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS.     */
-    /*       WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT     */
-    /*       FOR THE NODE_FAILREP MESSAGE.                        */
-    /*------------------------------------------------------------*/
-    hostptr.p->takeOverStatus = TOS_COMPLETED;
-    break;
-  case TOS_NODE_FAILED:
-  case TOS_ACTIVE:
-    jam();
-    /*------------------------------------------------------------*/
-    /*       WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE  */
-    /*       ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH      */
-    /*       WE SET THE STATE TO TAKE_OVER_COMPLETED.             */
-    /*------------------------------------------------------------*/
-    /*       RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE    */
-    /*       REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
-    /*       USED THEM IS COMPLETED.                              */
-    /*------------------------------------------------------------*/
-    hostptr.p->takeOverStatus = TOS_COMPLETED;
-    checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
-    break;
-  case TOS_COMPLETED:
-    jam();
-    /*------------------------------------------------------------*/
-    /*       WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
-    /*       LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF   */
-    /*       THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
-    /*       WE SIMPLY IGNORE THE MESSAGE.                        */
-    /*------------------------------------------------------------*/
-    /*empty*/;
-    break;
-  default:
-    jam();
-    systemErrorLab(signal);
     return;
-  }//switch
+  }
+  
+  
+  checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
 }//Dbtc::execTAKE_OVERTCCONF()
 
 void Dbtc::execTAKE_OVERTCREQ(Signal* signal) 
@@ -7351,16 +7256,10 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd)
     /*       TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL     */
     /*       NODES THAT ARE ALIVE.                                */
     /*------------------------------------------------------------*/
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
-      jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus == HS_ALIVE) {
-        jam();
-        tblockref = calcTcBlockRef(hostptr.i);
-        signal->theData[0] = tcNodeFailptr.p->takeOverNode;
-        sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
-      }//if
-    }//for
+    NodeReceiverGroup rg(DBTC, c_alive_nodes);
+    signal->theData[0] = tcNodeFailptr.p->takeOverNode;
+    sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+    
     if (tcNodeFailptr.p->queueIndex > 0) {
       jam();
       /*------------------------------------------------------------*/
@@ -9937,7 +9836,6 @@ void Dbtc::inithost(Signal* signal)
     ptrAss(hostptr, hostRecord);
     hostptr.p->hostStatus = HS_DEAD;
     hostptr.p->inPackedList = false;
-    hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
     hostptr.p->lqhTransStatus = LTS_IDLE;
     hostptr.p->noOfWordsTCKEYCONF = 0;
     hostptr.p->noOfWordsTCINDXCONF = 0;

From ad6dcfb1277b3b0a8692c3bfd802ba48cc3fe537 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Mon, 20 Mar 2006 14:55:14 +0100
Subject: [PATCH 06/16] ndb - bug#18352   remove debug printout

---
 ndb/src/ndbapi/TransporterFacade.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ndb/src/ndbapi/TransporterFacade.cpp b/ndb/src/ndbapi/TransporterFacade.cpp
index 5e9147304eb..30d0eec1e4a 100644
--- a/ndb/src/ndbapi/TransporterFacade.cpp
+++ b/ndb/src/ndbapi/TransporterFacade.cpp
@@ -579,7 +579,6 @@ TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props)
       timeout = tmp1;
   }
   m_waitfor_timeout = timeout;
-  ndbout_c("Using waitfor: %d", timeout);
   
   if (!theTransporterRegistry->start_service(m_socket_server)){
     ndbout_c("Unable to start theTransporterRegistry->start_service");

From 8ed36cb667b675244f55072cefa15fb65ec89ee7 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Tue, 21 Mar 2006 14:47:10 +0100
Subject: [PATCH 07/16] ndb - bug#18385   Partial system restart, can not try
 to start with higher GCI than own   even if knowing about a higher number

ndb/include/kernel/signaldata/DumpStateOrd.hpp:
  Add new dump for setting time between gcp
ndb/include/kernel/signaldata/StartPerm.hpp:
  Move error codes into StartPerm + Add new error code
ndb/src/kernel/blocks/ERROR_codes.txt:
  Add new error insert
ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Move error codes into StartPerm + Add new error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Fix so that we don't try to restart to a too new GCI when doing a partial start
  Add new error code when this node later tries to join
ndb/test/include/NdbRestarter.hpp:
  Add new method for selecting random node
ndb/test/ndbapi/testSystemRestart.cpp:
  Add new testcase for bug#18385
ndb/test/run-test/daily-basic-tests.txt:
  Run test in daily-basic
ndb/test/src/NdbRestarter.cpp:
  Add new method for selecting random node
---
 .../kernel/signaldata/DumpStateOrd.hpp        |  1 +
 ndb/include/kernel/signaldata/StartPerm.hpp   |  6 ++
 ndb/src/kernel/blocks/ERROR_codes.txt         |  2 +
 ndb/src/kernel/blocks/dbdih/Dbdih.hpp         |  1 -
 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp     | 99 ++++++++++++++-----
 ndb/test/include/NdbRestarter.hpp             |  1 +
 ndb/test/ndbapi/testSystemRestart.cpp         | 53 ++++++++++
 ndb/test/run-test/daily-basic-tests.txt       |  4 +
 ndb/test/src/NdbRestarter.cpp                 | 33 +++++++
 9 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
index 4dd22cf5092..2c824670cef 100644
--- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
@@ -127,6 +127,7 @@ public:
     DihMinTimeBetweenLCP = 7017,
     DihMaxTimeBetweenLCP = 7018,
     EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
+    DihSetTimeBetweenGcp = 7090,
     DihStartLcpImmediately = 7099,
     // 8000 Suma
     // 12000 Tux
diff --git a/ndb/include/kernel/signaldata/StartPerm.hpp b/ndb/include/kernel/signaldata/StartPerm.hpp
index 38be72835a3..63e01ed3868 100644
--- a/ndb/include/kernel/signaldata/StartPerm.hpp
+++ b/ndb/include/kernel/signaldata/StartPerm.hpp
@@ -64,5 +64,11 @@ private:
   
   Uint32 startingNodeId;
   Uint32 errorCode;  
+
+  enum ErrorCode
+  {
+    ZNODE_ALREADY_STARTING_ERROR = 305,
+    InitialStartRequired = 320
+  };
 };
 #endif
diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index 62481837c14..e5576450846 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -303,6 +303,8 @@ Test Crashes in handling node restarts
 7131: Crash when receiving START_COPYREQ in master node
 7132: Crash when receiving START_COPYCONF in starting node
 
+7170: Crash when receiving START_PERMREF (InitialStartRequired)
+
 DICT:
 6000  Crash during NR when receiving DICTSTARTREQ
 6001  Crash during NR when receiving SCHEMA_INFO
diff --git a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
index f74c0f36c4d..78acf1ffd19 100644
--- a/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+++ b/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
@@ -81,7 +81,6 @@
 #define ZWRONG_FAILURE_NUMBER_ERROR 302
 #define ZWRONG_START_NODE_ERROR 303
 #define ZNO_REPLICA_FOUND_ERROR 304
-#define ZNODE_ALREADY_STARTING_ERROR 305
 #define ZNODE_START_DISALLOWED_ERROR 309
 
 // --------------------------------------
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index fab428aadef..eb4ae61a3e4 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -1420,6 +1420,33 @@ void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
     return;
   }
   
+  NodeRecordPtr nodePtr;
+  Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
+  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) 
+  {
+    jam();
+    ptrAss(nodePtr, nodeRecord);
+    if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) 
+    {
+      jam();
+      /**
+       * Since we're starting (as master) and
+       *   there are other nodes with higher GCI...
+       *   their GCIs must be invalidated...
+       *   and they _must_ do an initial start
+       *   indicate this by setting lastCompletedGCI = 0
+       */
+      SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
+      ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
+      warningEvent("Making filesystem for node %d unusable",
+		   nodePtr.i);
+    }
+  }
+  /**
+   * This sets which GCI we will try to restart to
+   */
+  SYSFILE->newestRestorableGCI = gci;
+  
   ndbrequire(isMaster());
   copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
 }//Dbdih::ndbStartReqLab()
@@ -1557,7 +1584,7 @@ void Dbdih::execSTART_PERMREF(Signal* signal)
 {
   jamEntry();
   Uint32 errorCode = signal->theData[1];
-  if (errorCode == ZNODE_ALREADY_STARTING_ERROR) {
+  if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR) {
     jam();
     /*-----------------------------------------------------------------------*/
     // The master was busy adding another node. We will wait for a second and
@@ -1567,6 +1594,20 @@ void Dbdih::execSTART_PERMREF(Signal* signal)
     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
     return;
   }//if
+
+  if (errorCode == StartPermRef::InitialStartRequired)
+  {
+    CRASH_INSERTION(7170);
+    char buf[255];
+    BaseString::snprintf(buf, sizeof(buf), 
+			 "Cluster requires this node to be started "
+			 " with --initial as partial start has been performed"
+			 " and this filesystem is unusable");
+    progError(__LINE__, 
+	      ERR_SR_RESTARTCONFLICT,
+	      buf);
+    ndbrequire(false);
+  }
   /*------------------------------------------------------------------------*/
   // Some node process in another node involving our node was still active. We
   // will recover from this by crashing here. 
@@ -1657,7 +1698,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
       (c_nodeStartMaster.wait != ZFALSE)) {
     jam();
     signal->theData[0] = nodeId;
-    signal->theData[1] = ZNODE_ALREADY_STARTING_ERROR;
+    signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
     sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
     return;
   }//if
@@ -1667,6 +1708,16 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
     ndbrequire(false);
   }//if
 
+  if (SYSFILE->lastCompletedGCI[nodeId] == 0 &&
+      typeStart != NodeState::ST_INITIAL_NODE_RESTART)
+  {
+    jam();
+    signal->theData[0] = nodeId;
+    signal->theData[1] = StartPermRef::InitialStartRequired;
+    sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
+    return;
+  }
+
   /*----------------------------------------------------------------------
    * WE START THE INCLUSION PROCEDURE 
    * ---------------------------------------------------------------------*/
@@ -3515,24 +3566,12 @@ void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
 /* ------------------------------------------------------------------------- */
 void Dbdih::selectMasterCandidateAndSend(Signal* signal)
 {
-  Uint32 gci = 0;
-  Uint32 masterCandidateId = 0;
-  NodeRecordPtr nodePtr;
-  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
-    jam();
-    ptrAss(nodePtr, nodeRecord);
-    if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci) {
-      jam();
-      masterCandidateId = nodePtr.i;
-      gci = SYSFILE->lastCompletedGCI[nodePtr.i];
-    }//if
-  }//for
-  ndbrequire(masterCandidateId != 0);
   setNodeGroups();
-  signal->theData[0] = masterCandidateId;
-  signal->theData[1] = gci;
+  signal->theData[0] = getOwnNodeId();
+  signal->theData[1] = SYSFILE->lastCompletedGCI[getOwnNodeId()];
   sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal, 2, JBB);
-
+  
+  NodeRecordPtr nodePtr;
   Uint32 node_groups[MAX_NDB_NODES];
   memset(node_groups, 0, sizeof(node_groups));
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
@@ -3550,10 +3589,10 @@ void Dbdih::selectMasterCandidateAndSend(Signal* signal)
     if(count != 0 && count != cnoReplicas){
       char buf[255];
       BaseString::snprintf(buf, sizeof(buf), 
-	       "Illegal configuration change."
-	       " Initial start needs to be performed "
-	       " when changing no of replicas (%d != %d)", 
-	       node_groups[nodePtr.i], cnoReplicas);
+			   "Illegal configuration change."
+			   " Initial start needs to be performed "
+			   " when changing no of replicas (%d != %d)", 
+			   node_groups[nodePtr.i], cnoReplicas);
       progError(__LINE__, 
 		ERR_INVALID_CONFIG,
 		buf);
@@ -13359,6 +13398,22 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     c_lcpState.ctimer += (1 << c_lcpState.clcpDelay);
     return;
   }
+
+  if (dumpState->args[0] == DumpStateOrd::DihSetTimeBetweenGcp)
+  {
+    if (signal->getLength() == 1)
+    {
+      const ndb_mgm_configuration_iterator * p = 
+	theConfiguration.getOwnConfigIterator();
+      ndbrequire(p != 0);
+      ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &cgcpDelay);
+    }
+    else
+    {
+      cgcpDelay = signal->theData[1];
+    }
+    ndbout_c("Setting time between gcp : %d", cgcpDelay);
+  }
 }//Dbdih::execDUMP_STATE_ORD()
 
 void
diff --git a/ndb/test/include/NdbRestarter.hpp b/ndb/test/include/NdbRestarter.hpp
index 19a88b4f8ad..3ec92ae786e 100644
--- a/ndb/test/include/NdbRestarter.hpp
+++ b/ndb/test/include/NdbRestarter.hpp
@@ -62,6 +62,7 @@ public:
   int dumpStateAllNodes(int * _args, int _num_args);
 
   int getMasterNodeId();
+  int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
   int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
   int getRandomNotMasterNodeId(int randomNumber);
   
diff --git a/ndb/test/ndbapi/testSystemRestart.cpp b/ndb/test/ndbapi/testSystemRestart.cpp
index 35016896495..30f7aca9b06 100644
--- a/ndb/test/ndbapi/testSystemRestart.cpp
+++ b/ndb/test/ndbapi/testSystemRestart.cpp
@@ -1051,6 +1051,52 @@ int runSystemRestart9(NDBT_Context* ctx, NDBT_Step* step){
   return result;
 }
 
+int runBug18385(NDBT_Context* ctx, NDBT_Step* step){
+  NdbRestarter restarter;
+  const Uint32 nodeCount = restarter.getNumDbNodes();
+  if(nodeCount < 2){
+    g_info << "Bug18385 - Needs atleast 2 nodes to test" << endl;
+    return NDBT_OK;
+  }
+
+  int node1 = restarter.getDbNodeId(rand() % nodeCount);
+  int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+
+  if (node1 == -1 || node2 == -1)
+    return NDBT_OK;
+  
+  int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 300 };
+  
+  int result = NDBT_OK;
+  do {
+    CHECK(restarter.dumpStateAllNodes(dump, 2) == 0);
+    CHECK(restarter.restartOneDbNode(node1, false, true, false) == 0);
+    NdbSleep_SecSleep(3);
+    CHECK(restarter.restartAll(false, true, false) == 0);
+    
+    Uint32 cnt = 0;
+    int nodes[128];
+    for(Uint32 i = 0; i<nodeCount; i++)
+      if ((nodes[cnt] = restarter.getDbNodeId(i)) != node2)
+	cnt++;
+    
+    assert(cnt == nodeCount - 1);
+    
+    CHECK(restarter.startNodes(nodes, cnt) == 0);
+    CHECK(restarter.waitNodesStarted(nodes, cnt, 300) == 0);
+    
+    CHECK(restarter.insertErrorInNode(node2, 7170) == 0);
+    CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
+    CHECK(restarter.restartOneDbNode(node2, true, false, true) == 0);
+    CHECK(restarter.waitNodesStarted(&node2, 1) == 0);
+
+  } while(0);
+  
+  g_info << "Bug18385 finished" << endl;  
+  
+  return result;
+}
+
 int runWaitStarted(NDBT_Context* ctx, NDBT_Step* step){
 
   NdbRestarter restarter;
@@ -1234,6 +1280,13 @@ TESTCASE("SR9",
   STEP(runSystemRestart9);
   FINALIZER(runClearTable);
 }
+TESTCASE("Bug18385", 
+	 "Perform partition system restart with other nodes with higher GCI"){
+  INITIALIZER(runWaitStarted);
+  INITIALIZER(runClearTable);
+  STEP(runBug18385);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testSystemRestart);
 
 int main(int argc, const char** argv){
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 70518f7881d..0533d585a41 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -454,6 +454,10 @@ max-time: 500
 cmd: testNodeRestart
 args: -n Bug16772 T1
 
+max-time: 500
+cmd: testSystemRestart
+args: -n Bug18385 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench
diff --git a/ndb/test/src/NdbRestarter.cpp b/ndb/test/src/NdbRestarter.cpp
index 91c0963feae..2c16a05240d 100644
--- a/ndb/test/src/NdbRestarter.cpp
+++ b/ndb/test/src/NdbRestarter.cpp
@@ -174,6 +174,39 @@ NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
   return -1;
 }
 
+int
+NdbRestarter::getRandomNodeSameNodeGroup(int nodeId, int rand){
+  if (!isConnected())
+    return -1;
+  
+  if (getStatus() != 0)
+    return -1;
+  
+  int node_group = -1;
+  for(size_t i = 0; i < ndbNodes.size(); i++){
+    if(ndbNodes[i].node_id == nodeId){
+      node_group = ndbNodes[i].node_group;
+      break;
+    }
+  }
+  if(node_group == -1){
+    return -1;
+  }
+
+  Uint32 counter = 0;
+  rand = rand % ndbNodes.size();
+  while(counter++ < ndbNodes.size() && 
+	(ndbNodes[rand].node_id == nodeId || 
+	 ndbNodes[rand].node_group != node_group))
+    rand = (rand + 1) % ndbNodes.size();
+  
+  if(ndbNodes[rand].node_group == node_group &&
+     ndbNodes[rand].node_id != nodeId)
+    return ndbNodes[rand].node_id;
+  
+  return -1;
+}
+
 int 
 NdbRestarter::waitClusterStarted(unsigned int _timeout){
   return waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout);

From 058019f66cdfefcdaa179f011c9c9a10ee0d10df Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Tue, 21 Mar 2006 15:13:41 +0100
Subject: [PATCH 08/16] ndb - bug#18118   timeslice DUMP(7015)

ndb/include/kernel/signaldata/DumpStateOrd.hpp:
  Document that dump codes 7019, 7020 and 7021 are in use
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Timeslice DUMP(7015): dump one table per signal through new DUMP(7021)
---
 .../kernel/signaldata/DumpStateOrd.hpp        |   3 +
 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp     | 126 ++++++++++--------
 2 files changed, 77 insertions(+), 52 deletions(-)

diff --git a/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
index 2c824670cef..b42b930711c 100644
--- a/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+++ b/ndb/include/kernel/signaldata/DumpStateOrd.hpp
@@ -126,6 +126,9 @@ public:
     DihAllAllowNodeStart = 7016,
     DihMinTimeBetweenLCP = 7017,
     DihMaxTimeBetweenLCP = 7018,
+    // 7019
+    // 7020
+    // 7021
     EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
     DihSetTimeBetweenGcp = 7090,
     DihStartLcpImmediately = 7099,
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index eb4ae61a3e4..a8633af2529 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -5983,6 +5983,7 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
   execDUMP_STATE_ORD(signal);
 
   signal->theData[0] = 7015;
+  signal->theData[1] = 0;
   execDUMP_STATE_ORD(signal);
 
   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
@@ -13036,7 +13037,8 @@ void
 Dbdih::execDUMP_STATE_ORD(Signal* signal)
 {
   DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
-  if (dumpState->args[0] == DumpStateOrd::DihDumpNodeRestartInfo) {
+  Uint32 arg = dumpState->args[0];
+  if (arg == DumpStateOrd::DihDumpNodeRestartInfo) {
     infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d",
 	      c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait);
     infoEvent("cstartGcpNow = %d, cgcpStatus = %d",
@@ -13046,7 +13048,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     infoEvent("cgcpOrderBlocked = %d, cgcpStartCounter = %d",
               cgcpOrderBlocked, cgcpStartCounter);
   }//if  
-  if (dumpState->args[0] == DumpStateOrd::DihDumpNodeStatusInfo) {
+  if (arg == DumpStateOrd::DihDumpNodeStatusInfo) {
     NodeRecordPtr localNodePtr;
     infoEvent("Printing nodeStatus of all nodes");
     for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) {
@@ -13058,7 +13060,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     }//for
   }//if
   
-  if (dumpState->args[0] == DumpStateOrd::DihPrintFragmentation){
+  if (arg == DumpStateOrd::DihPrintFragmentation){
     infoEvent("Printing fragmentation of all tables --");
     for(Uint32 i = 0; i<ctabFileSize; i++){
       TabRecordPtr tabPtr;
@@ -13233,7 +13235,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     }
   }
 
-  if(dumpState->args[0] == 7019 && signal->getLength() == 2)
+  if(arg == 7019 && signal->getLength() == 2)
   {
     char buf2[8+1];
     NodeRecordPtr nodePtr;
@@ -13251,7 +13253,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
 	      nodePtr.p->m_nodefailSteps.getText(buf2));
   }
   
-  if(dumpState->args[0] == 7020 && signal->getLength() > 3)
+  if(arg == 7020 && signal->getLength() > 3)
   {
     Uint32 gsn= signal->theData[1];
     Uint32 block= signal->theData[2];
@@ -13275,7 +13277,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
 			  gsn, getBlockName(block, "UNKNOWN"), length, buf);
   }
   
-  if(dumpState->args[0] == DumpStateOrd::DihDumpLCPState){
+  if(arg == DumpStateOrd::DihDumpLCPState){
     infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
     infoEvent("lcpStatus = %d (update place = %d) ",
 	      c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
@@ -13291,7 +13293,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
   }
 
-  if(dumpState->args[0] == DumpStateOrd::DihDumpLCPMasterTakeOver){
+  if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){
     infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
     infoEvent
       ("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d",
@@ -13306,52 +13308,25 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
   }
 
-  if (signal->theData[0] == 7015){
-    for(Uint32 i = 0; i<ctabFileSize; i++){
-      TabRecordPtr tabPtr;
-      tabPtr.i = i;
-      ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
-      
-      if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
-	continue;
-      
-      infoEvent
-	("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
-	 tabPtr.i, 
-	 tabPtr.p->tabCopyStatus, 
-	 tabPtr.p->tabUpdateState,
-	 tabPtr.p->tabLcpStatus);
+  if (signal->theData[0] == 7015)
+  {
+    if (signal->getLength() == 1)
+    {
+      signal->theData[1] = 0;
+    }
 
-      FragmentstorePtr fragPtr;
-      for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
-	jam();
-	getFragstore(tabPtr.p, fid, fragPtr);
-	
-	char buf[100], buf2[100];
-	BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ", 
-		 fid, fragPtr.p->noLcpReplicas);
-	
-	Uint32 num=0;
-	ReplicaRecordPtr replicaPtr;
-	replicaPtr.i = fragPtr.p->storedReplicas;
-	do {
-	  ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
-	  BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
-		   buf, num, 
-		   replicaPtr.p->procNode, 
-		   replicaPtr.p->lcpIdStarted,
-		   replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
-	  BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
-	  
-	  num++;
-	  replicaPtr.i = replicaPtr.p->nextReplica;
-	} while (replicaPtr.i != RNIL);
-	infoEvent(buf);
-      }
+    Uint32 tableId = signal->theData[1];
+    if (tableId < ctabFileSize)
+    {
+      signal->theData[0] = 7021;
+      execDUMP_STATE_ORD(signal);
+      signal->theData[0] = 7015;
+      signal->theData[1] = tableId + 1;
+      sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
     }
   }
 
-  if(dumpState->args[0] == DumpStateOrd::EnableUndoDelayDataWrite){
+  if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
     ndbout << "Dbdih:: delay write of datapages for table = " 
 	   << dumpState->args[1]<< endl;
     // Send this dump to ACC and TUP
@@ -13381,7 +13356,7 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     return;
   }
   
-  if(dumpState->args[0] == 7098){
+  if(arg == 7098){
     if(signal->length() == 3){
       jam();
       infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)",
@@ -13394,12 +13369,12 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     }
   }
 
-  if(dumpState->args[0] == DumpStateOrd::DihStartLcpImmediately){
+  if(arg == DumpStateOrd::DihStartLcpImmediately){
     c_lcpState.ctimer += (1 << c_lcpState.clcpDelay);
     return;
   }
 
-  if (dumpState->args[0] == DumpStateOrd::DihSetTimeBetweenGcp)
+  if (arg == DumpStateOrd::DihSetTimeBetweenGcp)
   {
     if (signal->getLength() == 1)
     {
@@ -13414,6 +13389,53 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
     }
     ndbout_c("Setting time between gcp : %d", cgcpDelay);
   }
+
+  if (arg == 7021 && signal->getLength() == 2)
+  {
+    TabRecordPtr tabPtr;
+    tabPtr.i = signal->theData[1];
+    if (tabPtr.i >= ctabFileSize)
+      return;
+
+    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
+    
+    if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
+      return;
+    
+    infoEvent
+      ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
+       tabPtr.i, 
+       tabPtr.p->tabCopyStatus, 
+       tabPtr.p->tabUpdateState,
+       tabPtr.p->tabLcpStatus);
+    
+    FragmentstorePtr fragPtr;
+    for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
+      jam();
+      getFragstore(tabPtr.p, fid, fragPtr);
+      
+      char buf[100], buf2[100];
+      BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ", 
+			   fid, fragPtr.p->noLcpReplicas);
+      
+      Uint32 num=0;
+      ReplicaRecordPtr replicaPtr;
+      replicaPtr.i = fragPtr.p->storedReplicas;
+      do {
+	ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
+	BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
+			     buf, num, 
+			     replicaPtr.p->procNode, 
+			     replicaPtr.p->lcpIdStarted,
+			     replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
+	BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
+	
+	num++;
+	replicaPtr.i = replicaPtr.p->nextReplica;
+      } while (replicaPtr.i != RNIL);
+      infoEvent(buf);
+    }
+  }
 }//Dbdih::execDUMP_STATE_ORD()
 
 void

From 19340f2242443ec54101d7fd518be47211ed0f15 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 22 Mar 2006 11:44:31 +0100
Subject: [PATCH 09/16] ndb - bug#18414   Fix timeout during ABORT when
 ZABORT_TIMEOUT_BREAK is outstanding

ndb/src/kernel/blocks/ERROR_codes.txt:
  New error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  remove dumping of LCP info during NF
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp:
  Fix error code checking
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug18414
---
 ndb/src/kernel/blocks/ERROR_codes.txt     |  2 +
 ndb/src/kernel/blocks/dbdih/DbdihMain.cpp |  4 --
 ndb/src/kernel/blocks/dbtc/DbtcMain.cpp   | 52 +++++++++++++---
 ndb/test/ndbapi/testNodeRestart.cpp       | 73 +++++++++++++++++++++++
 ndb/test/ndbapi/testTimeout.cpp           |  7 ++-
 ndb/test/run-test/daily-basic-tests.txt   |  4 ++
 6 files changed, 128 insertions(+), 14 deletions(-)

diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index e5576450846..b4c5d1b1d7e 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out.
 8045: (ABORTCONF only as part of take-over)
 Delay execution of ABORTCONF signal 2 seconds to generate time-out.
 
+8050: Send ZABORT_TIMEOUT_BREAK delayed
+
 ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
 -------------------------------------------------
 
diff --git a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
index a8633af2529..de35ce5c275 100644
--- a/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+++ b/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
@@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
   signal->theData[0] = 7012;
   execDUMP_STATE_ORD(signal);
 
-  signal->theData[0] = 7015;
-  signal->theData[1] = 0;
-  execDUMP_STATE_ORD(signal);
-
   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
 
   checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
diff --git a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
index ff9b279592c..4ca13bf433b 100644
--- a/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
+++ b/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
@@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
     return;
   }
   
+  bool found = false;
   OperationState tmp[16];
   
   Uint32 TloopCount = 0;
@@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
     jam();
     if (tcConnectptr.i == RNIL) {
       jam();
-      if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+      ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+	       found, Tcheck, apiConnectptr.p->counter);
+#endif
+      if (found || apiConnectptr.p->counter)
+      {
+	jam();
+	/**
+	 * We sent atleast one ABORT/ABORTED
+	 *   or ZABORT_TIMEOUT_BREAK is in job buffer
+	 *   wait for reception...
+	 */
+	return;
+      }
+      
+      if (Tcheck == 1)
+      {
+	jam();
+	releaseAbortResources(signal);
+	return;
+      }
+      
+      if (Tcheck == 0)
+      {
         jam();
 	/*------------------------------------------------------------------
 	 * All nodes had already reported ABORTED for all tcConnect records.
@@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
 	 *------------------------------------------------------------------*/
 	char buf[96]; buf[0] = 0;
 	char buf2[96];
-	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
-		 __LINE__, apiConnectptr.i);
-	for(Uint32 i = 0; i<TloopCount; i++){
+	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+			     __LINE__, apiConnectptr.i,
+			     apiConnectptr.p->counter);
+	for(Uint32 i = 0; i<TloopCount; i++)
+	{
 	  BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
 	  BaseString::snprintf(buf, sizeof(buf), buf2);
 	}
@@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
 	ndbout_c(buf);
 	ndbrequire(false);
 	releaseAbortResources(signal);
+	return;
       }
+      
       return;
     }//if
     TloopCount++;
@@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
       signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
       signal->theData[1] = tcConnectptr.i;
       signal->theData[2] = apiConnectptr.i;      
-      sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      if (ERROR_INSERTED(8050))
+      {
+	ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", 
+		 Tcheck, apiConnectptr.p->counter);
+	sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+      }
+      else
+      {
+	sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      }
       return;
     }//if
     ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
         jam();
         if (tcConnectptr.p->tcNodedata[Ti] != 0) {
           TloopCount += 31;
-          Tcheck = 1;
+	  found = true;
           hostptr.i = tcConnectptr.p->tcNodedata[Ti];
           ptrCheckGuard(hostptr, chostFilesize, hostRecord);
           if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
   hostptr.i = tfailedNodeId;
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
 
-  ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
-	   tfailedNodeId, signal->getSendersBlockRef(), reference());
   if (signal->getSendersBlockRef() != reference())
   {
     jam();
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index eebd631af94..cc2998ff73a 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){
   return ret ? NDBT_OK : NDBT_FAILED;
 }
 
+int 
+runBug18414(NDBT_Context* ctx, NDBT_Step* step){
+
+  NdbRestarter restarter;
+  if (restarter.getNumDbNodes() < 2)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+
+  Ndb* pNdb = GETNDB(step);
+  HugoOperations hugoOps(*ctx->getTab());
+  HugoTransactions hugoTrans(*ctx->getTab());
+  int loop = 0;
+  do 
+  {
+    if(hugoOps.startTransaction(pNdb) != 0)
+      goto err;
+    
+    if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
+      goto err;
+    
+    if(hugoOps.execute_NoCommit(pNdb) != 0)
+      goto err;
+
+    int node1 = hugoOps.getTransaction()->getConnectedNodeId();
+    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+    
+    if (node1 == -1 || node2 == -1)
+      break;
+    
+    if (loop & 1)
+    {
+      if (restarter.insertErrorInNode(node1, 8050))
+	goto err;
+    }
+    
+    if (restarter.insertErrorInNode(node2, 5003))
+      goto err;
+    
+    int res= hugoOps.execute_Rollback(pNdb);
+  
+    if (restarter.waitNodesNoStart(&node2, 1) != 0)
+      goto err;
+    
+    if (restarter.insertErrorInAllNodes(0))
+      goto err;
+    
+    if (restarter.startNodes(&node2, 1) != 0)
+      goto err;
+    
+    if (restarter.waitClusterStarted() != 0)
+      goto err;
+    
+    if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
+      goto err;
+
+    hugoOps.closeTransaction(pNdb);
+    
+  } while(++loop < 5);
+  
+  return NDBT_OK;
+  
+err:
+  hugoOps.closeTransaction(pNdb);
+  return NDBT_FAILED;    
+}
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -870,6 +937,12 @@ TESTCASE("Bug16772",
 	 "Test bug with restarting before NF handling is complete"){
   STEP(runBug16772);
 }
+TESTCASE("Bug18414",
+	 "Test bug with NF during NR"){
+  INITIALIZER(runLoadTable);
+  STEP(runBug18414);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){
diff --git a/ndb/test/ndbapi/testTimeout.cpp b/ndb/test/ndbapi/testTimeout.cpp
index 25392698642..957fcd1d1e7 100644
--- a/ndb/test/ndbapi/testTimeout.cpp
+++ b/ndb/test/ndbapi/testTimeout.cpp
@@ -173,8 +173,11 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){
       NdbSleep_MilliSleep(sleep);
       
       // Expect that transaction has timed-out
-      CHECK(hugoOps.execute_Commit(pNdb) == 237); 
-
+      int ret = hugoOps.execute_Commit(pNdb);
+      CHECK(ret != 0);
+      NdbError err = pNdb->getNdbError(ret);
+      CHECK(err.classification == NdbError::TimeoutExpired);
+      
     } while(false);
 
     hugoOps.closeTransaction(pNdb);
diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index 0533d585a41..b11e4479a57 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -458,6 +458,10 @@ max-time: 500
 cmd: testSystemRestart
 args: -n Bug18385 T1
 
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug18414 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

From ad911e8575e84fb336143b5463711ba8dfc7690b Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 22 Mar 2006 12:11:51 +0100
Subject: [PATCH 10/16] ndb -   minor update to ndb-autotest.sh and config
 files

ndb/test/run-test/conf-daily-devel-ndbmaster.txt:
  Add SendBufferMemory to remove rare overruns
ndb/test/run-test/conf-dl145a.txt:
  Add SendBufferMemory to remove rare overruns
ndb/test/run-test/conf-ndbmaster.txt:
  Add SendBufferMemory to remove rare overruns
ndb/test/run-test/conf-shark.txt:
  Add SendBufferMemory to remove rare overruns
ndb/test/run-test/ndb-autotest.sh:
  Add support for conf per host
---
 ndb/test/run-test/conf-daily-devel-ndbmaster.txt               | 3 +++
 .../run-test/{conf-daily-basic-dl145a.txt => conf-dl145a.txt}  | 3 +++
 .../{conf-daily-basic-ndbmaster.txt => conf-ndbmaster.txt}     | 3 +++
 .../run-test/{conf-daily-basic-shark.txt => conf-shark.txt}    | 3 +++
 ndb/test/run-test/ndb-autotest.sh                              | 3 +++
 5 files changed, 15 insertions(+)
 rename ndb/test/run-test/{conf-daily-basic-dl145a.txt => conf-dl145a.txt} (91%)
 rename ndb/test/run-test/{conf-daily-basic-ndbmaster.txt => conf-ndbmaster.txt} (91%)
 rename ndb/test/run-test/{conf-daily-basic-shark.txt => conf-shark.txt} (91%)

diff --git a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
index 8b340e6a39d..51c171a6357 100644
--- a/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
+++ b/ndb/test/run-test/conf-daily-devel-ndbmaster.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
 PortNumber: 16000
 ArbitrationRank: 1
 DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-dl145a.txt b/ndb/test/run-test/conf-dl145a.txt
similarity index 91%
rename from ndb/test/run-test/conf-daily-basic-dl145a.txt
rename to ndb/test/run-test/conf-dl145a.txt
index d8cf8d34d82..d0a240f09d1 100644
--- a/ndb/test/run-test/conf-daily-basic-dl145a.txt
+++ b/ndb/test/run-test/conf-dl145a.txt
@@ -17,3 +17,6 @@ FileSystemPath: /home/ndbdev/autotest/run
 PortNumber: 14000
 ArbitrationRank: 1
 DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt b/ndb/test/run-test/conf-ndbmaster.txt
similarity index 91%
rename from ndb/test/run-test/conf-daily-basic-ndbmaster.txt
rename to ndb/test/run-test/conf-ndbmaster.txt
index bcd809593f3..89b41850ec0 100644
--- a/ndb/test/run-test/conf-daily-basic-ndbmaster.txt
+++ b/ndb/test/run-test/conf-ndbmaster.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
 PortNumber: 14000
 ArbitrationRank: 1
 DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/conf-daily-basic-shark.txt b/ndb/test/run-test/conf-shark.txt
similarity index 91%
rename from ndb/test/run-test/conf-daily-basic-shark.txt
rename to ndb/test/run-test/conf-shark.txt
index 6d1f8b64f44..d66d0280d8a 100644
--- a/ndb/test/run-test/conf-daily-basic-shark.txt
+++ b/ndb/test/run-test/conf-shark.txt
@@ -17,3 +17,6 @@ FileSystemPath: /space/autotest/run
 PortNumber: 14000
 ArbitrationRank: 1
 DataDir: .
+
+[TCP DEFAULT]
+SendBufferMemory: 2M
diff --git a/ndb/test/run-test/ndb-autotest.sh b/ndb/test/run-test/ndb-autotest.sh
index 4228d2354d3..459f0cd6233 100755
--- a/ndb/test/run-test/ndb-autotest.sh
+++ b/ndb/test/run-test/ndb-autotest.sh
@@ -299,9 +299,13 @@ choose_conf(){
     elif [ -f $test_dir/conf-$1.txt ]
     then
 	echo "$test_dir/conf-$1.txt"
+    elif [ -f $test_dir/conf-$HOST.txt ]
+    then
+	echo "$test_dir/conf-$HOST.txt"
     else
 	echo "Unable to find conf file looked for" 1>&2
 	echo "$test_dir/conf-$1-$HOST.txt and" 1>&2
+	echo "$test_dir/conf-$HOST.txt" 1>&2
 	echo "$test_dir/conf-$1.txt" 1>&2
 	exit
     fi

From 4fb98ee6b87a63374381788e2c70bc17e61bd455 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 22 Mar 2006 12:18:07 +0100
Subject: [PATCH 11/16] ndb -   some more ndb-autotest updates (previously
 uncommitted...but in use)

ndb/test/run-test/ndb-autotest.sh:
  More autotest updates
---
 ndb/test/run-test/ndb-autotest.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ndb/test/run-test/ndb-autotest.sh b/ndb/test/run-test/ndb-autotest.sh
index 459f0cd6233..544897a2aa2 100755
--- a/ndb/test/run-test/ndb-autotest.sh
+++ b/ndb/test/run-test/ndb-autotest.sh
@@ -13,7 +13,7 @@ save_args=$*
 VERSION="ndb-autotest.sh version 1.04"
 
 DATE=`date '+%Y-%m-%d'`
-HOST=`hostname`
+HOST=`hostname -s`
 export DATE HOST
 
 set -e
@@ -35,6 +35,7 @@ report=yes
 clone=5.0-ndb
 RUN="daily-basic daily-devel"
 conf=autotest.conf
+LOCK=$HOME/.autotest-lock
 
 ############################
 # Read command line entries#
@@ -66,7 +67,7 @@ done
 
 if [ -f $conf ]
 then
-	. ./$conf
+	. $conf
 else
 	echo "Can't find config file: $conf"
 	exit
@@ -105,7 +106,6 @@ fi
 # Setup the clone source location  #
 ####################################
 
-LOCK=$HOME/.autotest-lock
 src_clone=$src_clone_base-$clone
 
 #######################################
@@ -389,7 +389,8 @@ do
                        awk '{for(i=1;i<='$count';i++)print $i;}'`
 	    echo $run_hosts >> /tmp/filter_hosts.$$	
 	
-	    choose $conf $run_hosts > d.tmp
+	    choose $conf $run_hosts > d.tmp.$$
+            sed -e s,CHOOSE_dir,"$install_dir",g < d.tmp.$$ > d.tmp
 	    $mkconfig d.tmp
 	fi
 	

From e74b313c115b6eec1e96a33e16d117f33c788ce8 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 22 Mar 2006 13:38:03 +0100
Subject: [PATCH 12/16] ndb - autotest   Update makefile for removed files

ndb/test/run-test/Makefile.am:
  Update makefile for removed files
---
 ndb/test/run-test/Makefile.am | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/ndb/test/run-test/Makefile.am b/ndb/test/run-test/Makefile.am
index cf08542ae97..8aced6e91b3 100644
--- a/ndb/test/run-test/Makefile.am
+++ b/ndb/test/run-test/Makefile.am
@@ -7,11 +7,10 @@ include $(top_srcdir)/ndb/config/type_mgmapiclient.mk.am
 
 test_PROGRAMS = atrt
 test_DATA=daily-basic-tests.txt daily-devel-tests.txt \
-          conf-daily-basic-ndbmaster.txt \
-          conf-daily-basic-shark.txt \
-          conf-daily-devel-ndbmaster.txt \
-          conf-daily-sql-ndbmaster.txt \
-          conf-daily-basic-dl145a.txt
+          conf-ndbmaster.txt \
+          conf-shark.txt \
+          conf-dl145a.txt
+
 test_SCRIPTS=atrt-analyze-result.sh atrt-gather-result.sh atrt-setup.sh \
           atrt-clear-result.sh make-config.sh make-index.sh make-html-reports.sh
 

From 2279f08af421311fb7b22474942dc7fe2cfd3bc6 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Wed, 22 Mar 2006 15:06:44 +0100
Subject: [PATCH 13/16] ndb -   Add per partition info (optionally to ndb_desc)

ndb/tools/desc.cpp:
  Add per partition info (optionally to ndb_desc)
---
 ndb/tools/desc.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/ndb/tools/desc.cpp b/ndb/tools/desc.cpp
index aac47c9042c..e5371b9b458 100644
--- a/ndb/tools/desc.cpp
+++ b/ndb/tools/desc.cpp
@@ -23,6 +23,7 @@ NDB_STD_OPTS_VARS;
 
 static const char* _dbname = "TEST_DB";
 static int _unqualified = 0;
+static int _partinfo = 0;
 static struct my_option my_long_options[] =
 {
   NDB_STD_OPTS("ndb_desc"),
@@ -32,6 +33,9 @@ static struct my_option my_long_options[] =
   { "unqualified", 'u', "Use unqualified table names",
     (gptr*) &_unqualified, (gptr*) &_unqualified, 0,
     GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, 
+  { "extra-partition-info", 'p', "Print more info per partition",
+    (gptr*) &_partinfo, (gptr*) &_partinfo, 0,
+    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, 
   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
 };
 static void usage()
@@ -52,6 +56,8 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
 				"d:t:O,/tmp/ndb_desc.trace");
 }
 
+static void print_part_info(Ndb* pNdb, NDBT_Table* pTab);
+
 int main(int argc, char** argv){
   NDB_INIT(argv[0]);
   const char *load_default_groups[]= { "mysql_cluster",0 };
@@ -106,7 +112,11 @@ int main(int argc, char** argv){
 	  
 	ndbout << (*pIdx) << endl;
       }
+
       ndbout << endl;
+      
+      if (_partinfo)
+	print_part_info(pMyNdb, pTab);
     }
     else
       ndbout << argv[i] << ": " << dict->getNdbError() << endl;
@@ -115,3 +125,70 @@ int main(int argc, char** argv){
   delete pMyNdb;
   return NDBT_ProgramExit(NDBT_OK);
 }
+
+struct InfoInfo
+{
+  const char * m_title;
+  NdbRecAttr* m_rec_attr;
+  const NdbDictionary::Column* m_column;
+};
+
+
+static 
+void print_part_info(Ndb* pNdb, NDBT_Table* pTab)
+{
+  InfoInfo g_part_info[] = {
+    { "Partition", 0, NdbDictionary::Column::FRAGMENT },
+    { "Row count", 0, NdbDictionary::Column::ROW_COUNT },
+    { "Commit count", 0, NdbDictionary::Column::COMMIT_COUNT },
+    { 0, 0, 0 }
+  };
+
+  ndbout << "-- Per partition info -- " << endl;
+  
+  NdbConnection* pTrans = pNdb->startTransaction();
+  if (pTrans == 0)
+    return;
+  
+  do
+  {
+    NdbScanOperation* pOp= pTrans->getNdbScanOperation(pTab->getName());
+    if (pOp == NULL)
+      break;
+    
+    NdbResultSet* rs= pOp->readTuples(NdbOperation::LM_CommittedRead); 
+    if (rs == 0)
+      break;
+    
+    if (pOp->interpret_exit_last_row() != 0)
+      break;
+    
+    Uint32 i = 0;
+    for(i = 0; g_part_info[i].m_title != 0; i++)
+    {
+      if ((g_part_info[i].m_rec_attr = pOp->getValue(g_part_info[i].m_column)) == 0)
+	break;
+    }
+
+    if (g_part_info[i].m_title != 0)
+      break;
+
+    if (pTrans->execute(NoCommit) != 0)
+      break;
+	
+    for (i = 0; g_part_info[i].m_title != 0; i++)
+      ndbout << g_part_info[i].m_title << "\t";
+    ndbout << endl;
+    
+    while(rs->nextResult() == 0)
+    {
+      for(i = 0; g_part_info[i].m_title != 0; i++)
+      {
+	ndbout << *g_part_info[i].m_rec_attr << "\t";
+      }
+      ndbout << endl;
+    }
+  } while(0);
+  
+  pTrans->close();
+}

From fde02a804367149ccd24718044ca3de82cc30de5 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Thu, 23 Mar 2006 11:53:54 +0100
Subject: [PATCH 14/16] ndb -   minor fixes in test programs

ndb/src/kernel/blocks/ERROR_codes.txt:
  Fix conflicting error codes
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  Fix conflicting error codes
ndb/test/ndbapi/testNodeRestart.cpp:
  Fix test program
---
 ndb/src/kernel/blocks/ERROR_codes.txt     | 2 +-
 ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 2 +-
 ndb/test/ndbapi/testNodeRestart.cpp       | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ndb/src/kernel/blocks/ERROR_codes.txt b/ndb/src/kernel/blocks/ERROR_codes.txt
index b4c5d1b1d7e..4887b6a7ea5 100644
--- a/ndb/src/kernel/blocks/ERROR_codes.txt
+++ b/ndb/src/kernel/blocks/ERROR_codes.txt
@@ -316,7 +316,7 @@ LQH:
 5026  Crash when receiving COPY_ACTIVEREQ
 5027  Crash when receiving STAT_RECREQ
 
-5042  Crash starting node, when scan is finished on primary replica
+5043  Crash starting node, when scan is finished on primary replica
 
 Test Crashes in handling take over
 ----------------------------------
diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
index 0aeeaccd55e..3540fc79dff 100644
--- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
+++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
@@ -9221,7 +9221,7 @@ void Dblqh::nextScanConfCopyLab(Signal* signal)
 /*---------------------------------------------------------------------------*/
     scanptr.p->scanCompletedStatus = ZTRUE;
     scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
-    if (ERROR_INSERTED(5042))
+    if (ERROR_INSERTED(5043))
     {
       CLEAR_ERROR_INSERT_VALUE;
       tcConnectptr.p->copyCountWords = ~0;
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index cc2998ff73a..5a7510be9bd 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -439,6 +439,14 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
   if (restarter.startNodes(&nodeId, 1))
     return NDBT_FAILED;
 
+  restarter.waitNodesStartPhase(&nodeId, 1, 3);
+  
+  if (restarter.waitNodesNoStart(&nodeId, 1))
+    return NDBT_FAILED; 
+   
+  if (restarter.startNodes(&nodeId, 1))
+    return NDBT_FAILED;
+  
   if (restarter.waitNodesStarted(&nodeId, 1))
     return NDBT_FAILED;
   

From deb4d310909b8589368adf561a2663007dde5cbe Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Thu, 23 Mar 2006 15:33:40 +0100
Subject: [PATCH 15/16] ndb -   remove bug#18385 from autotest as it only works
 on 2 node clusters

ndb/test/run-test/daily-basic-tests.txt:
  remove bug#18385 as it only works on 2 node clusters
---
 ndb/test/run-test/daily-basic-tests.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt
index b11e4479a57..ce5462d11c9 100644
--- a/ndb/test/run-test/daily-basic-tests.txt
+++ b/ndb/test/run-test/daily-basic-tests.txt
@@ -454,10 +454,10 @@ max-time: 500
 cmd: testNodeRestart
 args: -n Bug16772 T1
 
-max-time: 500
-cmd: testSystemRestart
-args: -n Bug18385 T1
-
+#max-time: 500
+#cmd: testSystemRestart
+#args: -n Bug18385 T1
+#
 max-time: 500
 cmd: testNodeRestart
 args: -n Bug18414 T1

From 2a00c51673dd11230daa9d3843b8076a15e1f874 Mon Sep 17 00:00:00 2001
From: unknown <jonas@perch.ndb.mysql.com>
Date: Mon, 27 Mar 2006 10:18:48 +0200
Subject: [PATCH 16/16] ndb - autotest   Change semantic on
 DumpStateOrd::CmvmiSetRestartOnErrorInsert()     Called wo/ args it resets to
 value in configuration (previously it set to 1 if called wo/ args)

ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
  Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert
    Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args)
ndb/test/ndbapi/testNodeRestart.cpp:
  Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert
    Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args)
ndb/test/src/NdbBackup.cpp:
  Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert
    Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args)
ndb/test/src/NdbRestarts.cpp:
  Change semantic on DumpStateOrd::CmvmiSetRestartOnErrorInsert
    Called wo/ args it resets to value in configuration (previously it set to 1 if called wo/ args)
---
 ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp | 17 +++++++++++++++--
 ndb/test/ndbapi/testNodeRestart.cpp   | 12 ++++++++++++
 ndb/test/src/NdbBackup.cpp            |  4 ++--
 ndb/test/src/NdbRestarts.cpp          | 16 ++++++++--------
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
index 7659ee1145d..04761cb67a8 100644
--- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
+++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
@@ -1049,11 +1049,24 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
 	      g_sectionSegmentPool.getNoOfFree());
   }
   
-  if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert){
+  if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
+  {
     if(signal->getLength() == 1)
-      theConfig.setRestartOnErrorInsert((int)NRT_NoStart_Restart);
+    {
+      Uint32 val = (Uint32)NRT_NoStart_Restart;
+      const ndb_mgm_configuration_iterator * p = 
+	theConfig.getOwnConfigIterator();
+      ndbrequire(p != 0);
+      
+      if(!ndb_mgm_get_int_parameter(p, CFG_DB_STOP_ON_ERROR_INSERT, &val))
+      {
+	theConfig.setRestartOnErrorInsert(val);
+      }
+    }
     else
+    {
       theConfig.setRestartOnErrorInsert(signal->theData[1]);
+    }
   }
 
   if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
diff --git a/ndb/test/ndbapi/testNodeRestart.cpp b/ndb/test/ndbapi/testNodeRestart.cpp
index 5a7510be9bd..365d6e3ed6e 100644
--- a/ndb/test/ndbapi/testNodeRestart.cpp
+++ b/ndb/test/ndbapi/testNodeRestart.cpp
@@ -433,6 +433,11 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
   if (restarter.waitNodesNoStart(&nodeId, 1))
     return NDBT_FAILED; 
    
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+  
+  if (restarter.dumpStateOneNode(nodeId, val2, 2))
+    return NDBT_FAILED;
+
   if (restarter.dumpStateOneNode(nodeId, dump, 2))
     return NDBT_FAILED;
 
@@ -444,6 +449,9 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
   if (restarter.waitNodesNoStart(&nodeId, 1))
     return NDBT_FAILED; 
    
+  if (restarter.dumpStateOneNode(nodeId, val2, 1))
+    return NDBT_FAILED;
+  
   if (restarter.startNodes(&nodeId, 1))
     return NDBT_FAILED;
   
@@ -626,6 +634,10 @@ runBug18414(NDBT_Context* ctx, NDBT_Step* step){
 	goto err;
     }
     
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    if (restarter.dumpStateOneNode(node2, val2, 2))
+      goto err;
+    
     if (restarter.insertErrorInNode(node2, 5003))
       goto err;
     
diff --git a/ndb/test/src/NdbBackup.cpp b/ndb/test/src/NdbBackup.cpp
index 9f65fe6b3bc..a9c71120d80 100644
--- a/ndb/test/src/NdbBackup.cpp
+++ b/ndb/test/src/NdbBackup.cpp
@@ -292,8 +292,8 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
 	   << masterNodeId << endl;
 
 
-    int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert;
-    CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0,
+    int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
 	  "failed to set RestartOnErrorInsert");
     CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
 	  "failed to set error insert");
diff --git a/ndb/test/src/NdbRestarts.cpp b/ndb/test/src/NdbRestarts.cpp
index c0f31af84ce..eea4af437c4 100644
--- a/ndb/test/src/NdbRestarts.cpp
+++ b/ndb/test/src/NdbRestarts.cpp
@@ -641,8 +641,8 @@ int restartNFDuringNR(NdbRestarter& _restarter,
     CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
 	  "waitNodesNoStart failed");
     
-    int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert;
-    CHECK(_restarter.dumpStateOneNode(nodeId, &val, 1) == 0,
+    int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 } ;
+    CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0,
 	  "failed to set RestartOnErrorInsert");
     
     CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,
@@ -698,8 +698,8 @@ int restartNFDuringNR(NdbRestarter& _restarter,
     CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
 	  "waitNodesNoStart failed");
         
-    int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert;
-    CHECK(_restarter.dumpStateOneNode(crashNodeId, &val, 2) == 0,
+    int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    CHECK(_restarter.dumpStateOneNode(crashNodeId, val, 2) == 0,
 	  "failed to set RestartOnErrorInsert");
     
     CHECK(_restarter.insertErrorInNode(crashNodeId, error) == 0,
@@ -771,8 +771,8 @@ int restartNodeDuringLCP(NdbRestarter& _restarter,
 	   << " error code = " << error << endl;
 
     {
-      int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert;
-      CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0,
+      int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+      CHECK(_restarter.dumpStateAllNodes(val, 2) == 0,
 	    "failed to set RestartOnErrorInsert");
     }
 
@@ -812,8 +812,8 @@ int restartNodeDuringLCP(NdbRestarter& _restarter,
     ndbout << _restart->m_name << " restarting non-master node = " << nodeId
 	   << " error code = " << error << endl;
 
-    int val = DumpStateOrd::CmvmiSetRestartOnErrorInsert;
-    CHECK(_restarter.dumpStateAllNodes(&val, 1) == 0,
+    int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    CHECK(_restarter.dumpStateAllNodes(val, 2) == 0,
 	  "failed to set RestartOnErrorInsert");
     
     CHECK(_restarter.insertErrorInNode(nodeId, error) == 0,