From 80abad58fc60e4cd93b31f24030340c380ddab77 Mon Sep 17 00:00:00 2001
From: unknown <joreland@mysql.com>
Date: Wed, 4 May 2005 18:40:54 +0200
Subject: [PATCH] bug#10358 - ndb   Cluster failure with non-started nodes can
 result in timed-out transactions

ndb/src/mgmapi/mgmapi.cpp:
  Increase timeout for restarts
ndb/src/ndbapi/ClusterMgr.cpp:
  Report NFCOMPLETEREP if no alive node exists
    (instead of no connected node exists)
ndb/src/ndbapi/ClusterMgr.hpp:
  Report NFCOMPLETEREP if no alive node exists
    (instead of no connected node exists)
---
 ndb/src/mgmapi/mgmapi.cpp     |  6 ++++++
 ndb/src/ndbapi/ClusterMgr.cpp | 13 +++++++------
 ndb/src/ndbapi/ClusterMgr.hpp | 14 ++++++++++++++
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp
index a8931fb32ea..863f54ce51a 100644
--- a/ndb/src/mgmapi/mgmapi.cpp
+++ b/ndb/src/mgmapi/mgmapi.cpp
@@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
     args.put("initialstart", initial);
     args.put("nostart", nostart);
     const Properties *reply;
+    const int timeout = handle->read_timeout;
+    handle->read_timeout= 5*60*1000; // 5 minutes
     reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
+    handle->read_timeout= timeout;
     CHECK_REPLY(reply, -1);
 
     BaseString result;
@@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
   args.put("nostart", nostart);
 
   const Properties *reply;
+  const int timeout = handle->read_timeout;
+  handle->read_timeout= 5*60*1000; // 5 minutes
   reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
+  handle->read_timeout= timeout;
   if(reply != NULL) {
     BaseString result;
     reply->get("result", result);
diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp
index 1fe0cedbd6c..9603ddf7751 100644
--- a/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/ndb/src/ndbapi/ClusterMgr.cpp
@@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
 {
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
+  noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
 }
@@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
   node.m_state = apiRegConf->nodeState;
   if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED  ||
 			  node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
-    node.m_alive = true;
+    set_node_alive(node, true);
   } else {
-    node.m_alive = false;
+    set_node_alive(node, false);
   }//if
   node.hbSent = 0;
   node.hbCounter = 0;
@@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
   assert(node.defined == true);
 
   node.compatible = false;
-  node.m_alive = false;
+  set_node_alive(node, false);
   node.m_state = NodeState::SL_NOTHING;
   node.m_info.m_version = ref->version;
 
@@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
 
   Node & theNode = theNodes[nodeId];
  
-  theNode.m_alive = false;
+  set_node_alive(theNode, false);
   if(theNode.connected)
     theFacade.doDisconnect(nodeId);
   
@@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
   }  
 
   theNode.nfCompleteRep = false;
-
-  if(noOfConnectedNodes == 0){
+  
+  if(noOfAliveNodes == 0){
     NFCompleteRep rep;
     for(Uint32 i = 1; i<MAX_NODES; i++){
       if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
diff --git a/ndb/src/ndbapi/ClusterMgr.hpp b/ndb/src/ndbapi/ClusterMgr.hpp
index cc3cf66c8aa..d75b820e9cb 100644
--- a/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/ndb/src/ndbapi/ClusterMgr.hpp
@@ -80,6 +80,7 @@ public:
   Uint32        getNoOfConnectedNodes() const;
   
 private:
+  Uint32        noOfAliveNodes;
   Uint32        noOfConnectedNodes;
   Node          theNodes[MAX_NODES];
   NdbThread*    theClusterMgrThread;
@@ -100,6 +101,19 @@ private:
   void execAPI_REGREF    (const Uint32 * theData);
   void execNODE_FAILREP  (const Uint32 * theData);
   void execNF_COMPLETEREP(const Uint32 * theData);
+
+  inline void set_node_alive(Node& node, bool alive){ // set node.m_alive, keeping noOfAliveNodes in step
+    if(node.m_alive && !alive) // transition: alive -> not alive
+    {
+      assert(noOfAliveNodes); // counter must be non-zero before the decrement below
+      noOfAliveNodes--;
+    }
+    else if(!node.m_alive && alive) // transition: not alive -> alive
+    {
+      noOfAliveNodes++;
+    }
+    node.m_alive = alive; // written unconditionally; the counter is untouched when the flag does not change
+  }
 };
 
 inline