From 80abad58fc60e4cd93b31f24030340c380ddab77 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 4 May 2005 18:40:54 +0200 Subject: [PATCH] bug#10358 - ndb Cluster failure with non-started nodes can result in timed-out transactions ndb/src/mgmapi/mgmapi.cpp: Increase timeout for restarts ndb/src/ndbapi/ClusterMgr.cpp: Report NFCOMPLETEREP if no alive node exists (instead of no connected node exists) ndb/src/ndbapi/ClusterMgr.hpp: Report NFCOMPLETEREP if no alive node exists (instead of no connected node exists) --- ndb/src/mgmapi/mgmapi.cpp | 6 ++++++ ndb/src/ndbapi/ClusterMgr.cpp | 13 +++++++------ ndb/src/ndbapi/ClusterMgr.hpp | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index a8931fb32ea..863f54ce51a 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list, args.put("initialstart", initial); args.put("nostart", nostart); const Properties *reply; + const int timeout = handle->read_timeout; + handle->read_timeout= 5*60*1000; // 5 minutes reply = ndb_mgm_call(handle, restart_reply, "restart all", &args); + handle->read_timeout= timeout; CHECK_REPLY(reply, -1); BaseString result; @@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list, args.put("nostart", nostart); const Properties *reply; + const int timeout = handle->read_timeout; + handle->read_timeout= 5*60*1000; // 5 minutes reply = ndb_mgm_call(handle, restart_reply, "restart node", &args); + handle->read_timeout= timeout; if(reply != NULL) { BaseString result; reply->get("result", result); diff --git a/ndb/src/ndbapi/ClusterMgr.cpp b/ndb/src/ndbapi/ClusterMgr.cpp index 1fe0cedbd6c..9603ddf7751 100644 --- a/ndb/src/ndbapi/ClusterMgr.cpp +++ b/ndb/src/ndbapi/ClusterMgr.cpp @@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade): { ndbSetOwnVersion(); 
clusterMgrThreadMutex = NdbMutex_Create(); + noOfAliveNodes= 0; noOfConnectedNodes= 0; theClusterMgrThread= 0; } @@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){ node.m_state = apiRegConf->nodeState; if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED || node.m_state.startLevel == NodeState::SL_SINGLEUSER)){ - node.m_alive = true; + set_node_alive(node, true); } else { - node.m_alive = false; + set_node_alive(node, false); }//if node.hbSent = 0; node.hbCounter = 0; @@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){ assert(node.defined == true); node.compatible = false; - node.m_alive = false; + set_node_alive(node, false); node.m_state = NodeState::SL_NOTHING; node.m_info.m_version = ref->version; @@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){ Node & theNode = theNodes[nodeId]; - theNode.m_alive = false; + set_node_alive(theNode, false); if(theNode.connected) theFacade.doDisconnect(nodeId); @@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){ } theNode.nfCompleteRep = false; - - if(noOfConnectedNodes == 0){ + + if(noOfAliveNodes == 0){ NFCompleteRep rep; for(Uint32 i = 1; i