bug#10358 - ndb

A cluster failure with non-started nodes can result in timed-out transactions


ndb/src/mgmapi/mgmapi.cpp:
  Increase timeout for restarts
ndb/src/ndbapi/ClusterMgr.cpp:
  Report NFCOMPLETEREP if no alive node exists 
    (instead of no connected node exists)
ndb/src/ndbapi/ClusterMgr.hpp:
  Report NFCOMPLETEREP if no alive node exists 
    (instead of no connected node exists)
This commit is contained in:
unknown 2005-05-04 18:40:54 +02:00
parent 55c9c4d7e0
commit 80abad58fc
3 changed files with 27 additions and 6 deletions

View file

@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("initialstart", initial);
args.put("nostart", nostart);
const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
handle->read_timeout= timeout;
CHECK_REPLY(reply, -1);
BaseString result;
@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("nostart", nostart);
const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
handle->read_timeout= timeout;
if(reply != NULL) {
BaseString result;
reply->get("result", result);

View file

@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
{
ndbSetOwnVersion();
clusterMgrThreadMutex = NdbMutex_Create();
noOfAliveNodes= 0;
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
}
@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
node.m_state = apiRegConf->nodeState;
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
node.m_alive = true;
set_node_alive(node, true);
} else {
node.m_alive = false;
set_node_alive(node, false);
}//if
node.hbSent = 0;
node.hbCounter = 0;
@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
assert(node.defined == true);
node.compatible = false;
node.m_alive = false;
set_node_alive(node, false);
node.m_state = NodeState::SL_NOTHING;
node.m_info.m_version = ref->version;
@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
Node & theNode = theNodes[nodeId];
theNode.m_alive = false;
set_node_alive(theNode, false);
if(theNode.connected)
theFacade.doDisconnect(nodeId);
@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
}
theNode.nfCompleteRep = false;
if(noOfConnectedNodes == 0){
if(noOfAliveNodes == 0){
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){

View file

@ -80,6 +80,7 @@ public:
Uint32 getNoOfConnectedNodes() const;
private:
Uint32 noOfAliveNodes;
Uint32 noOfConnectedNodes;
Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread;
@ -100,6 +101,19 @@ private:
void execAPI_REGREF (const Uint32 * theData);
void execNODE_FAILREP (const Uint32 * theData);
void execNF_COMPLETEREP(const Uint32 * theData);
/**
 * Update the alive flag of a node and keep noOfAliveNodes in step with it.
 *
 * The counter is adjusted only when the flag actually flips:
 * alive -> not alive decrements it (asserting it is non-zero first),
 * not alive -> alive increments it. Setting the same state again
 * leaves the counter untouched.
 *
 * @param node   node entry whose m_alive flag is updated
 * @param alive  new value for the flag
 */
inline void set_node_alive(Node& node, bool alive){
  const bool wasAlive = node.m_alive;
  if(wasAlive != alive)
  {
    if(alive)
    {
      // Node just became alive
      noOfAliveNodes++;
    }
    else
    {
      // Node just stopped being alive; counter must not underflow
      assert(noOfAliveNodes);
      noOfAliveNodes--;
    }
  }
  node.m_alive = alive;
}
};
inline