mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 13:02:28 +01:00
bug#10358 - ndb
Cluster failure with non-started nodes can result in timed-out transactions. ndb/src/mgmapi/mgmapi.cpp: Increase timeout for restarts. ndb/src/ndbapi/ClusterMgr.cpp: Report NFCOMPLETEREP if no alive node exists (instead of no connected node exists). ndb/src/ndbapi/ClusterMgr.hpp: Report NFCOMPLETEREP if no alive node exists (instead of no connected node exists).
This commit is contained in:
parent
55c9c4d7e0
commit
80abad58fc
3 changed files with 27 additions and 6 deletions
|
@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
|
|||
args.put("initialstart", initial);
|
||||
args.put("nostart", nostart);
|
||||
const Properties *reply;
|
||||
const int timeout = handle->read_timeout;
|
||||
handle->read_timeout= 5*60*1000; // 5 minutes
|
||||
reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
|
||||
handle->read_timeout= timeout;
|
||||
CHECK_REPLY(reply, -1);
|
||||
|
||||
BaseString result;
|
||||
|
@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
|
|||
args.put("nostart", nostart);
|
||||
|
||||
const Properties *reply;
|
||||
const int timeout = handle->read_timeout;
|
||||
handle->read_timeout= 5*60*1000; // 5 minutes
|
||||
reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
|
||||
handle->read_timeout= timeout;
|
||||
if(reply != NULL) {
|
||||
BaseString result;
|
||||
reply->get("result", result);
|
||||
|
|
|
@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
|
|||
{
|
||||
ndbSetOwnVersion();
|
||||
clusterMgrThreadMutex = NdbMutex_Create();
|
||||
noOfAliveNodes= 0;
|
||||
noOfConnectedNodes= 0;
|
||||
theClusterMgrThread= 0;
|
||||
}
|
||||
|
@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
|
|||
node.m_state = apiRegConf->nodeState;
|
||||
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
|
||||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
|
||||
node.m_alive = true;
|
||||
set_node_alive(node, true);
|
||||
} else {
|
||||
node.m_alive = false;
|
||||
set_node_alive(node, false);
|
||||
}//if
|
||||
node.hbSent = 0;
|
||||
node.hbCounter = 0;
|
||||
|
@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
|
|||
assert(node.defined == true);
|
||||
|
||||
node.compatible = false;
|
||||
node.m_alive = false;
|
||||
set_node_alive(node, false);
|
||||
node.m_state = NodeState::SL_NOTHING;
|
||||
node.m_info.m_version = ref->version;
|
||||
|
||||
|
@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
|
|||
|
||||
Node & theNode = theNodes[nodeId];
|
||||
|
||||
theNode.m_alive = false;
|
||||
set_node_alive(theNode, false);
|
||||
if(theNode.connected)
|
||||
theFacade.doDisconnect(nodeId);
|
||||
|
||||
|
@ -450,7 +451,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
|
|||
|
||||
theNode.nfCompleteRep = false;
|
||||
|
||||
if(noOfConnectedNodes == 0){
|
||||
if(noOfAliveNodes == 0){
|
||||
NFCompleteRep rep;
|
||||
for(Uint32 i = 1; i<MAX_NODES; i++){
|
||||
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
|
||||
|
|
|
@ -80,6 +80,7 @@ public:
|
|||
Uint32 getNoOfConnectedNodes() const;
|
||||
|
||||
private:
|
||||
Uint32 noOfAliveNodes;
|
||||
Uint32 noOfConnectedNodes;
|
||||
Node theNodes[MAX_NODES];
|
||||
NdbThread* theClusterMgrThread;
|
||||
|
@ -100,6 +101,19 @@ private:
|
|||
void execAPI_REGREF (const Uint32 * theData);
|
||||
void execNODE_FAILREP (const Uint32 * theData);
|
||||
void execNF_COMPLETEREP(const Uint32 * theData);
|
||||
|
||||
/**
 * Record the new liveness of a node and keep noOfAliveNodes in step.
 *
 * The counter is only touched when the flag actually changes:
 * it is incremented on a not-alive -> alive transition and
 * decremented on an alive -> not-alive transition.
 *
 * @param node   node entry whose m_alive flag is updated
 * @param alive  the liveness state to store in node.m_alive
 */
inline void set_node_alive(Node& node, bool alive){
  const bool wasAlive = node.m_alive;
  if(wasAlive != alive)
  {
    if(alive)
    {
      noOfAliveNodes++;
    }
    else
    {
      // A node that was alive must already have been counted.
      assert(noOfAliveNodes);
      noOfAliveNodes--;
    }
  }
  node.m_alive = alive;
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
|
Loading…
Reference in a new issue