mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 13:02:28 +01:00
Bug #13461 Slave Cluster crashed on restart of two data nodes in separate groups
- ensure in ndb_mgmd that the stop command is not issued if a node is restarting - added some new error messages - in ndbcntr, on the master, check so that a node does not shut down if a shutdown is in progress
This commit is contained in:
parent
849eafa751
commit
96d4b56cc5
5 changed files with 41 additions and 19 deletions
|
@ -104,6 +104,7 @@ typedef ndbd_exit_classification_enum ndbd_exit_classification;
|
|||
|
||||
/* NDBCNTR 6100-> */
|
||||
#define NDBD_EXIT_RESTART_TIMEOUT 6100
|
||||
#define NDBD_EXIT_RESTART_DURING_SHUTDOWN 6101
|
||||
|
||||
/* TC 6200-> */
|
||||
/* DIH 6300-> */
|
||||
|
|
|
@ -525,6 +525,9 @@ Ndbcntr::execCNTR_START_REF(Signal * signal){
|
|||
cmasterNodeId = ref->masterNodeId;
|
||||
sendCntrStartReq(signal);
|
||||
return;
|
||||
case CntrStartRef::StopInProgress:
|
||||
jam();
|
||||
progError(__LINE__, NDBD_EXIT_RESTART_DURING_SHUTDOWN);
|
||||
}
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
@ -2022,7 +2025,9 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
|
|||
return;
|
||||
}
|
||||
|
||||
if(c_stopRec.stopReq.senderRef != 0){
|
||||
if(c_stopRec.stopReq.senderRef != 0 ||
|
||||
(cmasterNodeId == getOwnNodeId() && !c_start.m_starting.isclear()))
|
||||
{
|
||||
/**
|
||||
* Requested a system shutdown
|
||||
*/
|
||||
|
@ -2036,7 +2041,8 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
|
|||
/**
|
||||
* Requested a node shutdown
|
||||
*/
|
||||
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
|
||||
if(c_stopRec.stopReq.senderRef &&
|
||||
StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
|
||||
ref->errorCode = StopRef::SystemShutdownInProgress;
|
||||
else
|
||||
ref->errorCode = StopRef::NodeShutdownInProgress;
|
||||
|
|
|
@ -101,6 +101,9 @@ static const ErrStruct errArray[] =
|
|||
{NDBD_EXIT_RESTART_TIMEOUT, XCE,
|
||||
"Total restart time too long, consider increasing StartFailureTimeout "
|
||||
"or investigate error(s) on other node(s)"},
|
||||
{NDBD_EXIT_RESTART_DURING_SHUTDOWN, XRE,
|
||||
"Node started while node shutdown in progress. "
|
||||
"Please wait until shutdown complete before starting node"},
|
||||
|
||||
/* DIH */
|
||||
{NDBD_EXIT_MAX_CRASHED_REPLICAS, XFL,
|
||||
|
|
|
@ -277,15 +277,13 @@ static ErrorItem errorTable[] =
|
|||
{MgmtSrvr::NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE,
|
||||
"It is not possible to send an update of a configuration variable "
|
||||
"to this kind of process."},
|
||||
{5026, "Node shutdown in progress" },
|
||||
{5027, "System shutdown in progress" },
|
||||
{5028, "Node shutdown would cause system crash" },
|
||||
{5029, "Only one shutdown at a time is possible via mgm server" },
|
||||
{5060, "Operation not allowed in single user mode." },
|
||||
{5061, "DB is not in single user mode." },
|
||||
{5062, "The specified node is not an API node." },
|
||||
{5063,
|
||||
"Cannot enter single user mode. DB nodes in inconsistent startlevel."},
|
||||
{MgmtSrvr::NODE_SHUTDOWN_IN_PROGESS, "Node shutdown in progress" },
|
||||
{MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" },
|
||||
{MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH,
|
||||
"Node shutdown would cause system crash" },
|
||||
{MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." },
|
||||
{MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP,
|
||||
"Operation not allowed while nodes are starting or stopping."},
|
||||
{MgmtSrvr::NO_CONTACT_WITH_DB_NODES, "No contact with database nodes" }
|
||||
};
|
||||
|
||||
|
@ -293,13 +291,13 @@ int MgmtSrvr::translateStopRef(Uint32 errCode)
|
|||
{
|
||||
switch(errCode){
|
||||
case StopRef::NodeShutdownInProgress:
|
||||
return 5026;
|
||||
return NODE_SHUTDOWN_IN_PROGESS;
|
||||
break;
|
||||
case StopRef::SystemShutdownInProgress:
|
||||
return 5027;
|
||||
return SYSTEM_SHUTDOWN_IN_PROGRESS;
|
||||
break;
|
||||
case StopRef::NodeShutdownWouldCauseSystemCrash:
|
||||
return 5028;
|
||||
return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
|
||||
break;
|
||||
}
|
||||
return 4999;
|
||||
|
@ -989,6 +987,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
|
|||
|
||||
int MgmtSrvr::stopNode(int nodeId, bool abort)
|
||||
{
|
||||
if (!abort)
|
||||
{
|
||||
NodeId nodeId = 0;
|
||||
ClusterMgr::Node node;
|
||||
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
|
||||
{
|
||||
node = theFacade->theClusterMgr->getNodeInfo(nodeId);
|
||||
if((node.m_state.startLevel != NodeState::SL_STARTED) &&
|
||||
(node.m_state.startLevel != NodeState::SL_NOTHING))
|
||||
return OPERATION_NOT_ALLOWED_START_STOP;
|
||||
}
|
||||
}
|
||||
NodeBitmask nodes;
|
||||
return sendSTOP_REQ(nodeId,
|
||||
nodes,
|
||||
|
@ -1027,7 +1037,7 @@ int MgmtSrvr::stop(int * stopCount, bool abort)
|
|||
int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
|
||||
{
|
||||
if (getNodeType(singleUserNodeId) != NDB_MGM_NODE_TYPE_API)
|
||||
return 5062;
|
||||
return NODE_NOT_API_NODE;
|
||||
NodeId nodeId = 0;
|
||||
ClusterMgr::Node node;
|
||||
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
|
||||
|
@ -1035,7 +1045,7 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
|
|||
node = theFacade->theClusterMgr->getNodeInfo(nodeId);
|
||||
if((node.m_state.startLevel != NodeState::SL_STARTED) &&
|
||||
(node.m_state.startLevel != NodeState::SL_NOTHING))
|
||||
return 5063;
|
||||
return OPERATION_NOT_ALLOWED_START_STOP;
|
||||
}
|
||||
NodeBitmask nodes;
|
||||
int ret = sendSTOP_REQ(0,
|
||||
|
|
|
@ -174,10 +174,12 @@ public:
|
|||
STATIC_CONST( NODE_SHUTDOWN_IN_PROGESS = 5026 );
|
||||
STATIC_CONST( SYSTEM_SHUTDOWN_IN_PROGRESS = 5027 );
|
||||
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
|
||||
STATIC_CONST( NO_CONTACT_WITH_CLUSTER = 6666 );
|
||||
STATIC_CONST( OPERATION_IN_PROGRESS = 6667 );
|
||||
|
||||
|
||||
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
|
||||
|
||||
STATIC_CONST( NODE_NOT_API_NODE = 5062 );
|
||||
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
|
||||
|
||||
/**
|
||||
* This enum specifies the different signal logging modes possible to set
|
||||
* with the setSignalLoggingMode method.
|
||||
|
|
Loading…
Reference in a new issue