Bug #13461 Slave Cluster crashed on restart of two data nodes in separate groups

- ensure in ndb_mgmd that the stop command is not issued if a node is restarting
- added some new error messages
- in ndbcntr on master, check so that a node does not shut down if a shutdown is in progress
This commit is contained in:
unknown 2005-10-13 16:38:38 +02:00
parent 849eafa751
commit 96d4b56cc5
5 changed files with 41 additions and 19 deletions

View file

@ -104,6 +104,7 @@ typedef ndbd_exit_classification_enum ndbd_exit_classification;
/* NDBCNTR 6100-> */
#define NDBD_EXIT_RESTART_TIMEOUT 6100
#define NDBD_EXIT_RESTART_DURING_SHUTDOWN 6101
/* TC 6200-> */
/* DIH 6300-> */

View file

@ -525,6 +525,9 @@ Ndbcntr::execCNTR_START_REF(Signal * signal){
cmasterNodeId = ref->masterNodeId;
sendCntrStartReq(signal);
return;
case CntrStartRef::StopInProgress:
jam();
progError(__LINE__, NDBD_EXIT_RESTART_DURING_SHUTDOWN);
}
ndbrequire(false);
}
@ -2022,7 +2025,9 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
return;
}
if(c_stopRec.stopReq.senderRef != 0){
if(c_stopRec.stopReq.senderRef != 0 ||
(cmasterNodeId == getOwnNodeId() && !c_start.m_starting.isclear()))
{
/**
* Requested a system shutdown
*/
@ -2036,7 +2041,8 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
/**
* Requested a node shutdown
*/
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
if(c_stopRec.stopReq.senderRef &&
StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
ref->errorCode = StopRef::SystemShutdownInProgress;
else
ref->errorCode = StopRef::NodeShutdownInProgress;

View file

@ -101,6 +101,9 @@ static const ErrStruct errArray[] =
{NDBD_EXIT_RESTART_TIMEOUT, XCE,
"Total restart time too long, consider increasing StartFailureTimeout "
"or investigate error(s) on other node(s)"},
{NDBD_EXIT_RESTART_DURING_SHUTDOWN, XRE,
"Node started while node shutdown in progress. "
"Please wait until shutdown complete before starting node"},
/* DIH */
{NDBD_EXIT_MAX_CRASHED_REPLICAS, XFL,

View file

@ -277,15 +277,13 @@ static ErrorItem errorTable[] =
{MgmtSrvr::NOT_POSSIBLE_TO_SEND_CONFIG_UPDATE_TO_PROCESS_TYPE,
"It is not possible to send an update of a configuration variable "
"to this kind of process."},
{5026, "Node shutdown in progress" },
{5027, "System shutdown in progress" },
{5028, "Node shutdown would cause system crash" },
{5029, "Only one shutdown at a time is possible via mgm server" },
{5060, "Operation not allowed in single user mode." },
{5061, "DB is not in single user mode." },
{5062, "The specified node is not an API node." },
{5063,
"Cannot enter single user mode. DB nodes in inconsistent startlevel."},
{MgmtSrvr::NODE_SHUTDOWN_IN_PROGESS, "Node shutdown in progress" },
{MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" },
{MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH,
"Node shutdown would cause system crash" },
{MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." },
{MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP,
"Operation not allowed while nodes are starting or stopping."},
{MgmtSrvr::NO_CONTACT_WITH_DB_NODES, "No contact with database nodes" }
};
@ -293,13 +291,13 @@ int MgmtSrvr::translateStopRef(Uint32 errCode)
{
switch(errCode){
case StopRef::NodeShutdownInProgress:
return 5026;
return NODE_SHUTDOWN_IN_PROGESS;
break;
case StopRef::SystemShutdownInProgress:
return 5027;
return SYSTEM_SHUTDOWN_IN_PROGRESS;
break;
case StopRef::NodeShutdownWouldCauseSystemCrash:
return 5028;
return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
break;
}
return 4999;
@ -989,6 +987,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
int MgmtSrvr::stopNode(int nodeId, bool abort)
{
if (!abort)
{
NodeId nodeId = 0;
ClusterMgr::Node node;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
{
node = theFacade->theClusterMgr->getNodeInfo(nodeId);
if((node.m_state.startLevel != NodeState::SL_STARTED) &&
(node.m_state.startLevel != NodeState::SL_NOTHING))
return OPERATION_NOT_ALLOWED_START_STOP;
}
}
NodeBitmask nodes;
return sendSTOP_REQ(nodeId,
nodes,
@ -1027,7 +1037,7 @@ int MgmtSrvr::stop(int * stopCount, bool abort)
int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
{
if (getNodeType(singleUserNodeId) != NDB_MGM_NODE_TYPE_API)
return 5062;
return NODE_NOT_API_NODE;
NodeId nodeId = 0;
ClusterMgr::Node node;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
@ -1035,7 +1045,7 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
node = theFacade->theClusterMgr->getNodeInfo(nodeId);
if((node.m_state.startLevel != NodeState::SL_STARTED) &&
(node.m_state.startLevel != NodeState::SL_NOTHING))
return 5063;
return OPERATION_NOT_ALLOWED_START_STOP;
}
NodeBitmask nodes;
int ret = sendSTOP_REQ(0,

View file

@ -174,10 +174,12 @@ public:
STATIC_CONST( NODE_SHUTDOWN_IN_PROGESS = 5026 );
STATIC_CONST( SYSTEM_SHUTDOWN_IN_PROGRESS = 5027 );
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
STATIC_CONST( NO_CONTACT_WITH_CLUSTER = 6666 );
STATIC_CONST( OPERATION_IN_PROGRESS = 6667 );
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
STATIC_CONST( NODE_NOT_API_NODE = 5062 );
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
/**
* This enum specifies the different signal loggig modes possible to set
* with the setSignalLoggingMode method.