mirror of
https://github.com/MariaDB/server.git
synced 2025-01-30 10:31:54 +01:00
bug#28445 - Heartbeat does not start until first API_REGREQ is received
- move api failure handling into own method - add START_ORD so that hb checking can start really early storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp: - make sure qmgr is "fully" informed about connections so that it can handle hb correctly - don't allow API/mysqld node to reconnect if we have not started yet (sp 8) storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp: - move api failure handling into own method - add START_ORD so that hb checking can start really early storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp: - move api failure handling into own method - add START_ORD so that hb checking can start really early - Init data structures in constructor - as CONNECT_REP may occur before start phases storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: - Init data structures in constructor - as CONNECT_REP may occur before start phases - start hb handling directly on connect rep (instead of first hb)
This commit is contained in:
parent
41362e64ee
commit
bcd2abaaf8
4 changed files with 241 additions and 203 deletions
|
@ -421,9 +421,10 @@ void Cmvmi::execCLOSE_COMREQ(Signal* signal)
|
||||||
// Uint32 noOfNodes = closeCom->noOfNodes;
|
// Uint32 noOfNodes = closeCom->noOfNodes;
|
||||||
|
|
||||||
jamEntry();
|
jamEntry();
|
||||||
for (unsigned i = 0; i < MAX_NODES; i++){
|
for (unsigned i = 0; i < MAX_NODES; i++)
|
||||||
if(NodeBitmask::get(closeCom->theNodes, i)){
|
{
|
||||||
|
if(NodeBitmask::get(closeCom->theNodes, i))
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
|
|
||||||
//-----------------------------------------------------
|
//-----------------------------------------------------
|
||||||
|
@ -437,7 +438,9 @@ void Cmvmi::execCLOSE_COMREQ(Signal* signal)
|
||||||
globalTransporterRegistry.do_disconnect(i);
|
globalTransporterRegistry.do_disconnect(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (failNo != 0) {
|
|
||||||
|
if (failNo != 0)
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
signal->theData[0] = userRef;
|
signal->theData[0] = userRef;
|
||||||
signal->theData[1] = failNo;
|
signal->theData[1] = failNo;
|
||||||
|
@ -456,13 +459,21 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
|
||||||
jamEntry();
|
jamEntry();
|
||||||
|
|
||||||
const Uint32 len = signal->getLength();
|
const Uint32 len = signal->getLength();
|
||||||
if(len == 2){
|
if(len == 2)
|
||||||
|
{
|
||||||
#ifdef ERROR_INSERT
|
#ifdef ERROR_INSERT
|
||||||
if (! ((ERROR_INSERTED(9000) || ERROR_INSERTED(9002))
|
if (! ((ERROR_INSERTED(9000) || ERROR_INSERTED(9002))
|
||||||
&& c_error_9000_nodes_mask.get(tStartingNode)))
|
&& c_error_9000_nodes_mask.get(tStartingNode)))
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
if (globalData.theStartLevel != NodeState::SL_STARTED &&
|
||||||
|
(getNodeInfo(tStartingNode).m_type != NodeInfo::DB &&
|
||||||
|
getNodeInfo(tStartingNode).m_type != NodeInfo::MGM))
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
globalTransporterRegistry.do_connect(tStartingNode);
|
globalTransporterRegistry.do_connect(tStartingNode);
|
||||||
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
|
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
|
||||||
|
|
||||||
|
@ -475,9 +486,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
|
||||||
//-----------------------------------------------------
|
//-----------------------------------------------------
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(unsigned int i = 1; i < MAX_NODES; i++ ) {
|
for(unsigned int i = 1; i < MAX_NODES; i++ )
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
|
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2)
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
|
|
||||||
#ifdef ERROR_INSERT
|
#ifdef ERROR_INSERT
|
||||||
|
@ -496,6 +509,7 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
if (userRef != 0) {
|
if (userRef != 0) {
|
||||||
jam();
|
jam();
|
||||||
signal->theData[0] = tStartingNode;
|
signal->theData[0] = tStartingNode;
|
||||||
|
@ -536,24 +550,10 @@ void Cmvmi::execDISCONNECT_REP(Signal *signal)
|
||||||
setNodeInfo(hostId).m_connectCount++;
|
setNodeInfo(hostId).m_connectCount++;
|
||||||
const NodeInfo::NodeType type = getNodeInfo(hostId).getType();
|
const NodeInfo::NodeType type = getNodeInfo(hostId).getType();
|
||||||
ndbrequire(type != NodeInfo::INVALID);
|
ndbrequire(type != NodeInfo::INVALID);
|
||||||
|
|
||||||
if(type == NodeInfo::DB || globalData.theStartLevel == NodeState::SL_STARTED){
|
|
||||||
jam();
|
|
||||||
DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
|
|
||||||
rep->nodeId = hostId;
|
|
||||||
rep->err = errNo;
|
|
||||||
sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal,
|
|
||||||
DisconnectRep::SignalLength, JBA);
|
|
||||||
} else if((globalData.theStartLevel == NodeState::SL_CMVMI ||
|
|
||||||
globalData.theStartLevel == NodeState::SL_STARTING)
|
|
||||||
&& type == NodeInfo::MGM) {
|
|
||||||
/**
|
|
||||||
* Someone disconnected during cmvmi period
|
|
||||||
*/
|
|
||||||
jam();
|
|
||||||
globalTransporterRegistry.do_connect(hostId);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal,
|
||||||
|
DisconnectRep::SignalLength, JBA);
|
||||||
|
|
||||||
cancelSubscription(hostId);
|
cancelSubscription(hostId);
|
||||||
|
|
||||||
signal->theData[0] = NDB_LE_Disconnected;
|
signal->theData[0] = NDB_LE_Disconnected;
|
||||||
|
@ -587,6 +587,8 @@ void Cmvmi::execCONNECT_REP(Signal *signal){
|
||||||
*/
|
*/
|
||||||
if(type == NodeInfo::MGM){
|
if(type == NodeInfo::MGM){
|
||||||
jam();
|
jam();
|
||||||
|
signal->theData[0] = hostId;
|
||||||
|
sendSignal(QMGR_REF, GSN_CONNECT_REP, signal, 1, JBA);
|
||||||
} else {
|
} else {
|
||||||
/**
|
/**
|
||||||
* Dont allow api nodes to connect
|
* Dont allow api nodes to connect
|
||||||
|
@ -802,6 +804,8 @@ Cmvmi::execSTART_ORD(Signal* signal) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXECUTE_DIRECT(QMGR, GSN_START_ORD, signal, 1);
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -829,9 +833,6 @@ Cmvmi::execSTART_ORD(Signal* signal) {
|
||||||
*
|
*
|
||||||
* Do Restart
|
* Do Restart
|
||||||
*/
|
*/
|
||||||
|
|
||||||
globalScheduler.clear();
|
|
||||||
globalTimeQueue.clear();
|
|
||||||
|
|
||||||
// Disconnect all nodes as part of the system restart.
|
// Disconnect all nodes as part of the system restart.
|
||||||
// We need to ensure that we are starting up
|
// We need to ensure that we are starting up
|
||||||
|
|
|
@ -265,6 +265,8 @@ private:
|
||||||
void execALLOC_NODEID_CONF(Signal *);
|
void execALLOC_NODEID_CONF(Signal *);
|
||||||
void execALLOC_NODEID_REF(Signal *);
|
void execALLOC_NODEID_REF(Signal *);
|
||||||
void completeAllocNodeIdReq(Signal *);
|
void completeAllocNodeIdReq(Signal *);
|
||||||
|
|
||||||
|
void execSTART_ORD(Signal*);
|
||||||
|
|
||||||
// Arbitration signals
|
// Arbitration signals
|
||||||
void execARBIT_CFG(Signal* signal);
|
void execARBIT_CFG(Signal* signal);
|
||||||
|
@ -281,6 +283,7 @@ private:
|
||||||
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
|
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
|
||||||
Uint32 check_startup(Signal* signal);
|
Uint32 check_startup(Signal* signal);
|
||||||
|
|
||||||
|
void api_failed(Signal* signal, Uint32 aFailedNode);
|
||||||
void node_failed(Signal* signal, Uint16 aFailedNode);
|
void node_failed(Signal* signal, Uint16 aFailedNode);
|
||||||
void checkStartInterface(Signal* signal);
|
void checkStartInterface(Signal* signal);
|
||||||
void failReport(Signal* signal,
|
void failReport(Signal* signal,
|
||||||
|
|
|
@ -31,10 +31,6 @@ void Qmgr::initData()
|
||||||
cnoCommitFailedNodes = 0;
|
cnoCommitFailedNodes = 0;
|
||||||
c_maxDynamicId = 0;
|
c_maxDynamicId = 0;
|
||||||
c_clusterNodes.clear();
|
c_clusterNodes.clear();
|
||||||
|
|
||||||
Uint32 hbDBAPI = 500;
|
|
||||||
setHbApiDelay(hbDBAPI);
|
|
||||||
c_connectedNodes.set(getOwnNodeId());
|
|
||||||
c_stopReq.senderRef = 0;
|
c_stopReq.senderRef = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -43,6 +39,27 @@ void Qmgr::initData()
|
||||||
ndbrequire((Uint32)NodeInfo::DB == 0);
|
ndbrequire((Uint32)NodeInfo::DB == 0);
|
||||||
ndbrequire((Uint32)NodeInfo::API == 1);
|
ndbrequire((Uint32)NodeInfo::API == 1);
|
||||||
ndbrequire((Uint32)NodeInfo::MGM == 2);
|
ndbrequire((Uint32)NodeInfo::MGM == 2);
|
||||||
|
|
||||||
|
NodeRecPtr nodePtr;
|
||||||
|
nodePtr.i = getOwnNodeId();
|
||||||
|
ptrAss(nodePtr, nodeRec);
|
||||||
|
nodePtr.p->blockRef = reference();
|
||||||
|
|
||||||
|
c_connectedNodes.set(getOwnNodeId());
|
||||||
|
setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timeouts
|
||||||
|
*/
|
||||||
|
const ndb_mgm_configuration_iterator * p =
|
||||||
|
m_ctx.m_config.getOwnConfigIterator();
|
||||||
|
ndbrequire(p != 0);
|
||||||
|
|
||||||
|
Uint32 hbDBAPI = 1500;
|
||||||
|
ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
|
||||||
|
|
||||||
|
setHbApiDelay(hbDBAPI);
|
||||||
}//Qmgr::initData()
|
}//Qmgr::initData()
|
||||||
|
|
||||||
void Qmgr::initRecords()
|
void Qmgr::initRecords()
|
||||||
|
@ -113,6 +130,7 @@ Qmgr::Qmgr(Block_context& ctx)
|
||||||
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
|
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
|
||||||
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
|
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
|
||||||
addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP);
|
addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP);
|
||||||
|
addRecSignal(GSN_START_ORD, &Qmgr::execSTART_ORD);
|
||||||
|
|
||||||
initData();
|
initData();
|
||||||
}//Qmgr::Qmgr()
|
}//Qmgr::Qmgr()
|
||||||
|
|
|
@ -238,6 +238,38 @@ Qmgr::execREAD_CONFIG_REQ(Signal* signal)
|
||||||
ReadConfigConf::SignalLength, JBB);
|
ReadConfigConf::SignalLength, JBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Qmgr::execSTART_ORD(Signal* signal)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Start timer handling
|
||||||
|
*/
|
||||||
|
signal->theData[0] = ZTIMER_HANDLING;
|
||||||
|
sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 1, JBB);
|
||||||
|
|
||||||
|
NodeRecPtr nodePtr;
|
||||||
|
for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
|
||||||
|
{
|
||||||
|
ptrAss(nodePtr, nodeRec);
|
||||||
|
nodePtr.p->ndynamicId = 0;
|
||||||
|
if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
|
||||||
|
{
|
||||||
|
nodePtr.p->phase = ZINIT;
|
||||||
|
c_definedNodes.set(nodePtr.i);
|
||||||
|
} else {
|
||||||
|
nodePtr.p->phase = ZAPI_INACTIVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
||||||
|
nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
||||||
|
nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
||||||
|
nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
||||||
|
nodePtr.p->failState = NORMAL;
|
||||||
|
nodePtr.p->rcv[0] = 0;
|
||||||
|
nodePtr.p->rcv[1] = 0;
|
||||||
|
}//for
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
4.2 ADD NODE MODULE*/
|
4.2 ADD NODE MODULE*/
|
||||||
/*##########################################################################*/
|
/*##########################################################################*/
|
||||||
|
@ -298,8 +330,6 @@ void Qmgr::startphase1(Signal* signal)
|
||||||
nodePtr.i = getOwnNodeId();
|
nodePtr.i = getOwnNodeId();
|
||||||
ptrAss(nodePtr, nodeRec);
|
ptrAss(nodePtr, nodeRec);
|
||||||
nodePtr.p->phase = ZSTARTING;
|
nodePtr.p->phase = ZSTARTING;
|
||||||
nodePtr.p->blockRef = reference();
|
|
||||||
c_connectedNodes.set(nodePtr.i);
|
|
||||||
|
|
||||||
signal->theData[0] = reference();
|
signal->theData[0] = reference();
|
||||||
sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
|
sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
|
||||||
|
@ -371,11 +401,14 @@ void Qmgr::execCONNECT_REP(Signal* signal)
|
||||||
case ZFAIL_CLOSING:
|
case ZFAIL_CLOSING:
|
||||||
jam();
|
jam();
|
||||||
return;
|
return;
|
||||||
case ZINIT:
|
|
||||||
ndbrequire(false);
|
|
||||||
case ZAPI_ACTIVE:
|
case ZAPI_ACTIVE:
|
||||||
case ZAPI_INACTIVE:
|
case ZAPI_INACTIVE:
|
||||||
return;
|
return;
|
||||||
|
case ZINIT:
|
||||||
|
ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ndbrequire(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
|
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
|
||||||
|
@ -1212,12 +1245,6 @@ void Qmgr::execCM_REGREF(Signal* signal)
|
||||||
{
|
{
|
||||||
jam();
|
jam();
|
||||||
electionWon(signal);
|
electionWon(signal);
|
||||||
|
|
||||||
/**
|
|
||||||
* Start timer handling
|
|
||||||
*/
|
|
||||||
signal->theData[0] = ZTIMER_HANDLING;
|
|
||||||
sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
@ -1855,12 +1882,6 @@ Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
|
||||||
|
|
||||||
sendSttorryLab(signal);
|
sendSttorryLab(signal);
|
||||||
|
|
||||||
/**
|
|
||||||
* Start timer handling
|
|
||||||
*/
|
|
||||||
signal->theData[0] = ZTIMER_HANDLING;
|
|
||||||
sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
|
|
||||||
|
|
||||||
sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
|
sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2094,25 +2115,6 @@ void Qmgr::findNeighbours(Signal* signal)
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
void Qmgr::initData(Signal* signal)
|
void Qmgr::initData(Signal* signal)
|
||||||
{
|
{
|
||||||
NodeRecPtr nodePtr;
|
|
||||||
for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
|
|
||||||
ptrAss(nodePtr, nodeRec);
|
|
||||||
nodePtr.p->ndynamicId = 0;
|
|
||||||
if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB){
|
|
||||||
nodePtr.p->phase = ZINIT;
|
|
||||||
c_definedNodes.set(nodePtr.i);
|
|
||||||
} else {
|
|
||||||
nodePtr.p->phase = ZAPI_INACTIVE;
|
|
||||||
}
|
|
||||||
|
|
||||||
setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
|
|
||||||
nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
|
|
||||||
nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
|
|
||||||
nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
|
|
||||||
nodePtr.p->failState = NORMAL;
|
|
||||||
nodePtr.p->rcv[0] = 0;
|
|
||||||
nodePtr.p->rcv[1] = 0;
|
|
||||||
}//for
|
|
||||||
cfailureNr = 1;
|
cfailureNr = 1;
|
||||||
ccommitFailureNr = 1;
|
ccommitFailureNr = 1;
|
||||||
cprepareFailureNr = 1;
|
cprepareFailureNr = 1;
|
||||||
|
@ -2146,13 +2148,11 @@ void Qmgr::initData(Signal* signal)
|
||||||
ndbrequire(p != 0);
|
ndbrequire(p != 0);
|
||||||
|
|
||||||
Uint32 hbDBDB = 1500;
|
Uint32 hbDBDB = 1500;
|
||||||
Uint32 hbDBAPI = 1500;
|
|
||||||
Uint32 arbitTimeout = 1000;
|
Uint32 arbitTimeout = 1000;
|
||||||
c_restartPartialTimeout = 30000;
|
c_restartPartialTimeout = 30000;
|
||||||
c_restartPartionedTimeout = 60000;
|
c_restartPartionedTimeout = 60000;
|
||||||
c_restartFailureTimeout = ~0;
|
c_restartFailureTimeout = ~0;
|
||||||
ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
|
ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
|
||||||
ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
|
|
||||||
ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
|
ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
|
||||||
ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
|
ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
|
||||||
&c_restartPartialTimeout);
|
&c_restartPartialTimeout);
|
||||||
|
@ -2177,7 +2177,6 @@ void Qmgr::initData(Signal* signal)
|
||||||
}
|
}
|
||||||
|
|
||||||
setHbDelay(hbDBDB);
|
setHbDelay(hbDBDB);
|
||||||
setHbApiDelay(hbDBAPI);
|
|
||||||
setArbitTimeout(arbitTimeout);
|
setArbitTimeout(arbitTimeout);
|
||||||
|
|
||||||
arbitRec.state = ARBIT_NULL; // start state for all nodes
|
arbitRec.state = ARBIT_NULL; // start state for all nodes
|
||||||
|
@ -2204,7 +2203,6 @@ void Qmgr::initData(Signal* signal)
|
||||||
|
|
||||||
execARBIT_CFG(signal);
|
execARBIT_CFG(signal);
|
||||||
}
|
}
|
||||||
setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION;
|
|
||||||
}//Qmgr::initData()
|
}//Qmgr::initData()
|
||||||
|
|
||||||
|
|
||||||
|
@ -2237,20 +2235,22 @@ void Qmgr::timerHandlingLab(Signal* signal)
|
||||||
hb_check_timer.reset();
|
hb_check_timer.reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (interface_check_timer.check(TcurrentTime)) {
|
if (interface_check_timer.check(TcurrentTime)) {
|
||||||
jam();
|
jam();
|
||||||
interface_check_timer.reset();
|
interface_check_timer.reset();
|
||||||
checkStartInterface(signal);
|
checkStartInterface(signal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hb_api_timer.check(TcurrentTime))
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
hb_api_timer.reset();
|
||||||
|
apiHbHandlingLab(signal);
|
||||||
|
}
|
||||||
|
|
||||||
if (cactivateApiCheck != 0) {
|
if (cactivateApiCheck != 0) {
|
||||||
jam();
|
jam();
|
||||||
if (hb_api_timer.check(TcurrentTime)) {
|
|
||||||
jam();
|
|
||||||
hb_api_timer.reset();
|
|
||||||
apiHbHandlingLab(signal);
|
|
||||||
}//if
|
|
||||||
if (clatestTransactionCheck == 0) {
|
if (clatestTransactionCheck == 0) {
|
||||||
//-------------------------------------------------------------
|
//-------------------------------------------------------------
|
||||||
// Initialise the Transaction check timer.
|
// Initialise the Transaction check timer.
|
||||||
|
@ -2367,18 +2367,21 @@ void Qmgr::apiHbHandlingLab(Signal* signal)
|
||||||
if(type == NodeInfo::INVALID)
|
if(type == NodeInfo::INVALID)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (TnodePtr.p->phase == ZAPI_ACTIVE){
|
if (c_connectedNodes.get(nodeId))
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
|
setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
|
||||||
|
|
||||||
if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){
|
if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
|
||||||
|
{
|
||||||
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
signal->theData[0] = NDB_LE_MissedHeartbeat;
|
||||||
signal->theData[1] = nodeId;
|
signal->theData[1] = nodeId;
|
||||||
signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
|
signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
|
||||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) {
|
if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4)
|
||||||
|
{
|
||||||
jam();
|
jam();
|
||||||
/*------------------------------------------------------------------*/
|
/*------------------------------------------------------------------*/
|
||||||
/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
|
/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
|
||||||
|
@ -2390,8 +2393,8 @@ void Qmgr::apiHbHandlingLab(Signal* signal)
|
||||||
signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
|
signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
|
||||||
signal->theData[1] = nodeId;
|
signal->theData[1] = nodeId;
|
||||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
|
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
|
||||||
|
|
||||||
node_failed(signal, nodeId);
|
api_failed(signal, nodeId);
|
||||||
}//if
|
}//if
|
||||||
}//if
|
}//if
|
||||||
}//for
|
}//for
|
||||||
|
@ -2480,26 +2483,6 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo)
|
||||||
sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
||||||
sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
||||||
sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
||||||
|
|
||||||
/**-------------------------------------------------------------------------
|
|
||||||
* THE OTHER NODE WAS AN API NODE. THE COMMUNICATION LINK IS ALREADY
|
|
||||||
* BROKEN AND THUS NO ACTION IS NEEDED TO BREAK THE CONNECTION.
|
|
||||||
* WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW
|
|
||||||
* SECONDS.
|
|
||||||
*-------------------------------------------------------------------------*/
|
|
||||||
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
|
||||||
setNodeInfo(failedNodePtr.i).m_version = 0;
|
|
||||||
recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
|
|
||||||
|
|
||||||
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
|
|
||||||
|
|
||||||
closeCom->xxxBlockRef = reference();
|
|
||||||
closeCom->failNo = 0;
|
|
||||||
closeCom->noOfNodes = 1;
|
|
||||||
NodeBitmask::clear(closeCom->theNodes);
|
|
||||||
NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
|
|
||||||
sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
|
|
||||||
CloseComReqConf::SignalLength, JBA);
|
|
||||||
}//Qmgr::sendApiFailReq()
|
}//Qmgr::sendApiFailReq()
|
||||||
|
|
||||||
void Qmgr::execAPI_FAILREQ(Signal* signal)
|
void Qmgr::execAPI_FAILREQ(Signal* signal)
|
||||||
|
@ -2512,20 +2495,7 @@ void Qmgr::execAPI_FAILREQ(Signal* signal)
|
||||||
|
|
||||||
ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
|
ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
|
||||||
|
|
||||||
// ignore if api not active
|
api_failed(signal, signal->theData[0]);
|
||||||
if (failedNodePtr.p->phase != ZAPI_ACTIVE)
|
|
||||||
{
|
|
||||||
jam();
|
|
||||||
// But send to SUMA anyway...
|
|
||||||
sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
signal->theData[0] = NDB_LE_Disconnected;
|
|
||||||
signal->theData[1] = failedNodePtr.i;
|
|
||||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
|
|
||||||
|
|
||||||
node_failed(signal, failedNodePtr.i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Qmgr::execAPI_FAILCONF(Signal* signal)
|
void Qmgr::execAPI_FAILCONF(Signal* signal)
|
||||||
|
@ -2649,6 +2619,13 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
|
||||||
ndbrequire(false);
|
ndbrequire(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
|
||||||
|
{
|
||||||
|
jam();
|
||||||
|
api_failed(signal, nodeId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
switch(nodePtr.p->phase){
|
switch(nodePtr.p->phase){
|
||||||
case ZRUNNING:
|
case ZRUNNING:
|
||||||
jam();
|
jam();
|
||||||
|
@ -2685,66 +2662,109 @@ void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
|
||||||
failedNodePtr.i = aFailedNode;
|
failedNodePtr.i = aFailedNode;
|
||||||
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
|
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
|
||||||
|
|
||||||
if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB){
|
ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
|
||||||
|
|
||||||
|
/**---------------------------------------------------------------------
|
||||||
|
* THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
|
||||||
|
* FAILURE WAS DISCOVERED.
|
||||||
|
*---------------------------------------------------------------------*/
|
||||||
|
switch(failedNodePtr.p->phase){
|
||||||
|
case ZRUNNING:
|
||||||
jam();
|
jam();
|
||||||
/**---------------------------------------------------------------------
|
failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
|
||||||
* THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
|
return;
|
||||||
* FAILURE WAS DISCOVERED.
|
case ZFAIL_CLOSING:
|
||||||
*---------------------------------------------------------------------*/
|
jam();
|
||||||
switch(failedNodePtr.p->phase){
|
return;
|
||||||
case ZRUNNING:
|
case ZSTARTING:
|
||||||
jam();
|
c_start.reset();
|
||||||
failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
|
// Fall-through
|
||||||
return;
|
default:
|
||||||
case ZFAIL_CLOSING:
|
jam();
|
||||||
jam();
|
/*---------------------------------------------------------------------*/
|
||||||
return;
|
// The other node is still not in the cluster but disconnected.
|
||||||
case ZSTARTING:
|
// We must restart communication in three seconds.
|
||||||
c_start.reset();
|
/*---------------------------------------------------------------------*/
|
||||||
// Fall-through
|
failedNodePtr.p->failState = NORMAL;
|
||||||
default:
|
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
||||||
jam();
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
/*---------------------------------------------------------------------*/
|
|
||||||
// The other node is still not in the cluster but disconnected.
|
|
||||||
// We must restart communication in three seconds.
|
|
||||||
/*---------------------------------------------------------------------*/
|
|
||||||
failedNodePtr.p->failState = NORMAL;
|
|
||||||
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
|
||||||
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
|
||||||
|
|
||||||
CloseComReqConf * const closeCom =
|
CloseComReqConf * const closeCom =
|
||||||
(CloseComReqConf *)&signal->theData[0];
|
(CloseComReqConf *)&signal->theData[0];
|
||||||
|
|
||||||
closeCom->xxxBlockRef = reference();
|
closeCom->xxxBlockRef = reference();
|
||||||
closeCom->failNo = 0;
|
closeCom->failNo = 0;
|
||||||
closeCom->noOfNodes = 1;
|
closeCom->noOfNodes = 1;
|
||||||
NodeBitmask::clear(closeCom->theNodes);
|
NodeBitmask::clear(closeCom->theNodes);
|
||||||
NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
|
NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
|
||||||
sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
|
sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
|
||||||
CloseComReqConf::SignalLength, JBA);
|
CloseComReqConf::SignalLength, JBA);
|
||||||
}//if
|
}//if
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Qmgr::api_failed(Signal* signal, Uint32 nodeId)
|
||||||
|
{
|
||||||
|
NodeRecPtr failedNodePtr;
|
||||||
|
/**------------------------------------------------------------------------
|
||||||
|
* A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
|
||||||
|
* DUE TO THIS.
|
||||||
|
*-----------------------------------------------------------------------*/
|
||||||
|
failedNodePtr.i = nodeId;
|
||||||
|
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
|
||||||
|
|
||||||
|
if (failedNodePtr.p->phase == ZFAIL_CLOSING)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Failure handling already in progress
|
||||||
|
*/
|
||||||
|
jam();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
if (failedNodePtr.p->phase == ZAPI_ACTIVE)
|
||||||
* API code
|
{
|
||||||
*/
|
|
||||||
jam();
|
|
||||||
if (failedNodePtr.p->phase != ZFAIL_CLOSING){
|
|
||||||
jam();
|
jam();
|
||||||
//-------------------------------------------------------------------------
|
sendApiFailReq(signal, nodeId);
|
||||||
// The API was active and has now failed. We need to initiate API failure
|
|
||||||
// handling. If the API had already failed then we can ignore this
|
|
||||||
// discovery.
|
|
||||||
//-------------------------------------------------------------------------
|
|
||||||
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
|
||||||
|
|
||||||
sendApiFailReq(signal, aFailedNode);
|
|
||||||
arbitRec.code = ArbitCode::ApiFail;
|
arbitRec.code = ArbitCode::ApiFail;
|
||||||
handleArbitApiFail(signal, aFailedNode);
|
handleArbitApiFail(signal, nodeId);
|
||||||
}//if
|
}
|
||||||
return;
|
else
|
||||||
}//Qmgr::node_failed()
|
{
|
||||||
|
/**
|
||||||
|
* Always inform SUMA
|
||||||
|
*/
|
||||||
|
jam();
|
||||||
|
signal->theData[0] = nodeId;
|
||||||
|
signal->theData[1] = QMGR_REF;
|
||||||
|
sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
|
||||||
|
failedNodePtr.p->failState = NORMAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
failedNodePtr.p->phase = ZFAIL_CLOSING;
|
||||||
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
|
||||||
|
setNodeInfo(failedNodePtr.i).m_version = 0;
|
||||||
|
recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
|
||||||
|
|
||||||
|
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
|
||||||
|
closeCom->xxxBlockRef = reference();
|
||||||
|
closeCom->failNo = 0;
|
||||||
|
closeCom->noOfNodes = 1;
|
||||||
|
NodeBitmask::clear(closeCom->theNodes);
|
||||||
|
NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
|
||||||
|
sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
|
||||||
|
CloseComReqConf::SignalLength, JBA);
|
||||||
|
|
||||||
|
if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Allow MGM do reconnect "directly"
|
||||||
|
*/
|
||||||
|
jam();
|
||||||
|
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**--------------------------------------------------------------------------
|
/**--------------------------------------------------------------------------
|
||||||
* AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
|
* AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
|
||||||
|
@ -4963,43 +4983,39 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
|
||||||
c_start.m_president_candidate_gci);
|
c_start.m_president_candidate_gci);
|
||||||
infoEvent("ctoStatus = %d\n", ctoStatus);
|
infoEvent("ctoStatus = %d\n", ctoStatus);
|
||||||
for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
|
for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
|
||||||
if(getNodeInfo(i).getType() == NodeInfo::DB){
|
NodeRecPtr nodePtr;
|
||||||
NodeRecPtr nodePtr;
|
nodePtr.i = i;
|
||||||
nodePtr.i = i;
|
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
||||||
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
char buf[100];
|
||||||
char buf[100];
|
switch(nodePtr.p->phase){
|
||||||
switch(nodePtr.p->phase){
|
case ZINIT:
|
||||||
case ZINIT:
|
sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZSTARTING:
|
||||||
case ZSTARTING:
|
sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZRUNNING:
|
||||||
case ZRUNNING:
|
sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZPREPARE_FAIL:
|
||||||
case ZPREPARE_FAIL:
|
sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZFAIL_CLOSING:
|
||||||
case ZFAIL_CLOSING:
|
sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZAPI_INACTIVE:
|
||||||
case ZAPI_INACTIVE:
|
sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
case ZAPI_ACTIVE:
|
||||||
case ZAPI_ACTIVE:
|
sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
default:
|
||||||
default:
|
sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
|
||||||
sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
|
break;
|
||||||
break;
|
|
||||||
}
|
|
||||||
infoEvent(buf);
|
|
||||||
}
|
}
|
||||||
|
infoEvent(buf);
|
||||||
}
|
}
|
||||||
default:
|
}
|
||||||
;
|
|
||||||
}//switch
|
|
||||||
|
|
||||||
#ifdef ERROR_INSERT
|
#ifdef ERROR_INSERT
|
||||||
if (signal->theData[0] == 935 && signal->getLength() == 2)
|
if (signal->theData[0] == 935 && signal->getLength() == 2)
|
||||||
|
|
Loading…
Add table
Reference in a new issue