mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 20:12:31 +01:00
Merge perch.ndb.mysql.com:/home/jonas/src/41-work
into perch.ndb.mysql.com:/home/jonas/src/50-work ndb/src/kernel/blocks/dbdih/DbdihMain.cpp: Auto merged ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp: Auto merged ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp: Auto merged ndb/src/kernel/blocks/qmgr/Qmgr.hpp: Auto merged ndb/test/ndbapi/testNodeRestart.cpp: Auto merged ndb/test/run-test/daily-basic-tests.txt: Auto merged ndb/test/src/NdbRestarts.cpp: Auto merged ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp: merge ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp: merge ndb/src/kernel/blocks/qmgr/QmgrInit.cpp: merge ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: merge
This commit is contained in:
commit
66daa3753b
15 changed files with 982 additions and 133 deletions
|
@ -64,6 +64,7 @@ public:
|
|||
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
|
||||
// 20-24 BACKUP
|
||||
NdbcntrTestStopOnError = 25,
|
||||
NdbcntrStopNodes = 70,
|
||||
// 100-105 TUP and ACC
|
||||
// 200-240 UTIL
|
||||
// 300-305 TRIX
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define FAIL_REP_HPP
|
||||
|
||||
#include "SignalData.hpp"
|
||||
#include <NodeBitmask.hpp>
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -27,6 +28,7 @@ class FailRep {
|
|||
* Sender(s) & Receiver(s)
|
||||
*/
|
||||
friend class Qmgr;
|
||||
friend class Ndbcntr;
|
||||
|
||||
/**
|
||||
* For printing
|
||||
|
@ -35,7 +37,8 @@ class FailRep {
|
|||
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 2 );
|
||||
|
||||
STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size );
|
||||
|
||||
enum FailCause {
|
||||
ZOWN_FAILURE=0,
|
||||
ZOTHER_NODE_WHEN_WE_START=1,
|
||||
|
@ -43,13 +46,20 @@ public:
|
|||
ZSTART_IN_REGREQ=3,
|
||||
ZHEARTBEAT_FAILURE=4,
|
||||
ZLINK_FAILURE=5,
|
||||
ZOTHERNODE_FAILED_DURING_START=6
|
||||
ZOTHERNODE_FAILED_DURING_START=6,
|
||||
ZMULTI_NODE_SHUTDOWN = 7,
|
||||
ZPARTITIONED_CLUSTER = 8
|
||||
};
|
||||
|
||||
|
||||
private:
|
||||
|
||||
Uint32 failNodeId;
|
||||
Uint32 failCause;
|
||||
/**
|
||||
* Used when failCause == ZPARTITIONED_CLUSTER
|
||||
*/
|
||||
Uint32 president;
|
||||
Uint32 partition[NdbNodeBitmask::Size];
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ class StopReq
|
|||
friend class MgmtSrvr;
|
||||
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 9 );
|
||||
STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size);
|
||||
|
||||
public:
|
||||
Uint32 senderRef;
|
||||
|
@ -49,29 +49,34 @@ public:
|
|||
Int32 readOperationTimeout; // Timeout before read operations are aborted
|
||||
Int32 operationTimeout; // Timeout before all operations are aborted
|
||||
|
||||
Uint32 nodes[NdbNodeBitmask::Size];
|
||||
|
||||
static void setSystemStop(Uint32 & requestInfo, bool value);
|
||||
static void setPerformRestart(Uint32 & requestInfo, bool value);
|
||||
static void setNoStart(Uint32 & requestInfo, bool value);
|
||||
static void setInitialStart(Uint32 & requestInfo, bool value);
|
||||
static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value);
|
||||
/**
|
||||
* Don't perform "graceful" shutdown/restart...
|
||||
*/
|
||||
static void setStopAbort(Uint32 & requestInfo, bool value);
|
||||
static void setStopNodes(Uint32 & requestInfo, bool value);
|
||||
|
||||
static bool getSystemStop(const Uint32 & requestInfo);
|
||||
static bool getPerformRestart(const Uint32 & requestInfo);
|
||||
static bool getNoStart(const Uint32 & requestInfo);
|
||||
static bool getInitialStart(const Uint32 & requestInfo);
|
||||
static bool getEscalateOnNodeFail(const Uint32 & requestInfo);
|
||||
static bool getStopAbort(const Uint32 & requestInfo);
|
||||
static bool getStopNodes(const Uint32 & requestInfo);
|
||||
};
|
||||
|
||||
struct StopConf
|
||||
{
|
||||
STATIC_CONST( SignalLength = 2 );
|
||||
Uint32 senderData;
|
||||
Uint32 nodeState;
|
||||
union {
|
||||
Uint32 nodeState;
|
||||
Uint32 nodeId;
|
||||
};
|
||||
};
|
||||
|
||||
class StopRef
|
||||
|
@ -94,7 +99,9 @@ public:
|
|||
NodeShutdownInProgress = 1,
|
||||
SystemShutdownInProgress = 2,
|
||||
NodeShutdownWouldCauseSystemCrash = 3,
|
||||
TransactionAbortFailed = 4
|
||||
TransactionAbortFailed = 4,
|
||||
UnsupportedNodeShutdown = 5,
|
||||
MultiNodeShutdownNotMaster = 6
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
|
|||
|
||||
inline
|
||||
bool
|
||||
StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo)
|
||||
StopReq::getStopAbort(const Uint32 & requestInfo)
|
||||
{
|
||||
return requestInfo & 16;
|
||||
return requestInfo & 32;
|
||||
}
|
||||
|
||||
inline
|
||||
bool
|
||||
StopReq::getStopAbort(const Uint32 & requestInfo)
|
||||
StopReq::getStopNodes(const Uint32 & requestInfo)
|
||||
{
|
||||
return requestInfo & 32;
|
||||
return requestInfo & 64;
|
||||
}
|
||||
|
||||
|
||||
|
@ -185,16 +192,6 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
|
|||
requestInfo &= ~8;
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value)
|
||||
{
|
||||
if(value)
|
||||
requestInfo |= 16;
|
||||
else
|
||||
requestInfo &= ~16;
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
StopReq::setStopAbort(Uint32 & requestInfo, bool value)
|
||||
|
@ -205,6 +202,15 @@ StopReq::setStopAbort(Uint32 & requestInfo, bool value)
|
|||
requestInfo &= ~32;
|
||||
}
|
||||
|
||||
// Set or clear the "stop nodes" flag in a STOP_REQ requestInfo word.
// The flag is the bit with value 64; all other bits of requestInfo are
// left untouched.
//   requestInfo - request flag word, modified in place
//   value       - true sets the bit, false clears it
inline
|
||||
void
|
||||
StopReq::setStopNodes(Uint32 & requestInfo, bool value)
|
||||
{
|
||||
if(value)
|
||||
requestInfo |= 64;
|
||||
else
|
||||
requestInfo &= ~64;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -46,7 +46,9 @@ public:
|
|||
Complete = 1, ///< Wait for a GCP to complete
|
||||
CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed
|
||||
CompleteIfRunning = 3, ///< Wait for ongoing GCP
|
||||
CurrentGCI = 8 ///< Immediately return current GCI
|
||||
CurrentGCI = 8, ///< Immediately return current GCI
|
||||
BlockStartGcp = 9,
|
||||
UnblockStartGcp = 10
|
||||
};
|
||||
|
||||
Uint32 senderRef;
|
||||
|
@ -70,11 +72,12 @@ class WaitGCPConf {
|
|||
//friend class Grep::PSCoord;
|
||||
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 2 );
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
|
||||
public:
|
||||
Uint32 senderData;
|
||||
Uint32 gcp;
|
||||
Uint32 blockStatus;
|
||||
};
|
||||
|
||||
class WaitGCPRef {
|
||||
|
|
|
@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
|
|||
{
|
||||
}
|
||||
|
||||
#ifdef ERROR_INSERT
|
||||
NodeBitmask c_error_9000_nodes_mask;
|
||||
#endif
|
||||
|
||||
void Cmvmi::execNDB_TAMPER(Signal* signal)
|
||||
{
|
||||
|
@ -419,21 +422,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
|
|||
|
||||
const Uint32 len = signal->getLength();
|
||||
if(len == 2){
|
||||
globalTransporterRegistry.do_connect(tStartingNode);
|
||||
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
|
||||
|
||||
//-----------------------------------------------------
|
||||
// Report that the connection to the node is opened
|
||||
//-----------------------------------------------------
|
||||
signal->theData[0] = NDB_LE_CommunicationOpened;
|
||||
signal->theData[1] = tStartingNode;
|
||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
|
||||
//-----------------------------------------------------
|
||||
#ifdef ERROR_INSERT
|
||||
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
|
||||
#endif
|
||||
{
|
||||
globalTransporterRegistry.do_connect(tStartingNode);
|
||||
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
|
||||
|
||||
//-----------------------------------------------------
|
||||
// Report that the connection to the node is opened
|
||||
//-----------------------------------------------------
|
||||
signal->theData[0] = NDB_LE_CommunicationOpened;
|
||||
signal->theData[1] = tStartingNode;
|
||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
|
||||
//-----------------------------------------------------
|
||||
}
|
||||
} else {
|
||||
for(unsigned int i = 1; i < MAX_NODES; i++ ) {
|
||||
jam();
|
||||
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
|
||||
jam();
|
||||
|
||||
#ifdef ERROR_INSERT
|
||||
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
|
||||
continue;
|
||||
#endif
|
||||
|
||||
globalTransporterRegistry.do_connect(i);
|
||||
globalTransporterRegistry.setIOState(i, HaltIO);
|
||||
|
||||
|
@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
|
|||
}
|
||||
|
||||
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
|
||||
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){
|
||||
Uint32 arg = dumpState->args[0];
|
||||
if (arg == DumpStateOrd::CmvmiDumpConnections){
|
||||
for(unsigned int i = 1; i < MAX_NODES; i++ ){
|
||||
const char* nodeTypeStr = "";
|
||||
switch(getNodeInfo(i).m_type){
|
||||
|
@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
|
|||
}
|
||||
}
|
||||
|
||||
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){
|
||||
if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
|
||||
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
|
||||
g_sectionSegmentPool.getSize(),
|
||||
g_sectionSegmentPool.getNoOfFree());
|
||||
}
|
||||
|
||||
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
|
||||
if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
|
||||
{
|
||||
if(signal->getLength() == 1)
|
||||
{
|
||||
|
@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
|
|||
}
|
||||
}
|
||||
|
||||
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
|
||||
if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
|
||||
unsigned i;
|
||||
Uint32 loopCount = dumpState->args[1];
|
||||
const unsigned len0 = 11;
|
||||
|
@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
|
|||
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
|
||||
}
|
||||
|
||||
#ifdef ERROR_INSERT
|
||||
if (arg == 9000)
|
||||
{
|
||||
SET_ERROR_INSERT_VALUE(9000);
|
||||
for (Uint32 i = 1; i<signal->getLength(); i++)
|
||||
c_error_9000_nodes_mask.set(signal->theData[i]);
|
||||
}
|
||||
|
||||
if (arg == 9001)
|
||||
{
|
||||
CLEAR_ERROR_INSERT_VALUE;
|
||||
for (Uint32 i = 0; i<MAX_NODES; i++)
|
||||
{
|
||||
if (c_error_9000_nodes_mask.get(i))
|
||||
{
|
||||
signal->theData[0] = 0;
|
||||
signal->theData[1] = i;
|
||||
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
|
||||
}
|
||||
}
|
||||
c_error_9000_nodes_mask.clear();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_TRACE
|
||||
#if 0
|
||||
{
|
||||
|
|
|
@ -14273,11 +14273,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
|
|||
jam();
|
||||
conf->senderData = senderData;
|
||||
conf->gcp = cnewgcp;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
return;
|
||||
}//if
|
||||
|
||||
if (requestType == WaitGCPReq::BlockStartGcp)
|
||||
{
|
||||
jam();
|
||||
conf->senderData = senderData;
|
||||
conf->gcp = cnewgcp;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
cgcpOrderBlocked = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (requestType == WaitGCPReq::UnblockStartGcp)
|
||||
{
|
||||
jam();
|
||||
conf->senderData = senderData;
|
||||
conf->gcp = cnewgcp;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
cgcpOrderBlocked = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if(isMaster()) {
|
||||
/**
|
||||
* Master
|
||||
|
@ -14289,6 +14314,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
|
|||
jam();
|
||||
conf->senderData = senderData;
|
||||
conf->gcp = coldgcp;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
return;
|
||||
|
@ -14375,6 +14401,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
|
|||
|
||||
conf->senderData = ptr.p->clientData;
|
||||
conf->gcp = gcp;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
|
||||
|
@ -14442,6 +14469,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
|
|||
|
||||
c_waitGCPMasterList.next(ptr);
|
||||
conf->senderData = clientData;
|
||||
conf->blockStatus = cgcpOrderBlocked;
|
||||
sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
|
||||
WaitGCPConf::SignalLength, JBB);
|
||||
|
||||
|
|
|
@ -203,6 +203,7 @@ private:
|
|||
void execWAIT_GCP_CONF(Signal* signal);
|
||||
|
||||
void execSTOP_REQ(Signal* signal);
|
||||
void execSTOP_CONF(Signal* signal);
|
||||
void execRESUME_REQ(Signal* signal);
|
||||
|
||||
void execCHANGE_NODE_STATE_CONF(Signal* signal);
|
||||
|
@ -338,6 +339,16 @@ public:
|
|||
void progError(int line, int cause, const char * extra) {
|
||||
cntr.progError(line, cause, extra);
|
||||
}
|
||||
|
||||
// Steps of the stop procedure tracked by this StopRecord. In the code
// visible here the values are used in two ways: as the senderData of
// WAIT_GCP_REQ signals (so that execWAIT_GCP_CONF can switch on the step
// that was requested) and as the value of m_state.
enum StopNodesStep {
|
||||
// senderData of the WAIT_GCP_REQ sent with requestType BlockStartGcp
SR_BLOCK_GCP_START_GCP = 0,
|
||||
// senderData of the WAIT_GCP_REQ sent with requestType CompleteIfRunning
SR_WAIT_COMPLETE_GCP = 1,
|
||||
// senderData of the WAIT_GCP_REQ sent with requestType UnblockStartGcp
SR_UNBLOCK_GCP_START_GCP = 2,
|
||||
// m_state after STOP_REQ has been sent to QMGR on the nodes being stopped
SR_QMGR_STOP_REQ = 3,
|
||||
// m_state after FAIL_REP has been sent for every node being stopped
SR_WAIT_NODE_FAILURES = 4,
|
||||
// senderData of the WAIT_GCP_REQ (CompleteForceStart) used for a system
// stop; 12 is the literal value that was used as senderData before this
// enumerator existed
SR_CLUSTER_SHUTDOWN = 12
|
||||
} m_state;
|
||||
SignalCounter m_stop_req_counter;
|
||||
};
|
||||
private:
|
||||
StopRecord c_stopRec;
|
||||
|
|
|
@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf):
|
|||
addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF);
|
||||
|
||||
addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ);
|
||||
addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF);
|
||||
addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ);
|
||||
|
||||
addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF);
|
||||
|
|
|
@ -42,6 +42,8 @@
|
|||
#include <signaldata/FsRemoveReq.hpp>
|
||||
#include <signaldata/ReadConfig.hpp>
|
||||
|
||||
#include <signaldata/FailRep.hpp>
|
||||
|
||||
#include <AttributeHeader.hpp>
|
||||
#include <Configuration.hpp>
|
||||
#include <DebuggerNames.hpp>
|
||||
|
@ -1474,13 +1476,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
|
|||
sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal,
|
||||
NodeFailRep::SignalLength, JBB);
|
||||
|
||||
if (c_stopRec.stopReq.senderRef)
|
||||
{
|
||||
jam();
|
||||
switch(c_stopRec.m_state){
|
||||
case StopRecord::SR_WAIT_NODE_FAILURES:
|
||||
{
|
||||
jam();
|
||||
NdbNodeBitmask tmp;
|
||||
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
tmp.bitANDC(allFailed);
|
||||
tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
|
||||
if (tmp.isclear())
|
||||
{
|
||||
jam();
|
||||
if (c_stopRec.stopReq.senderRef != RNIL)
|
||||
{
|
||||
jam();
|
||||
StopConf * const stopConf = (StopConf *)&signal->theData[0];
|
||||
stopConf->senderData = c_stopRec.stopReq.senderData;
|
||||
stopConf->nodeState = (Uint32) NodeState::SL_SINGLEUSER;
|
||||
sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_CONF, signal,
|
||||
StopConf::SignalLength, JBB);
|
||||
}
|
||||
|
||||
c_stopRec.stopReq.senderRef = 0;
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = reference();
|
||||
req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP;
|
||||
req->requestType = WaitGCPReq::UnblockStartGcp;
|
||||
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBA);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case StopRecord::SR_QMGR_STOP_REQ:
|
||||
{
|
||||
NdbNodeBitmask tmp;
|
||||
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
tmp.bitANDC(allFailed);
|
||||
|
||||
if (tmp.isclear())
|
||||
{
|
||||
Uint32 nodeId = allFailed.find(0);
|
||||
tmp.set(nodeId);
|
||||
|
||||
StopConf* conf = (StopConf*)signal->getDataPtrSend();
|
||||
conf->senderData = c_stopRec.stopReq.senderData;
|
||||
conf->nodeId = nodeId;
|
||||
sendSignal(reference(),
|
||||
GSN_STOP_CONF, signal, StopConf::SignalLength, JBB);
|
||||
}
|
||||
|
||||
tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
signal->theData[0] = NDB_LE_NODE_FAILREP;
|
||||
signal->theData[2] = 0;
|
||||
|
||||
Uint32 nodeId = 0;
|
||||
while(!allFailed.isclear()){
|
||||
nodeId = allFailed.find(nodeId + 1);
|
||||
allFailed.clear(nodeId);
|
||||
signal->theData[0] = NDB_LE_NODE_FAILREP;
|
||||
signal->theData[1] = nodeId;
|
||||
signal->theData[2] = 0;
|
||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
|
||||
}//for
|
||||
|
||||
|
@ -1924,13 +1987,15 @@ void
|
|||
Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
|
||||
{
|
||||
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
|
||||
if(signal->theData[0] == 13){
|
||||
Uint32 arg = dumpState->args[0];
|
||||
|
||||
if(arg == 13){
|
||||
infoEvent("Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d",
|
||||
cstartPhase, cinternalStartphase, cndbBlocksCount);
|
||||
infoEvent("Cntr: cmasterNodeId = %d", cmasterNodeId);
|
||||
}
|
||||
|
||||
if (dumpState->args[0] == DumpStateOrd::NdbcntrTestStopOnError){
|
||||
if (arg == DumpStateOrd::NdbcntrTestStopOnError){
|
||||
if (theConfiguration.stopOnError() == true)
|
||||
((Configuration&)theConfiguration).stopOnError(false);
|
||||
|
||||
|
@ -1943,6 +2008,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
|
|||
SystemError::SignalLength, JBA);
|
||||
}
|
||||
|
||||
if (arg == DumpStateOrd::NdbcntrStopNodes)
|
||||
{
|
||||
NdbNodeBitmask mask;
|
||||
for(Uint32 i = 1; i<signal->getLength(); i++)
|
||||
mask.set(signal->theData[i]);
|
||||
|
||||
StopReq* req = (StopReq*)signal->getDataPtrSend();
|
||||
req->senderRef = RNIL;
|
||||
req->senderData = 123;
|
||||
req->requestInfo = 0;
|
||||
req->singleuser = 0;
|
||||
req->singleUserApi = 0;
|
||||
mask.copyto(NdbNodeBitmask::Size, req->nodes);
|
||||
StopReq::setPerformRestart(req->requestInfo, 1);
|
||||
StopReq::setNoStart(req->requestInfo, 1);
|
||||
StopReq::setStopNodes(req->requestInfo, 1);
|
||||
StopReq::setStopAbort(req->requestInfo, 1);
|
||||
|
||||
sendSignal(reference(), GSN_STOP_REQ, signal,
|
||||
StopReq::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
}//Ndbcntr::execDUMP_STATE_ORD()
|
||||
|
||||
|
@ -2003,9 +2090,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
|
|||
Uint32 senderData = req->senderData;
|
||||
BlockReference senderRef = req->senderRef;
|
||||
bool abort = StopReq::getStopAbort(req->requestInfo);
|
||||
bool stopnodes = StopReq::getStopNodes(req->requestInfo);
|
||||
|
||||
if(getNodeState().startLevel < NodeState::SL_STARTED ||
|
||||
abort && !singleuser){
|
||||
if(!singleuser &&
|
||||
(getNodeState().startLevel < NodeState::SL_STARTED ||
|
||||
(abort && !stopnodes)))
|
||||
{
|
||||
/**
|
||||
* Node is not started yet
|
||||
*
|
||||
|
@ -2047,21 +2137,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
|
|||
else
|
||||
ref->errorCode = StopRef::NodeShutdownInProgress;
|
||||
ref->senderData = senderData;
|
||||
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
|
||||
if (senderRef != RNIL)
|
||||
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
if (stopnodes && !abort)
|
||||
{
|
||||
jam();
|
||||
ref->errorCode = StopRef::UnsupportedNodeShutdown;
|
||||
ref->senderData = senderData;
|
||||
if (senderRef != RNIL)
|
||||
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
if (stopnodes && cmasterNodeId != getOwnNodeId())
|
||||
{
|
||||
jam();
|
||||
ref->errorCode = StopRef::MultiNodeShutdownNotMaster;
|
||||
ref->senderData = senderData;
|
||||
if (senderRef != RNIL)
|
||||
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
c_stopRec.stopReq = * req;
|
||||
c_stopRec.stopInitiatedTime = NdbTick_CurrentMillisecond();
|
||||
|
||||
if(!singleuser) {
|
||||
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) {
|
||||
if (stopnodes)
|
||||
{
|
||||
jam();
|
||||
|
||||
if(!c_stopRec.checkNodeFail(signal))
|
||||
{
|
||||
jam();
|
||||
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[100];
|
||||
NdbNodeBitmask mask;
|
||||
mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
infoEvent("Initiating shutdown abort of %s", mask.getText(buf));
|
||||
ndbout_c("Initiating shutdown abort of %s", mask.getText(buf));
|
||||
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = reference();
|
||||
req->senderData = StopRecord::SR_BLOCK_GCP_START_GCP;
|
||||
req->requestType = WaitGCPReq::BlockStartGcp;
|
||||
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
else if(!singleuser)
|
||||
{
|
||||
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
|
||||
{
|
||||
jam();
|
||||
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo))
|
||||
{
|
||||
((Configuration&)theConfiguration).stopOnError(false);
|
||||
}
|
||||
}
|
||||
if(!c_stopRec.checkNodeFail(signal)){
|
||||
if(!c_stopRec.checkNodeFail(signal))
|
||||
{
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
|
@ -2131,7 +2271,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
|
|||
*/
|
||||
NodeBitmask ndbMask;
|
||||
ndbMask.assign(cntr.c_startedNodes);
|
||||
ndbMask.clear(cntr.getOwnNodeId());
|
||||
|
||||
if (StopReq::getStopNodes(stopReq.requestInfo))
|
||||
{
|
||||
NdbNodeBitmask tmp;
|
||||
tmp.assign(NdbNodeBitmask::Size, stopReq.nodes);
|
||||
ndbMask.bitANDC(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
ndbMask.clear(cntr.getOwnNodeId());
|
||||
}
|
||||
|
||||
CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
|
||||
sd->blockRef = cntr.reference();
|
||||
|
@ -2153,7 +2303,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
|
|||
ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash;
|
||||
|
||||
const BlockReference bref = stopReq.senderRef;
|
||||
cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
if (bref != RNIL)
|
||||
cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
|
||||
|
||||
stopReq.senderRef = 0;
|
||||
|
||||
|
@ -2203,23 +2354,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){
|
|||
if(stopReq.getSystemStop(stopReq.requestInfo) || stopReq.singleuser){
|
||||
jam();
|
||||
if(stopReq.singleuser)
|
||||
{
|
||||
jam();
|
||||
AbortAllReq * req = (AbortAllReq*)&signal->theData[0];
|
||||
req->senderRef = cntr.reference();
|
||||
req->senderData = 12;
|
||||
cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal,
|
||||
AbortAllReq::SignalLength, JBB);
|
||||
}
|
||||
{
|
||||
jam();
|
||||
AbortAllReq * req = (AbortAllReq*)&signal->theData[0];
|
||||
req->senderRef = cntr.reference();
|
||||
req->senderData = 12;
|
||||
cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal,
|
||||
AbortAllReq::SignalLength, JBB);
|
||||
}
|
||||
else
|
||||
{
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = cntr.reference();
|
||||
req->senderData = 12;
|
||||
req->requestType = WaitGCPReq::CompleteForceStart;
|
||||
cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
}
|
||||
{
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = cntr.reference();
|
||||
req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN;
|
||||
req->requestType = WaitGCPReq::CompleteForceStart;
|
||||
cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
}
|
||||
} else {
|
||||
jam();
|
||||
StopPermReq * req = (StopPermReq*)&signal->theData[0];
|
||||
|
@ -2381,7 +2532,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
|
|||
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = reference();
|
||||
req->senderData = 12;
|
||||
req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN;
|
||||
req->requestType = WaitGCPReq::CompleteForceStart;
|
||||
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
|
@ -2390,29 +2541,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
|
|||
void Ndbcntr::execWAIT_GCP_CONF(Signal* signal){
|
||||
jamEntry();
|
||||
|
||||
ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo));
|
||||
NodeState newState(NodeState::SL_STOPPING_3, true);
|
||||
WaitGCPConf* conf = (WaitGCPConf*)signal->getDataPtr();
|
||||
|
||||
/**
|
||||
* Inform QMGR so that arbitrator won't kill us
|
||||
*/
|
||||
NodeStateRep * rep = (NodeStateRep *)&signal->theData[0];
|
||||
rep->nodeState = newState;
|
||||
rep->nodeState.masterNodeId = cmasterNodeId;
|
||||
rep->nodeState.setNodeGroup(c_nodeGroup);
|
||||
EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, NodeStateRep::SignalLength);
|
||||
switch(conf->senderData){
|
||||
case StopRecord::SR_BLOCK_GCP_START_GCP:
|
||||
{
|
||||
jam();
|
||||
/**
|
||||
*
|
||||
*/
|
||||
if(!c_stopRec.checkNodeFail(signal))
|
||||
{
|
||||
jam();
|
||||
goto unblock;
|
||||
}
|
||||
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = reference();
|
||||
req->senderData = StopRecord::SR_WAIT_COMPLETE_GCP;
|
||||
req->requestType = WaitGCPReq::CompleteIfRunning;
|
||||
|
||||
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
|
||||
jam();
|
||||
StartOrd * startOrd = (StartOrd *)&signal->theData[0];
|
||||
startOrd->restartInfo = c_stopRec.stopReq.requestInfo;
|
||||
sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500,
|
||||
StartOrd::SignalLength);
|
||||
} else {
|
||||
jam();
|
||||
sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1);
|
||||
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
case StopRecord::SR_UNBLOCK_GCP_START_GCP:
|
||||
{
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
case StopRecord::SR_WAIT_COMPLETE_GCP:
|
||||
{
|
||||
jam();
|
||||
if(!c_stopRec.checkNodeFail(signal))
|
||||
{
|
||||
jam();
|
||||
goto unblock;
|
||||
}
|
||||
|
||||
NdbNodeBitmask tmp;
|
||||
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
c_stopRec.m_stop_req_counter = tmp;
|
||||
NodeReceiverGroup rg(QMGR, tmp);
|
||||
StopReq * stopReq = (StopReq *)&signal->theData[0];
|
||||
* stopReq = c_stopRec.stopReq;
|
||||
stopReq->senderRef = reference();
|
||||
sendSignal(rg, GSN_STOP_REQ, signal, StopReq::SignalLength, JBA);
|
||||
c_stopRec.m_state = StopRecord::SR_QMGR_STOP_REQ;
|
||||
return;
|
||||
}
|
||||
case StopRecord::SR_CLUSTER_SHUTDOWN:
|
||||
{
|
||||
jam();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo));
|
||||
NodeState newState(NodeState::SL_STOPPING_3, true);
|
||||
|
||||
/**
|
||||
* Inform QMGR so that arbitrator won't kill us
|
||||
*/
|
||||
NodeStateRep * rep = (NodeStateRep *)&signal->theData[0];
|
||||
rep->nodeState = newState;
|
||||
rep->nodeState.masterNodeId = cmasterNodeId;
|
||||
rep->nodeState.setNodeGroup(c_nodeGroup);
|
||||
EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal,
|
||||
NodeStateRep::SignalLength);
|
||||
|
||||
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
|
||||
jam();
|
||||
StartOrd * startOrd = (StartOrd *)&signal->theData[0];
|
||||
startOrd->restartInfo = c_stopRec.stopReq.requestInfo;
|
||||
sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500,
|
||||
StartOrd::SignalLength);
|
||||
} else {
|
||||
jam();
|
||||
sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
unblock:
|
||||
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
|
||||
req->senderRef = reference();
|
||||
req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP;
|
||||
req->requestType = WaitGCPReq::UnblockStartGcp;
|
||||
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
|
||||
WaitGCPReq::SignalLength, JBB);
|
||||
}
|
||||
|
||||
// Handler for GSN_STOP_CONF during a multi-node stop.
//
// Only valid while c_stopRec.m_state is SR_QMGR_STOP_REQ (enforced with
// ndbrequire). Each STOP_CONF removes conf->nodeId from
// c_stopRec.m_stop_req_counter. Once the counter reports done(), a FAIL_REP
// with cause ZMULTI_NODE_SHUTDOWN is sent for every node in
// c_stopRec.stopReq.nodes and the record moves to SR_WAIT_NODE_FAILURES.
//
// NOTE(review): STOP_CONF is presumably sent by QMGR on the nodes that
// received STOP_REQ; this block also sends it to itself from
// execNODE_FAILREP while in SR_QMGR_STOP_REQ.
void
|
||||
Ndbcntr::execSTOP_CONF(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
StopConf *conf = (StopConf*)signal->getDataPtr();
|
||||
// A STOP_CONF in any other state is treated as a fatal protocol error
ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ);
|
||||
// Mark the confirming node as no longer outstanding
c_stopRec.m_stop_req_counter.clearWaitingFor(conf->nodeId);
|
||||
if (c_stopRec.m_stop_req_counter.done())
|
||||
{
|
||||
char buf[100];
|
||||
NdbNodeBitmask mask;
|
||||
mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
|
||||
// Report the set of nodes being stopped both as an info event and on ndbout
infoEvent("Stopping of %s", mask.getText(buf));
|
||||
ndbout_c("Stopping of %s", mask.getText(buf));
|
||||
|
||||
/**
|
||||
* Report every node in the stop set as failed (ZMULTI_NODE_SHUTDOWN)
|
||||
*/
|
||||
FailRep * const failRep = (FailRep *)&signal->theData[0];
|
||||
failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN;
|
||||
// Receivers: QMGR on every node in c_clusterNodes
NodeReceiverGroup rg(QMGR, c_clusterNodes);
|
||||
Uint32 nodeId = 0;
|
||||
// Walk the set bits of stopReq.nodes, starting the search at node id 1
while ((nodeId = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, nodeId+1))
|
||||
!= NdbNodeBitmask::NotFound)
|
||||
{
|
||||
failRep->failNodeId = nodeId;
|
||||
sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
|
||||
}
|
||||
// Next step is handled in execNODE_FAILREP (case SR_WAIT_NODE_FAILURES)
c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES;
|
||||
return;
|
||||
}
|
||||
// Still waiting for STOP_CONF from at least one node
return;
|
||||
}
|
||||
|
||||
void Ndbcntr::execSTTORRY(Signal* signal){
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <signaldata/CmRegSignalData.hpp>
|
||||
#include <signaldata/ApiRegSignalData.hpp>
|
||||
#include <signaldata/FailRep.hpp>
|
||||
#include <signaldata/StopReq.hpp>
|
||||
|
||||
#include "timer.hpp"
|
||||
|
||||
|
@ -100,7 +101,12 @@ public:
|
|||
};
|
||||
|
||||
struct StartRecord {
|
||||
void reset(){ m_startKey++; m_startNode = 0;}
|
||||
// Reset the start record: advance m_startKey, clear the starting node id,
// set m_gsn to RNIL and clear the m_nodes waiting set.
// NOTE(review): RNIL presumably means "no start-protocol signal
// outstanding" for m_gsn - confirm against the users of c_start.m_gsn.
void reset(){
|
||||
m_startKey++;
|
||||
m_startNode = 0;
|
||||
m_gsn = RNIL;
|
||||
m_nodes.clearWaitingFor();
|
||||
}
|
||||
Uint32 m_startKey;
|
||||
Uint32 m_startNode;
|
||||
Uint64 m_startTimeout;
|
||||
|
@ -112,6 +118,14 @@ public:
|
|||
NdbNodeBitmask c_definedNodes; // DB nodes in config
|
||||
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
|
||||
NodeBitmask c_connectedNodes; // All kinds of connected nodes
|
||||
|
||||
/**
|
||||
* Nodes which we're checking for partitioned cluster
|
||||
*
|
||||
* i.e. nodes that connect to use, when we already have elected president
|
||||
*/
|
||||
NdbNodeBitmask c_readnodes_nodes;
|
||||
|
||||
Uint32 c_maxDynamicId;
|
||||
|
||||
// Records
|
||||
|
@ -204,6 +218,7 @@ private:
|
|||
void execPRES_TOCONF(Signal* signal);
|
||||
void execDISCONNECT_REP(Signal* signal);
|
||||
void execSYSTEM_ERROR(Signal* signal);
|
||||
void execSTOP_REQ(Signal* signal);
|
||||
|
||||
// Received signals
|
||||
void execDUMP_STATE_ORD(Signal* signal);
|
||||
|
@ -218,6 +233,8 @@ private:
|
|||
void execREAD_NODESREQ(Signal* signal);
|
||||
void execSET_VAR_REQ(Signal* signal);
|
||||
|
||||
void execREAD_NODESREF(Signal* signal);
|
||||
void execREAD_NODESCONF(Signal* signal);
|
||||
|
||||
void execAPI_VERSION_REQ(Signal* signal);
|
||||
void execAPI_BROADCAST_REP(Signal* signal);
|
||||
|
@ -234,6 +251,8 @@ private:
|
|||
void execARBIT_STOPREP(Signal* signal);
|
||||
|
||||
// Statement blocks
|
||||
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
|
||||
|
||||
void node_failed(Signal* signal, Uint16 aFailedNode);
|
||||
void checkStartInterface(Signal* signal);
|
||||
void failReport(Signal* signal,
|
||||
|
@ -251,8 +270,9 @@ private:
|
|||
|
||||
// Generated statement blocks
|
||||
void startphase1(Signal* signal);
|
||||
void electionWon();
|
||||
void electionWon(Signal* signal);
|
||||
void cmInfoconf010Lab(Signal* signal);
|
||||
|
||||
void apiHbHandlingLab(Signal* signal);
|
||||
void timerHandlingLab(Signal* signal);
|
||||
void hbReceivedLab(Signal* signal);
|
||||
|
@ -387,7 +407,9 @@ private:
|
|||
Uint16 cfailedNodes[MAX_NDB_NODES];
|
||||
Uint16 cprepFailedNodes[MAX_NDB_NODES];
|
||||
Uint16 ccommitFailedNodes[MAX_NDB_NODES];
|
||||
|
||||
|
||||
StopReq c_stopReq;
|
||||
void check_multi_node_shutdown(Signal* signal);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -35,9 +35,8 @@ void Qmgr::initData()
|
|||
|
||||
Uint32 hbDBAPI = 500;
|
||||
setHbApiDelay(hbDBAPI);
|
||||
|
||||
c_connectedNodes.clear();
|
||||
c_connectedNodes.set(getOwnNodeId());
|
||||
c_stopReq.senderRef = 0;
|
||||
}//Qmgr::initData()
|
||||
|
||||
void Qmgr::initRecords()
|
||||
|
@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf)
|
|||
|
||||
// Transit signals
|
||||
addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD);
|
||||
addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ);
|
||||
addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG);
|
||||
addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB);
|
||||
addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT);
|
||||
|
@ -96,6 +96,9 @@ Qmgr::Qmgr(const class Configuration & conf)
|
|||
addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF);
|
||||
addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP);
|
||||
|
||||
addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF);
|
||||
addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF);
|
||||
|
||||
initData();
|
||||
}//Qmgr::Qmgr()
|
||||
|
||||
|
|
|
@ -56,6 +56,33 @@
|
|||
#define DEBUG_START3(signal, msg)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* c_start.m_gsn = GSN_CM_REGREQ
|
||||
* Possible for all nodes
|
||||
* c_start.m_nodes contains all nodes in config
|
||||
*
|
||||
* c_start.m_gsn = GSN_CM_NODEINFOREQ;
|
||||
* Set when receiving CM_REGCONF
|
||||
* State possible for starting node only (not in cluster)
|
||||
*
|
||||
* c_start.m_nodes contains all nodes in the alive cluster
|
||||
* that have not replied to GSN_CM_NODEINFOREQ
|
||||
* passed by president in GSN_CM_REGCONF
|
||||
*
|
||||
* c_start.m_gsn = GSN_CM_ADD
|
||||
* Possible for president only
|
||||
* Set when receiving and accepting CM_REGREQ (to include node)
|
||||
*
|
||||
* c_start.m_nodes contains all nodes in alive cluster + starting node
|
||||
* that has not replied to GSN_CM_ADD
|
||||
* by sending GSN_CM_ACKADD
|
||||
*
|
||||
* c_start.m_gsn = GSN_CM_NODEINFOCONF
|
||||
* Possible for non presidents only
|
||||
* c_start.m_nodes contains a node that has been accepted by president
|
||||
* but has not connected to us yet
|
||||
*/
|
||||
|
||||
// Signal entries and statement blocks
|
||||
/* 4 P R O G R A M */
|
||||
/*******************************/
|
||||
|
@ -280,18 +307,24 @@ void Qmgr::execCONNECT_REP(Signal* signal)
|
|||
{
|
||||
jamEntry();
|
||||
const Uint32 nodeId = signal->theData[0];
|
||||
|
||||
if (ERROR_INSERTED(931))
|
||||
{
|
||||
jam();
|
||||
ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
|
||||
infoEvent("Discarding CONNECT_REP(%d)", nodeId);
|
||||
return;
|
||||
}
|
||||
|
||||
c_connectedNodes.set(nodeId);
|
||||
NodeRecPtr nodePtr;
|
||||
nodePtr.i = getOwnNodeId();
|
||||
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
|
||||
switch(nodePtr.p->phase){
|
||||
case ZSTARTING:
|
||||
case ZRUNNING:
|
||||
ndbrequire(!c_clusterNodes.get(nodeId));
|
||||
case ZSTARTING:
|
||||
jam();
|
||||
if(!c_start.m_nodes.isWaitingFor(nodeId)){
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ZPREPARE_FAIL:
|
||||
case ZFAIL_CLOSING:
|
||||
|
@ -303,35 +336,83 @@ void Qmgr::execCONNECT_REP(Signal* signal)
|
|||
case ZAPI_INACTIVE:
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
|
||||
{
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
|
||||
switch(c_start.m_gsn){
|
||||
case GSN_CM_REGREQ:
|
||||
jam();
|
||||
sendCmRegReq(signal, nodeId);
|
||||
|
||||
/**
|
||||
* We're waiting for CM_REGCONF; c_start.m_nodes contains all configured
|
||||
* nodes
|
||||
*/
|
||||
ndbrequire(nodePtr.p->phase == ZSTARTING);
|
||||
ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
|
||||
return;
|
||||
case GSN_CM_NODEINFOREQ:
|
||||
jam();
|
||||
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
|
||||
return;
|
||||
case GSN_CM_ADD:{
|
||||
jam();
|
||||
|
||||
ndbrequire(getOwnNodeId() != cpresident);
|
||||
c_start.m_nodes.clearWaitingFor(nodeId);
|
||||
c_start.m_gsn = RNIL;
|
||||
|
||||
NodeRecPtr addNodePtr;
|
||||
addNodePtr.i = nodeId;
|
||||
ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
|
||||
cmAddPrepare(signal, addNodePtr, nodePtr.p);
|
||||
if (c_start.m_nodes.isWaitingFor(nodeId))
|
||||
{
|
||||
jam();
|
||||
ndbrequire(getOwnNodeId() != cpresident);
|
||||
ndbrequire(nodePtr.p->phase == ZSTARTING);
|
||||
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
case GSN_CM_NODEINFOCONF:{
|
||||
jam();
|
||||
|
||||
ndbrequire(getOwnNodeId() != cpresident);
|
||||
ndbrequire(nodePtr.p->phase == ZRUNNING);
|
||||
if (c_start.m_nodes.isWaitingFor(nodeId))
|
||||
{
|
||||
jam();
|
||||
c_start.m_nodes.clearWaitingFor(nodeId);
|
||||
c_start.m_gsn = RNIL;
|
||||
|
||||
NodeRecPtr addNodePtr;
|
||||
addNodePtr.i = nodeId;
|
||||
ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
|
||||
cmAddPrepare(signal, addNodePtr, nodePtr.p);
|
||||
return;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return;
|
||||
(void)1;
|
||||
}
|
||||
|
||||
ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
|
||||
ndbrequire(!c_readnodes_nodes.get(nodeId));
|
||||
c_readnodes_nodes.set(nodeId);
|
||||
signal->theData[0] = reference();
|
||||
sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
|
||||
return;
|
||||
}//Qmgr::execCONNECT_REP()
|
||||
|
||||
void
|
||||
Qmgr::execREAD_NODESCONF(Signal* signal)
|
||||
{
|
||||
check_readnodes_reply(signal,
|
||||
refToNode(signal->getSendersBlockRef()),
|
||||
GSN_READ_NODESCONF);
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::execREAD_NODESREF(Signal* signal)
|
||||
{
|
||||
check_readnodes_reply(signal,
|
||||
refToNode(signal->getSendersBlockRef()),
|
||||
GSN_READ_NODESREF);
|
||||
}
|
||||
|
||||
/*******************************/
|
||||
/* CM_INFOCONF */
|
||||
/*******************************/
|
||||
|
@ -622,22 +703,33 @@ void Qmgr::execCM_REGCONF(Signal* signal)
|
|||
jamEntry();
|
||||
|
||||
const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
|
||||
Uint32 presidentNodeId = cmRegConf->presidentNodeId;
|
||||
|
||||
if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
|
||||
jam();
|
||||
char buf[128];
|
||||
BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion);
|
||||
BaseString::snprintf(buf,sizeof(buf),
|
||||
"incompatible version own=0x%x other=0x%x, "
|
||||
" shutting down",
|
||||
NDB_VERSION, cmRegConf->presidentVersion);
|
||||
systemErrorLab(signal, __LINE__, buf);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
myNodePtr.i = getOwnNodeId();
|
||||
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
|
||||
|
||||
ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
|
||||
// Must be a comparison: with '=' the phase was overwritten with ZSTARTING
// and the require only tested the value of ZSTARTING itself.
ndbrequire(myNodePtr.p->phase == ZSTARTING);
|
||||
|
||||
cpdistref = cmRegConf->presidentBlockRef;
|
||||
cpresident = cmRegConf->presidentNodeId;
|
||||
UintR TdynamicId = cmRegConf->dynamicId;
|
||||
c_maxDynamicId = TdynamicId;
|
||||
c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
|
||||
|
||||
myNodePtr.p->ndynamicId = TdynamicId;
|
||||
|
||||
/*--------------------------------------------------------------*/
|
||||
// Send this as an EVENT REPORT to inform about hearing about
|
||||
// other NDB node proclaiming to be president.
|
||||
|
@ -648,10 +740,6 @@ void Qmgr::execCM_REGCONF(Signal* signal)
|
|||
signal->theData[3] = TdynamicId;
|
||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
|
||||
|
||||
myNodePtr.i = getOwnNodeId();
|
||||
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
|
||||
myNodePtr.p->ndynamicId = TdynamicId;
|
||||
|
||||
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
|
||||
jam();
|
||||
if (c_clusterNodes.get(nodePtr.i)){
|
||||
|
@ -674,6 +762,84 @@ void Qmgr::execCM_REGCONF(Signal* signal)
|
|||
return;
|
||||
}//Qmgr::execCM_REGCONF()
|
||||
|
||||
void
|
||||
Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
|
||||
{
|
||||
NodeRecPtr myNodePtr;
|
||||
myNodePtr.i = getOwnNodeId();
|
||||
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
|
||||
|
||||
NodeRecPtr nodePtr;
|
||||
nodePtr.i = nodeId;
|
||||
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
|
||||
|
||||
ndbrequire(c_readnodes_nodes.get(nodeId));
|
||||
ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
|
||||
if (gsn == GSN_READ_NODESREF)
|
||||
{
|
||||
jam();
|
||||
retry:
|
||||
signal->theData[0] = reference();
|
||||
sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
|
||||
return;
|
||||
}
|
||||
|
||||
if (conf->masterNodeId == ZNIL)
|
||||
{
|
||||
jam();
|
||||
goto retry;
|
||||
}
|
||||
|
||||
Uint32 president = conf->masterNodeId;
|
||||
if (president == cpresident)
|
||||
{
|
||||
jam();
|
||||
c_readnodes_nodes.clear(nodeId);
|
||||
return;
|
||||
}
|
||||
|
||||
char buf[255];
|
||||
BaseString::snprintf(buf, sizeof(buf),
|
||||
"Partitioned cluster! check StartPartialTimeout, "
|
||||
" node %d thinks %d is president, "
|
||||
" I think president is: %d",
|
||||
nodeId, president, cpresident);
|
||||
|
||||
ndbout_c(buf);
|
||||
CRASH_INSERTION(933);
|
||||
|
||||
if (getNodeState().startLevel == NodeState::SL_STARTED)
|
||||
{
|
||||
jam();
|
||||
NdbNodeBitmask part;
|
||||
part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
|
||||
FailRep* rep = (FailRep*)signal->getDataPtrSend();
|
||||
rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
|
||||
rep->president = cpresident;
|
||||
c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition);
|
||||
Uint32 ref = calcQmgrBlockRef(nodeId);
|
||||
Uint32 i = 0;
|
||||
while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
|
||||
{
|
||||
if (i == nodeId)
|
||||
continue;
|
||||
rep->failNodeId = i;
|
||||
sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
|
||||
}
|
||||
rep->failNodeId = nodeId;
|
||||
sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
CRASH_INSERTION(932);
|
||||
|
||||
progError(__LINE__,
|
||||
ERR_ARBIT_SHUTDOWN,
|
||||
buf);
|
||||
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
|
||||
CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
|
||||
|
@ -706,13 +872,15 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
|
|||
void Qmgr::execCM_REGREF(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
c_regReqReqRecv++;
|
||||
|
||||
// Ignore block reference in data[0]
|
||||
UintR TaddNodeno = signal->theData[1];
|
||||
UintR TrefuseReason = signal->theData[2];
|
||||
Uint32 candidate = signal->theData[3];
|
||||
DEBUG_START3(signal, TrefuseReason);
|
||||
|
||||
c_regReqReqRecv++;
|
||||
|
||||
// Ignore block reference in data[0]
|
||||
|
||||
if(candidate != cpresidentCandidate){
|
||||
jam();
|
||||
|
@ -800,7 +968,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
|
|||
Uint64 now = NdbTick_CurrentMillisecond();
|
||||
if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){
|
||||
jam();
|
||||
electionWon();
|
||||
electionWon(signal);
|
||||
sendSttorryLab(signal);
|
||||
|
||||
/**
|
||||
|
@ -814,7 +982,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
|
|||
}//Qmgr::execCM_REGREF()
|
||||
|
||||
void
|
||||
Qmgr::electionWon(){
|
||||
Qmgr::electionWon(Signal* signal){
|
||||
NodeRecPtr myNodePtr;
|
||||
cpresident = getOwnNodeId(); /* This node becomes president. */
|
||||
myNodePtr.i = getOwnNodeId();
|
||||
|
@ -833,6 +1001,12 @@ Qmgr::electionWon(){
|
|||
cpresidentAlive = ZTRUE;
|
||||
c_stopElectionTime = ~0;
|
||||
c_start.reset();
|
||||
|
||||
signal->theData[0] = EventReport::CM_REGCONF;
|
||||
signal->theData[1] = getOwnNodeId();
|
||||
signal->theData[2] = cpresident;
|
||||
signal->theData[3] = 1;
|
||||
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -967,7 +1141,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
|
|||
ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
|
||||
c_start.m_nodes.clearWaitingFor();
|
||||
c_start.m_nodes.setWaitingFor(nodePtr.i);
|
||||
c_start.m_gsn = GSN_CM_ADD;
|
||||
c_start.m_gsn = GSN_CM_NODEINFOCONF;
|
||||
#else
|
||||
warningEvent("Enabling communication to CM_ADD node %u state=%d",
|
||||
nodePtr.i,
|
||||
|
@ -1872,7 +2046,8 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
|
|||
const Uint32 nodeId = rep->nodeId;
|
||||
const Uint32 err = rep->err;
|
||||
c_connectedNodes.clear(nodeId);
|
||||
|
||||
c_readnodes_nodes.clear(nodeId);
|
||||
|
||||
NodeRecPtr nodePtr;
|
||||
nodePtr.i = getOwnNodeId();
|
||||
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
|
||||
|
@ -1893,9 +2068,13 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
|
|||
case ZAPI_ACTIVE:
|
||||
ndbrequire(false);
|
||||
case ZAPI_INACTIVE:
|
||||
{
|
||||
char buf[100];
|
||||
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);
|
||||
progError(__LINE__, ERR_SR_OTHERNODEFAILED, buf);
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
||||
}
|
||||
node_failed(signal, nodeId);
|
||||
}//DISCONNECT_REP
|
||||
|
||||
|
@ -2150,10 +2329,16 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
|
|||
|
||||
failedNodePtr.i = aFailedNode;
|
||||
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
|
||||
FailRep* rep = (FailRep*)signal->getDataPtr();
|
||||
|
||||
check_multi_node_shutdown(signal);
|
||||
|
||||
if (failedNodePtr.i == getOwnNodeId()) {
|
||||
jam();
|
||||
|
||||
Uint32 code = 0;
|
||||
const char * msg = 0;
|
||||
char extra[100];
|
||||
switch(aFailCause){
|
||||
case FailRep::ZOWN_FAILURE:
|
||||
msg = "Own failure";
|
||||
|
@ -2174,17 +2359,46 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
|
|||
case FailRep::ZLINK_FAILURE:
|
||||
msg = "Connection failure";
|
||||
break;
|
||||
case FailRep::ZPARTITIONED_CLUSTER:
|
||||
{
|
||||
code = ERR_ARBIT_SHUTDOWN;
|
||||
char buf1[100], buf2[100];
|
||||
c_clusterNodes.getText(buf1);
|
||||
if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength &&
|
||||
signal->header.theVerId_signalNumber == GSN_FAIL_REP)
|
||||
{
|
||||
jam();
|
||||
NdbNodeBitmask part;
|
||||
part.assign(NdbNodeBitmask::Size, rep->partition);
|
||||
part.getText(buf2);
|
||||
BaseString::snprintf(extra, sizeof(extra),
|
||||
"Partitioned cluster!"
|
||||
" Our cluster: %s other cluster: %s",
|
||||
buf1, buf2);
|
||||
}
|
||||
else
|
||||
{
|
||||
jam();
|
||||
BaseString::snprintf(extra, sizeof(extra),
|
||||
"Partitioned cluster!"
|
||||
" Our cluster: %s ", buf1);
|
||||
}
|
||||
msg = extra;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
char buf[100];
|
||||
BaseString::snprintf(buf, 100,
|
||||
CRASH_INSERTION(932);
|
||||
|
||||
char buf[255];
|
||||
// Argument order has to follow the format string: %s consumes the cause
// text and the trailing %u the numeric fail cause (they were swapped).
BaseString::snprintf(buf, sizeof(buf),
		     "We(%u) have been declared dead by %u reason: %s(%u)",
		     getOwnNodeId(),
		     refToNode(signal->getSendersBlockRef()),
		     msg ? msg : "<Unknown>",
		     aFailCause);
|
||||
|
||||
progError(__LINE__, 0, buf);
|
||||
|
||||
progError(__LINE__, code, buf);
|
||||
return;
|
||||
}//if
|
||||
|
||||
|
@ -2241,7 +2455,9 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
|
|||
{
|
||||
NodeRecPtr myNodePtr;
|
||||
jamEntry();
|
||||
|
||||
|
||||
check_multi_node_shutdown(signal);
|
||||
|
||||
PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
|
||||
|
||||
BlockReference Tblockref = prepFail->xxxBlockRef;
|
||||
|
@ -3893,6 +4109,7 @@ Qmgr::stateArbitCrash(Signal* signal)
|
|||
if (! (arbitRec.getTimediff() > getArbitTimeout()))
|
||||
return;
|
||||
#endif
|
||||
CRASH_INSERTION(932);
|
||||
progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
|
||||
"Arbitrator decided to shutdown this node");
|
||||
}
|
||||
|
@ -4054,3 +4271,40 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal)
|
|||
NodeReceiverGroup rg(API_CLUSTERMGR, mask);
|
||||
sendSignal(rg, api.gsn, signal, len, JBB); // forward sections
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::execSTOP_REQ(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
c_stopReq = * (StopReq*)signal->getDataPtr();
|
||||
|
||||
if (c_stopReq.senderRef)
|
||||
{
|
||||
ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
|
||||
|
||||
StopConf *conf = (StopConf*)signal->getDataPtrSend();
|
||||
conf->senderData = c_stopReq.senderData;
|
||||
conf->nodeState = getOwnNodeId();
|
||||
sendSignal(c_stopReq.senderRef,
|
||||
GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::check_multi_node_shutdown(Signal* signal)
|
||||
{
|
||||
if (c_stopReq.senderRef &&
|
||||
NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
|
||||
{
|
||||
jam();
|
||||
if(StopReq::getPerformRestart(c_stopReq.requestInfo))
|
||||
{
|
||||
jam();
|
||||
StartOrd * startOrd = (StartOrd *)&signal->theData[0];
|
||||
startOrd->restartInfo = c_stopReq.requestInfo;
|
||||
EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2);
|
||||
} else {
|
||||
EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#include <NdbRestarts.hpp>
|
||||
#include <Vector.hpp>
|
||||
#include <signaldata/DumpStateOrd.hpp>
|
||||
|
||||
#include <Bitmask.hpp>
|
||||
|
||||
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
|
||||
|
||||
|
@ -669,6 +669,206 @@ err:
|
|||
return NDBT_FAILED;
|
||||
}
|
||||
|
||||
int
|
||||
runBug18612(NDBT_Context* ctx, NDBT_Step* step){
|
||||
|
||||
// Assume two replicas
|
||||
NdbRestarter restarter;
|
||||
if (restarter.getNumDbNodes() < 2)
|
||||
{
|
||||
ctx->stopTest();
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
Uint32 cnt = restarter.getNumDbNodes();
|
||||
|
||||
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
|
||||
{
|
||||
int partition0[256];
|
||||
int partition1[256];
|
||||
bzero(partition0, sizeof(partition0));
|
||||
bzero(partition1, sizeof(partition1));
|
||||
Bitmask<4> nodesmask;
|
||||
|
||||
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
{
|
||||
do {
|
||||
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
|
||||
if (tmp == -1)
|
||||
break;
|
||||
node1 = tmp;
|
||||
} while(nodesmask.get(node1));
|
||||
|
||||
partition0[i] = node1;
|
||||
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
|
||||
|
||||
ndbout_c("nodes %d %d", node1, partition1[i]);
|
||||
|
||||
assert(!nodesmask.get(node1));
|
||||
assert(!nodesmask.get(partition1[i]));
|
||||
nodesmask.set(node1);
|
||||
nodesmask.set(partition1[i]);
|
||||
}
|
||||
|
||||
ndbout_c("done");
|
||||
|
||||
int dump[255];
|
||||
dump[0] = DumpStateOrd::NdbcntrStopNodes;
|
||||
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
|
||||
|
||||
Uint32 master = restarter.getMasterNodeId();
|
||||
|
||||
if (restarter.dumpStateOneNode(master, dump, 1+cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesNoStart(partition0, cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
|
||||
|
||||
if (restarter.dumpStateAllNodes(val2, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.insertErrorInAllNodes(932))
|
||||
return NDBT_FAILED;
|
||||
|
||||
dump[0] = 9000;
|
||||
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
dump[0] = 9000;
|
||||
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startNodes(partition0, cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesStartPhase(partition0, cnt/2, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
dump[0] = 9001;
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateAllNodes(dump, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesNoStart(partition0, cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.restartOneDbNode(partition0[i], true, true, true))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesNoStart(partition0, cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startAll())
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitClusterStarted())
|
||||
return NDBT_FAILED;
|
||||
}
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
int
|
||||
runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){
|
||||
|
||||
// Assume two replicas
|
||||
NdbRestarter restarter;
|
||||
if (restarter.getNumDbNodes() < 2)
|
||||
{
|
||||
ctx->stopTest();
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
Uint32 cnt = restarter.getNumDbNodes();
|
||||
|
||||
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
|
||||
{
|
||||
int partition0[256];
|
||||
int partition1[256];
|
||||
bzero(partition0, sizeof(partition0));
|
||||
bzero(partition1, sizeof(partition1));
|
||||
Bitmask<4> nodesmask;
|
||||
|
||||
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
{
|
||||
do {
|
||||
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
|
||||
if (tmp == -1)
|
||||
break;
|
||||
node1 = tmp;
|
||||
} while(nodesmask.get(node1));
|
||||
|
||||
partition0[i] = node1;
|
||||
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
|
||||
|
||||
ndbout_c("nodes %d %d", node1, partition1[i]);
|
||||
|
||||
assert(!nodesmask.get(node1));
|
||||
assert(!nodesmask.get(partition1[i]));
|
||||
nodesmask.set(node1);
|
||||
nodesmask.set(partition1[i]);
|
||||
}
|
||||
|
||||
ndbout_c("done");
|
||||
|
||||
if (restarter.restartAll(false, true, false))
|
||||
return NDBT_FAILED;
|
||||
|
||||
int dump[255];
|
||||
dump[0] = 9000;
|
||||
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
dump[0] = 9000;
|
||||
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
|
||||
|
||||
if (restarter.dumpStateAllNodes(val2, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.insertErrorInAllNodes(932))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startAll())
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitClusterStartPhase(2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
dump[0] = 9001;
|
||||
for (Uint32 i = 0; i<cnt/2; i++)
|
||||
if (restarter.dumpStateAllNodes(dump, 2))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitClusterNoStart(30))
|
||||
if (restarter.waitNodesNoStart(partition0, cnt/2, 10))
|
||||
if (restarter.waitNodesNoStart(partition1, cnt/2, 10))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startAll())
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitClusterStarted())
|
||||
return NDBT_FAILED;
|
||||
}
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
|
||||
NDBT_TESTSUITE(testNodeRestart);
|
||||
TESTCASE("NoLoad",
|
||||
"Test that one node at a time can be stopped and then restarted "\
|
||||
|
@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
|
|||
STEP(runBug18414);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
TESTCASE("Bug18612",
|
||||
"Test bug with partitioned clusters"){
|
||||
INITIALIZER(runLoadTable);
|
||||
STEP(runBug18612);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
TESTCASE("Bug18612SR",
|
||||
"Test bug with partitioned clusters"){
|
||||
INITIALIZER(runLoadTable);
|
||||
STEP(runBug18612SR);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
NDBT_TESTSUITE_END(testNodeRestart);
|
||||
|
||||
int main(int argc, const char** argv){
|
||||
|
|
|
@ -433,10 +433,18 @@ args: -n Bug16772 T1
|
|||
#cmd: testSystemRestart
|
||||
#args: -n Bug18385 T1
|
||||
#
|
||||
max-time: 500
|
||||
max-time: 1000
|
||||
cmd: testNodeRestart
|
||||
args: -n Bug18414 T1
|
||||
|
||||
max-time: 1000
|
||||
cmd: testNodeRestart
|
||||
args: -n Bug18612 T1
|
||||
|
||||
max-time: 1000
|
||||
cmd: testNodeRestart
|
||||
args: -n Bug18612SR T1
|
||||
|
||||
# OLD FLEX
|
||||
max-time: 500
|
||||
cmd: flexBench
|
||||
|
|
|
@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
|
|||
<< ") secs " << endl;
|
||||
NdbSleep_SecSleep(seconds);
|
||||
|
||||
randomId = (rand() % _restarter.getNumDbNodes());
|
||||
nodeId = _restarter.getDbNodeId(randomId);
|
||||
nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand());
|
||||
g_info << _restart->m_name << ": node = "<< nodeId << endl;
|
||||
|
||||
CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0,
|
||||
|
|
Loading…
Reference in a new issue