Merge perch.ndb.mysql.com:/home/jonas/src/50-work

into  perch.ndb.mysql.com:/home/jonas/src/50-jonas


ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Auto merged
This commit is contained in:
unknown 2006-04-06 11:55:36 +02:00
commit 71a1864f8c
20 changed files with 1721 additions and 241 deletions

View file

@ -30,12 +30,17 @@ class CmRegReq {
friend class Qmgr;
public:
STATIC_CONST( SignalLength = 3 );
STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size );
private:
Uint32 blockRef;
Uint32 nodeId;
Uint32 version; // See ndb_version.h
Uint32 version; // See ndb_version.h
Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType
Uint32 latest_gci; // 0 means no fs
Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
// to be part of restart
};
/**
@ -59,8 +64,7 @@ private:
* The dynamic id that the node reciving this signal has
*/
Uint32 dynamicId;
Uint32 allNdbNodes[NdbNodeBitmask::Size];
Uint32 allNdbNodes[NdbNodeBitmask::Size];
};
/**
@ -73,7 +77,7 @@ class CmRegRef {
friend class Qmgr;
public:
STATIC_CONST( SignalLength = 4 );
STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size );
enum ErrorCode {
ZBUSY = 0, /* Only the president can send this */
@ -85,14 +89,27 @@ public:
* as president. */
ZNOT_PRESIDENT = 5, /* We are not president */
ZNOT_DEAD = 6, /* We are not dead when we are starting */
ZINCOMPATIBLE_VERSION = 7
ZINCOMPATIBLE_VERSION = 7,
ZINCOMPATIBLE_START_TYPE = 8
};
private:
Uint32 blockRef;
Uint32 nodeId;
Uint32 errorCode;
/**
* Applicable if ZELECTION
*/
Uint32 presidentCandidate;
Uint32 candidate_latest_gci; // 0 means non
/**
* Data for sending node sending node
*/
Uint32 latest_gci;
Uint32 start_type;
Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
// to be part of restart
};
class CmAdd {

View file

@ -64,6 +64,7 @@ public:
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
// 20-24 BACKUP
NdbcntrTestStopOnError = 25,
NdbcntrStopNodes = 70,
// 100-105 TUP and ACC
// 200-240 UTIL
// 300-305 TRIX

View file

@ -18,6 +18,7 @@
#define FAIL_REP_HPP
#include "SignalData.hpp"
#include <NodeBitmask.hpp>
/**
*
@ -27,6 +28,7 @@ class FailRep {
* Sender(s) & Reciver(s)
*/
friend class Qmgr;
friend class Ndbcntr;
/**
* For printing
@ -35,7 +37,8 @@ class FailRep {
public:
STATIC_CONST( SignalLength = 2 );
STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size );
enum FailCause {
ZOWN_FAILURE=0,
ZOTHER_NODE_WHEN_WE_START=1,
@ -43,13 +46,20 @@ public:
ZSTART_IN_REGREQ=3,
ZHEARTBEAT_FAILURE=4,
ZLINK_FAILURE=5,
ZOTHERNODE_FAILED_DURING_START=6
ZOTHERNODE_FAILED_DURING_START=6,
ZMULTI_NODE_SHUTDOWN = 7,
ZPARTITIONED_CLUSTER = 8
};
private:
Uint32 failNodeId;
Uint32 failCause;
/**
* Used when failCause == ZPARTITIONED_CLUSTER
*/
Uint32 president;
Uint32 partition[NdbNodeBitmask::Size];
};

View file

@ -32,7 +32,7 @@ class StopReq
friend class MgmtSrvr;
public:
STATIC_CONST( SignalLength = 9 );
STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size);
public:
Uint32 senderRef;
@ -49,29 +49,34 @@ public:
Int32 readOperationTimeout; // Timeout before read operations are aborted
Int32 operationTimeout; // Timeout before all operations are aborted
Uint32 nodes[NdbNodeBitmask::Size];
static void setSystemStop(Uint32 & requestInfo, bool value);
static void setPerformRestart(Uint32 & requestInfo, bool value);
static void setNoStart(Uint32 & requestInfo, bool value);
static void setInitialStart(Uint32 & requestInfo, bool value);
static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value);
/**
* Don't perform "graceful" shutdown/restart...
*/
static void setStopAbort(Uint32 & requestInfo, bool value);
static void setStopNodes(Uint32 & requestInfo, bool value);
static bool getSystemStop(const Uint32 & requestInfo);
static bool getPerformRestart(const Uint32 & requestInfo);
static bool getNoStart(const Uint32 & requestInfo);
static bool getInitialStart(const Uint32 & requestInfo);
static bool getEscalateOnNodeFail(const Uint32 & requestInfo);
static bool getStopAbort(const Uint32 & requestInfo);
static bool getStopNodes(const Uint32 & requestInfo);
};
struct StopConf
{
STATIC_CONST( SignalLength = 2 );
Uint32 senderData;
Uint32 nodeState;
union {
Uint32 nodeState;
Uint32 nodeId;
};
};
class StopRef
@ -94,7 +99,9 @@ public:
NodeShutdownInProgress = 1,
SystemShutdownInProgress = 2,
NodeShutdownWouldCauseSystemCrash = 3,
TransactionAbortFailed = 4
TransactionAbortFailed = 4,
UnsupportedNodeShutdown = 5,
MultiNodeShutdownNotMaster = 6
};
public:
@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
inline
bool
StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo)
StopReq::getStopAbort(const Uint32 & requestInfo)
{
return requestInfo & 16;
return requestInfo & 32;
}
inline
bool
StopReq::getStopAbort(const Uint32 & requestInfo)
StopReq::getStopNodes(const Uint32 & requestInfo)
{
return requestInfo & 32;
return requestInfo & 64;
}
@ -185,16 +192,6 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
requestInfo &= ~8;
}
inline
void
StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value)
{
if(value)
requestInfo |= 16;
else
requestInfo &= ~16;
}
inline
void
StopReq::setStopAbort(Uint32 & requestInfo, bool value)
@ -205,6 +202,15 @@ StopReq::setStopAbort(Uint32 & requestInfo, bool value)
requestInfo &= ~32;
}
inline
void
StopReq::setStopNodes(Uint32 & requestInfo, bool value)
{
if(value)
requestInfo |= 64;
else
requestInfo &= ~64;
}
#endif

View file

@ -46,7 +46,9 @@ public:
Complete = 1, ///< Wait for a GCP to complete
CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed
CompleteIfRunning = 3, ///< Wait for ongoing GCP
CurrentGCI = 8 ///< Immediately return current GCI
CurrentGCI = 8, ///< Immediately return current GCI
BlockStartGcp = 9,
UnblockStartGcp = 10
};
Uint32 senderRef;
@ -70,11 +72,12 @@ class WaitGCPConf {
//friend class Grep::PSCoord;
public:
STATIC_CONST( SignalLength = 2 );
STATIC_CONST( SignalLength = 3 );
public:
Uint32 senderData;
Uint32 gcp;
Uint32 blockStatus;
};
class WaitGCPRef {

View file

@ -166,10 +166,14 @@ extern "C" {
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
NDB_LE_BackupCompleted = 56,
/** NDB_MGM_EVENT_CATEGORY_BACKUP */
NDB_LE_BackupAborted = 57
NDB_LE_BackupAborted = 57,
/* 58 used in 5.1 */
/* 59 used */
/** NDB_MGM_EVENT_CATEGORY_STARTUP */
NDB_LE_StartReport = 60
/* 60 unused */
/* 61 unused */
/* 62 unused */
@ -625,6 +629,13 @@ extern "C" {
unsigned type;
unsigned node_id;
} SingleUser;
/** Log even data @ref NDB_LE_StartReport */
struct {
unsigned report_type;
unsigned remaining_time;
unsigned bitmask_size;
unsigned bitmask_data[1];
} StartReport;
#ifndef DOXYGEN_FIX
};
#else

View file

@ -707,6 +707,90 @@ void getTextSingleUser(QQQQ) {
}
}
void getTextStartReport(QQQQ) {
Uint32 time = theData[2];
Uint32 sz = theData[3];
char mask1[100];
char mask2[100];
char mask3[100];
char mask4[100];
BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1);
BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2);
BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3);
BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4);
switch(theData[1]){
case 1: // Wait initial
BaseString::snprintf
(m_text, m_text_len,
"Initial start, waiting for %s to connect, "
" nodes [ all: %s connected: %s no-wait: %s ]",
mask4, mask1, mask2, mask3);
break;
case 2: // Wait partial
BaseString::snprintf
(m_text, m_text_len,
"Waiting until nodes: %s connects, "
"nodes [ all: %s connected: %s no-wait: %s ]",
mask4, mask1, mask2, mask3);
break;
case 3: // Wait partial timeout
BaseString::snprintf
(m_text, m_text_len,
"Waiting %u sec for nodes %s to connect, "
"nodes [ all: %s connected: %s no-wait: %s ]",
time, mask4, mask1, mask2, mask3);
break;
case 4: // Wait partioned
BaseString::snprintf
(m_text, m_text_len,
"Waiting for non partitioned start, "
"nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
mask1, mask2, mask4, mask3);
break;
case 5:
BaseString::snprintf
(m_text, m_text_len,
"Waiting %u sec for non partitioned start, "
"nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
time, mask1, mask2, mask4, mask3);
break;
case 0x8000: // Do initial
BaseString::snprintf
(m_text, m_text_len,
"Initial start with nodes %s [ missing: %s no-wait: %s ]",
mask2, mask4, mask3);
break;
case 0x8001: // Do start
BaseString::snprintf
(m_text, m_text_len,
"Start with all nodes %s",
mask2);
break;
case 0x8002: // Do partial
BaseString::snprintf
(m_text, m_text_len,
"Start with nodes %s [ missing: %s no-wait: %s ]",
mask2, mask4, mask3);
break;
case 0x8003: // Do partioned
BaseString::snprintf
(m_text, m_text_len,
"Start potentially partitioned with nodes %s "
" [ missing: %s no-wait: %s ]",
mask2, mask4, mask3);
break;
default:
BaseString::snprintf
(m_text, m_text_len,
"Unknown startreport: 0x%x [ %s %s %s %s ]",
theData[1],
mask1, mask2, mask3, mask4);
}
}
#if 0
BaseString::snprintf(m_text,
m_text_len,
@ -755,6 +839,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = {
ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ),
ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ),
// NODERESTART
ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ),

View file

@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
{
}
#ifdef ERROR_INSERT
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal)
{
@ -419,21 +422,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength();
if(len == 2){
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = NDB_LE_CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = NDB_LE_CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
}
} else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO);
@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){
Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){
@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){
if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree());
}
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{
if(signal->getLength() == 1)
{
@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i;
Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11;
@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
}
#ifdef ERROR_INSERT
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
if (arg == 9001)
{
CLEAR_ERROR_INSERT_VALUE;
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE
#if 0
{

View file

@ -71,6 +71,7 @@ void Dbdih::initData()
cwaitLcpSr = false;
c_blockCommit = false;
c_blockCommitNo = 1;
cntrlblockref = RNIL;
}//Dbdih::initData()
void Dbdih::initRecords()

View file

@ -11659,7 +11659,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Uint32 tmngNode;
Uint32 tmngNodeGroup;
Uint32 tmngLimit;
Uint32 i;
Uint32 i, j;
/**-----------------------------------------------------------------------
* ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
@ -11705,6 +11705,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup);
}//if
}//for
for (i = 0; i<cnoOfNodeGroups; i++)
{
jam();
bool alive = false;
NodeGroupRecordPtr NGPtr;
NGPtr.i = i;
ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
for (j = 0; j<NGPtr.p->nodeCount; j++)
{
jam();
mngNodeptr.i = NGPtr.p->nodesInGroup[j];
ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
{
alive = true;
break;
}
}
if (!alive)
{
char buf[255];
BaseString::snprintf
(buf, sizeof(buf),
"Illegal initial start, no alive node in nodegroup %u", i);
progError(__LINE__,
NDBD_EXIT_SR_RESTARTCONFLICT,
buf);
}
}
}//Dbdih::makeNodeGroups()
/**
@ -12513,7 +12545,6 @@ void Dbdih::sendStartFragreq(Signal* signal,
void Dbdih::setInitialActiveStatus()
{
NodeRecordPtr siaNodeptr;
Uint32 tsiaNodeActiveStatus;
Uint32 tsiaNoActiveNodes;
tsiaNoActiveNodes = csystemnodes - cnoHotSpare;
@ -12521,39 +12552,34 @@ void Dbdih::setInitialActiveStatus()
SYSFILE->nodeStatus[i] = 0;
for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) {
ptrAss(siaNodeptr, nodeRecord);
if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) {
switch(siaNodeptr.p->nodeStatus){
case NodeRecord::ALIVE:
case NodeRecord::DEAD:
if (tsiaNoActiveNodes == 0) {
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
} else {
jam();
tsiaNoActiveNodes = tsiaNoActiveNodes - 1;
siaNodeptr.p->activeStatus = Sysfile::NS_Active;
}//if
} else {
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
}//if
switch (siaNodeptr.p->activeStatus) {
case Sysfile::NS_Active:
jam();
tsiaNodeActiveStatus = Sysfile::NS_Active;
break;
case Sysfile::NS_HotSpare:
jam();
tsiaNodeActiveStatus = Sysfile::NS_HotSpare;
break;
case Sysfile::NS_NotDefined:
jam();
tsiaNodeActiveStatus = Sysfile::NS_NotDefined;
if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE)
{
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_Active;
}
else
{
siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
}
}
break;
default:
ndbrequire(false);
return;
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
break;
}//switch
Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus,
tsiaNodeActiveStatus);
}//if
Sysfile::setNodeStatus(siaNodeptr.i,
SYSFILE->nodeStatus,
siaNodeptr.p->activeStatus);
}//for
}//Dbdih::setInitialActiveStatus()
@ -14274,11 +14300,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
return;
}//if
if (requestType == WaitGCPReq::BlockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 1;
return;
}
if (requestType == WaitGCPReq::UnblockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 0;
return;
}
if(isMaster()) {
/**
* Master
@ -14290,6 +14341,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam();
conf->senderData = senderData;
conf->gcp = coldgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
return;
@ -14376,6 +14428,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
conf->senderData = ptr.p->clientData;
conf->gcp = gcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
@ -14443,6 +14496,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
c_waitGCPMasterList.next(ptr);
conf->senderData = clientData;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);

View file

@ -203,6 +203,7 @@ private:
void execWAIT_GCP_CONF(Signal* signal);
void execSTOP_REQ(Signal* signal);
void execSTOP_CONF(Signal* signal);
void execRESUME_REQ(Signal* signal);
void execCHANGE_NODE_STATE_CONF(Signal* signal);
@ -338,6 +339,16 @@ public:
void progError(int line, int cause, const char * extra) {
cntr.progError(line, cause, extra);
}
enum StopNodesStep {
SR_BLOCK_GCP_START_GCP = 0,
SR_WAIT_COMPLETE_GCP = 1,
SR_UNBLOCK_GCP_START_GCP = 2,
SR_QMGR_STOP_REQ = 3,
SR_WAIT_NODE_FAILURES = 4,
SR_CLUSTER_SHUTDOWN = 12
} m_state;
SignalCounter m_stop_req_counter;
};
private:
StopRecord c_stopRec;

View file

@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf):
addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF);
addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ);
addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF);
addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ);
addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF);

View file

@ -42,6 +42,8 @@
#include <signaldata/FsRemoveReq.hpp>
#include <signaldata/ReadConfig.hpp>
#include <signaldata/FailRep.hpp>
#include <AttributeHeader.hpp>
#include <Configuration.hpp>
#include <DebuggerNames.hpp>
@ -818,17 +820,9 @@ Ndbcntr::trySystemRestart(Signal* signal){
return false;
}
if(!allNodes && c_start.m_startPartialTimeout > now){
jam();
return false;
}
NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART;
if(c_start.m_waiting.equal(c_start.m_withoutLog)){
if(!allNodes){
jam();
return false;
}
if(c_start.m_waiting.equal(c_start.m_withoutLog))
{
jam();
srType = NodeState::ST_INITIAL_START;
c_start.m_starting = c_start.m_withoutLog; // Used for starting...
@ -858,10 +852,6 @@ Ndbcntr::trySystemRestart(Signal* signal){
ndbrequire(false); // All nodes -> partitioning, which is not allowed
}
if(c_start.m_startPartitionedTimeout > now){
jam();
return false;
}
break;
}
@ -1474,13 +1464,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal,
NodeFailRep::SignalLength, JBB);
if (c_stopRec.stopReq.senderRef)
{
jam();
switch(c_stopRec.m_state){
case StopRecord::SR_WAIT_NODE_FAILURES:
{
jam();
NdbNodeBitmask tmp;
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
tmp.bitANDC(allFailed);
tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
if (tmp.isclear())
{
jam();
if (c_stopRec.stopReq.senderRef != RNIL)
{
jam();
StopConf * const stopConf = (StopConf *)&signal->theData[0];
stopConf->senderData = c_stopRec.stopReq.senderData;
stopConf->nodeState = (Uint32) NodeState::SL_SINGLEUSER;
sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_CONF, signal,
StopConf::SignalLength, JBB);
}
c_stopRec.stopReq.senderRef = 0;
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = reference();
req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP;
req->requestType = WaitGCPReq::UnblockStartGcp;
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBA);
}
break;
}
case StopRecord::SR_QMGR_STOP_REQ:
{
NdbNodeBitmask tmp;
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
tmp.bitANDC(allFailed);
if (tmp.isclear())
{
Uint32 nodeId = allFailed.find(0);
tmp.set(nodeId);
StopConf* conf = (StopConf*)signal->getDataPtrSend();
conf->senderData = c_stopRec.stopReq.senderData;
conf->nodeId = nodeId;
sendSignal(reference(),
GSN_STOP_CONF, signal, StopConf::SignalLength, JBB);
}
tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
break;
}
}
}
signal->theData[0] = NDB_LE_NODE_FAILREP;
signal->theData[2] = 0;
Uint32 nodeId = 0;
while(!allFailed.isclear()){
nodeId = allFailed.find(nodeId + 1);
allFailed.clear(nodeId);
signal->theData[0] = NDB_LE_NODE_FAILREP;
signal->theData[1] = nodeId;
signal->theData[2] = 0;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
}//for
@ -1924,13 +1975,15 @@ void
Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
{
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if(signal->theData[0] == 13){
Uint32 arg = dumpState->args[0];
if(arg == 13){
infoEvent("Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d",
cstartPhase, cinternalStartphase, cndbBlocksCount);
infoEvent("Cntr: cmasterNodeId = %d", cmasterNodeId);
}
if (dumpState->args[0] == DumpStateOrd::NdbcntrTestStopOnError){
if (arg == DumpStateOrd::NdbcntrTestStopOnError){
if (theConfiguration.stopOnError() == true)
((Configuration&)theConfiguration).stopOnError(false);
@ -1943,6 +1996,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal)
SystemError::SignalLength, JBA);
}
if (arg == DumpStateOrd::NdbcntrStopNodes)
{
NdbNodeBitmask mask;
for(Uint32 i = 1; i<signal->getLength(); i++)
mask.set(signal->theData[i]);
StopReq* req = (StopReq*)signal->getDataPtrSend();
req->senderRef = RNIL;
req->senderData = 123;
req->requestInfo = 0;
req->singleuser = 0;
req->singleUserApi = 0;
mask.copyto(NdbNodeBitmask::Size, req->nodes);
StopReq::setPerformRestart(req->requestInfo, 1);
StopReq::setNoStart(req->requestInfo, 1);
StopReq::setStopNodes(req->requestInfo, 1);
StopReq::setStopAbort(req->requestInfo, 1);
sendSignal(reference(), GSN_STOP_REQ, signal,
StopReq::SignalLength, JBB);
return;
}
}//Ndbcntr::execDUMP_STATE_ORD()
@ -2003,9 +2078,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
Uint32 senderData = req->senderData;
BlockReference senderRef = req->senderRef;
bool abort = StopReq::getStopAbort(req->requestInfo);
bool stopnodes = StopReq::getStopNodes(req->requestInfo);
if(getNodeState().startLevel < NodeState::SL_STARTED ||
abort && !singleuser){
if(!singleuser &&
(getNodeState().startLevel < NodeState::SL_STARTED ||
(abort && !stopnodes)))
{
/**
* Node is not started yet
*
@ -2047,21 +2125,71 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
else
ref->errorCode = StopRef::NodeShutdownInProgress;
ref->senderData = senderData;
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return;
}
if (stopnodes && !abort)
{
jam();
ref->errorCode = StopRef::UnsupportedNodeShutdown;
ref->senderData = senderData;
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return;
}
if (stopnodes && cmasterNodeId != getOwnNodeId())
{
jam();
ref->errorCode = StopRef::MultiNodeShutdownNotMaster;
ref->senderData = senderData;
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return;
}
c_stopRec.stopReq = * req;
c_stopRec.stopInitiatedTime = NdbTick_CurrentMillisecond();
if(!singleuser) {
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) {
if (stopnodes)
{
jam();
if(!c_stopRec.checkNodeFail(signal))
{
jam();
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
return;
}
char buf[100];
NdbNodeBitmask mask;
mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
infoEvent("Initiating shutdown abort of %s", mask.getText(buf));
ndbout_c("Initiating shutdown abort of %s", mask.getText(buf));
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = reference();
req->senderData = StopRecord::SR_BLOCK_GCP_START_GCP;
req->requestType = WaitGCPReq::BlockStartGcp;
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
return;
}
else if(!singleuser)
{
if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo))
{
jam();
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo))
{
((Configuration&)theConfiguration).stopOnError(false);
}
}
if(!c_stopRec.checkNodeFail(signal)){
if(!c_stopRec.checkNodeFail(signal))
{
jam();
return;
}
@ -2131,7 +2259,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
*/
NodeBitmask ndbMask;
ndbMask.assign(cntr.c_startedNodes);
ndbMask.clear(cntr.getOwnNodeId());
if (StopReq::getStopNodes(stopReq.requestInfo))
{
NdbNodeBitmask tmp;
tmp.assign(NdbNodeBitmask::Size, stopReq.nodes);
ndbMask.bitANDC(tmp);
}
else
{
ndbMask.clear(cntr.getOwnNodeId());
}
CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
sd->blockRef = cntr.reference();
@ -2153,7 +2291,8 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash;
const BlockReference bref = stopReq.senderRef;
cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
if (bref != RNIL)
cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
stopReq.senderRef = 0;
@ -2203,23 +2342,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){
if(stopReq.getSystemStop(stopReq.requestInfo) || stopReq.singleuser){
jam();
if(stopReq.singleuser)
{
jam();
AbortAllReq * req = (AbortAllReq*)&signal->theData[0];
req->senderRef = cntr.reference();
req->senderData = 12;
cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal,
AbortAllReq::SignalLength, JBB);
}
{
jam();
AbortAllReq * req = (AbortAllReq*)&signal->theData[0];
req->senderRef = cntr.reference();
req->senderData = 12;
cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal,
AbortAllReq::SignalLength, JBB);
}
else
{
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = cntr.reference();
req->senderData = 12;
req->requestType = WaitGCPReq::CompleteForceStart;
cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
}
{
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = cntr.reference();
req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN;
req->requestType = WaitGCPReq::CompleteForceStart;
cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
}
} else {
jam();
StopPermReq * req = (StopPermReq*)&signal->theData[0];
@ -2381,7 +2520,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = reference();
req->senderData = 12;
req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN;
req->requestType = WaitGCPReq::CompleteForceStart;
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
@ -2390,29 +2529,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){
void Ndbcntr::execWAIT_GCP_CONF(Signal* signal){
jamEntry();
ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo));
NodeState newState(NodeState::SL_STOPPING_3, true);
WaitGCPConf* conf = (WaitGCPConf*)signal->getDataPtr();
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep * rep = (NodeStateRep *)&signal->theData[0];
rep->nodeState = newState;
rep->nodeState.masterNodeId = cmasterNodeId;
rep->nodeState.setNodeGroup(c_nodeGroup);
EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, NodeStateRep::SignalLength);
switch(conf->senderData){
case StopRecord::SR_BLOCK_GCP_START_GCP:
{
jam();
/**
*
*/
if(!c_stopRec.checkNodeFail(signal))
{
jam();
goto unblock;
}
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = reference();
req->senderData = StopRecord::SR_WAIT_COMPLETE_GCP;
req->requestType = WaitGCPReq::CompleteIfRunning;
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
jam();
StartOrd * startOrd = (StartOrd *)&signal->theData[0];
startOrd->restartInfo = c_stopRec.stopReq.requestInfo;
sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500,
StartOrd::SignalLength);
} else {
jam();
sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1);
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
return;
}
case StopRecord::SR_UNBLOCK_GCP_START_GCP:
{
jam();
return;
}
case StopRecord::SR_WAIT_COMPLETE_GCP:
{
jam();
if(!c_stopRec.checkNodeFail(signal))
{
jam();
goto unblock;
}
NdbNodeBitmask tmp;
tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
c_stopRec.m_stop_req_counter = tmp;
NodeReceiverGroup rg(QMGR, tmp);
StopReq * stopReq = (StopReq *)&signal->theData[0];
* stopReq = c_stopRec.stopReq;
stopReq->senderRef = reference();
sendSignal(rg, GSN_STOP_REQ, signal, StopReq::SignalLength, JBA);
c_stopRec.m_state = StopRecord::SR_QMGR_STOP_REQ;
return;
}
case StopRecord::SR_CLUSTER_SHUTDOWN:
{
jam();
break;
}
}
{
ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo));
NodeState newState(NodeState::SL_STOPPING_3, true);
/**
* Inform QMGR so that arbitrator won't kill us
*/
NodeStateRep * rep = (NodeStateRep *)&signal->theData[0];
rep->nodeState = newState;
rep->nodeState.masterNodeId = cmasterNodeId;
rep->nodeState.setNodeGroup(c_nodeGroup);
EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal,
NodeStateRep::SignalLength);
if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){
jam();
StartOrd * startOrd = (StartOrd *)&signal->theData[0];
startOrd->restartInfo = c_stopRec.stopReq.requestInfo;
sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500,
StartOrd::SignalLength);
} else {
jam();
sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1);
}
return;
}
unblock:
WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0];
req->senderRef = reference();
req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP;
req->requestType = WaitGCPReq::UnblockStartGcp;
sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
WaitGCPReq::SignalLength, JBB);
}
void
Ndbcntr::execSTOP_CONF(Signal* signal)
{
jamEntry();
StopConf *conf = (StopConf*)signal->getDataPtr();
ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ);
c_stopRec.m_stop_req_counter.clearWaitingFor(conf->nodeId);
if (c_stopRec.m_stop_req_counter.done())
{
char buf[100];
NdbNodeBitmask mask;
mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes);
infoEvent("Stopping of %s", mask.getText(buf));
ndbout_c("Stopping of %s", mask.getText(buf));
/**
* Kill any node...
*/
FailRep * const failRep = (FailRep *)&signal->theData[0];
failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN;
NodeReceiverGroup rg(QMGR, c_clusterNodes);
Uint32 nodeId = 0;
while ((nodeId = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, nodeId+1))
!= NdbNodeBitmask::NotFound)
{
failRep->failNodeId = nodeId;
sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA);
}
c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES;
return;
}
return;
}
void Ndbcntr::execSTTORRY(Signal* signal){

View file

@ -29,6 +29,7 @@
#include <signaldata/CmRegSignalData.hpp>
#include <signaldata/ApiRegSignalData.hpp>
#include <signaldata/FailRep.hpp>
#include <signaldata/StopReq.hpp>
#include "timer.hpp"
@ -49,6 +50,7 @@
#define ZAPI_HB_HANDLING 3
#define ZTIMER_HANDLING 4
#define ZARBIT_HANDLING 5
#define ZSTART_FAILURE_LIMIT 6
/* Error Codes ------------------------------*/
#define ZERRTOOMANY 1101
@ -100,18 +102,42 @@ public:
};
struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;}
void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey;
Uint32 m_startNode;
Uint64 m_startTimeout;
Uint32 m_gsn;
SignalCounter m_nodes;
} c_start;
Uint32 m_latest_gci;
Uint32 m_start_type;
NdbNodeBitmask m_skip_nodes;
NdbNodeBitmask m_starting_nodes;
NdbNodeBitmask m_starting_nodes_w_log;
Uint16 m_president_candidate;
Uint32 m_president_candidate_gci;
Uint16 m_regReqReqSent;
Uint16 m_regReqReqRecv;
} c_start;
NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to use, when we already have elected president
*/
NdbNodeBitmask c_readnodes_nodes;
Uint32 c_maxDynamicId;
// Records
@ -204,6 +230,7 @@ private:
void execPRES_TOCONF(Signal* signal);
void execDISCONNECT_REP(Signal* signal);
void execSYSTEM_ERROR(Signal* signal);
void execSTOP_REQ(Signal* signal);
// Received signals
void execDUMP_STATE_ORD(Signal* signal);
@ -218,7 +245,12 @@ private:
void execREAD_NODESREQ(Signal* signal);
void execSET_VAR_REQ(Signal* signal);
void execREAD_NODESREF(Signal* signal);
void execREAD_NODESCONF(Signal* signal);
void execDIH_RESTARTREF(Signal* signal);
void execDIH_RESTARTCONF(Signal* signal);
void execAPI_VERSION_REQ(Signal* signal);
void execAPI_BROADCAST_REP(Signal* signal);
@ -234,6 +266,9 @@ private:
void execARBIT_STOPREP(Signal* signal);
// Statement blocks
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
Uint32 check_startup(Signal* signal);
void node_failed(Signal* signal, Uint16 aFailedNode);
void checkStartInterface(Signal* signal);
void failReport(Signal* signal,
@ -251,8 +286,9 @@ private:
// Generated statement blocks
void startphase1(Signal* signal);
void electionWon();
void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal);
void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal);
@ -354,12 +390,12 @@ private:
/* Status flags ----------------------------------*/
Uint32 c_restartPartialTimeout;
Uint32 c_restartPartionedTimeout;
Uint32 c_restartFailureTimeout;
Uint64 c_start_election_time;
Uint16 creadyDistCom;
Uint16 c_regReqReqSent;
Uint16 c_regReqReqRecv;
Uint64 c_stopElectionTime;
Uint16 cpresidentCandidate;
Uint16 cdelayRegreq;
Uint16 cpresidentAlive;
Uint16 cnoFailedNodes;
@ -387,7 +423,9 @@ private:
Uint16 cfailedNodes[MAX_NDB_NODES];
Uint16 cprepFailedNodes[MAX_NDB_NODES];
Uint16 ccommitFailedNodes[MAX_NDB_NODES];
StopReq c_stopReq;
void check_multi_node_shutdown(Signal* signal);
};
#endif

View file

@ -35,9 +35,8 @@ void Qmgr::initData()
Uint32 hbDBAPI = 500;
setHbApiDelay(hbDBAPI);
c_connectedNodes.clear();
c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
}//Qmgr::initData()
void Qmgr::initRecords()
@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf)
// Transit signals
addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD);
addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ);
addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG);
addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB);
addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT);
@ -96,6 +96,12 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF);
addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP);
addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF);
addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF);
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
initData();
}//Qmgr::Qmgr()

File diff suppressed because it is too large Load diff

View file

@ -55,6 +55,12 @@ enum ndbd_options {
NDB_STD_OPTS_VARS;
// XXX should be my_bool ???
static int _daemon, _no_daemon, _foreground, _initial, _no_start;
static int _initialstart;
static const char* _nowait_nodes;
extern Uint32 g_start_type;
extern NdbNodeBitmask g_nowait_nodes;
/**
* Arguments to NDB process
*/
@ -82,6 +88,14 @@ static struct my_option my_long_options[] =
" (implies --nodaemon)",
(gptr*) &_foreground, (gptr*) &_foreground, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
{ "nowait-nodes", NO_ARG,
"Nodes that will not be waited for during start",
(gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
{ "initial-start", NO_ARG,
"Perform initial start",
(gptr*) &_initialstart, (gptr*) &_initialstart, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
static void short_usage_sub(void)
@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv)
globalData.ownId= 0;
if (_nowait_nodes)
{
BaseString str(_nowait_nodes);
Vector<BaseString> arr;
str.split(arr, ",");
for (Uint32 i = 0; i<arr.size(); i++)
{
char *endptr = 0;
long val = strtol(arr[i].c_str(), &endptr, 10);
if (*endptr)
{
ndbout_c("Unable to parse nowait-nodes argument: %s : %s",
arr[i].c_str(), _nowait_nodes);
exit(-1);
}
if (! (val > 0 && val < MAX_NDB_NODES))
{
ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s",
val, _nowait_nodes);
exit(-1);
}
g_nowait_nodes.set(val);
}
}
if (_initialstart)
{
_initialStart = true;
g_start_type |= (1 << NodeState::ST_INITIAL_START);
}
return true;
}

View file

@ -22,7 +22,7 @@
#include <NdbRestarts.hpp>
#include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@ -669,6 +669,206 @@ err:
return NDBT_FAILED;
}
int
runBug18612(NDBT_Context* ctx, NDBT_Step* step){
// Assume two replicas
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 2)
{
ctx->stopTest();
return NDBT_OK;
}
Uint32 cnt = restarter.getNumDbNodes();
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
{
int partition0[256];
int partition1[256];
bzero(partition0, sizeof(partition0));
bzero(partition1, sizeof(partition1));
Bitmask<4> nodesmask;
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
for (Uint32 i = 0; i<cnt/2; i++)
{
do {
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
if (tmp == -1)
break;
node1 = tmp;
} while(nodesmask.get(node1));
partition0[i] = node1;
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
ndbout_c("nodes %d %d", node1, partition1[i]);
assert(!nodesmask.get(node1));
assert(!nodesmask.get(partition1[i]));
nodesmask.set(node1);
nodesmask.set(partition1[i]);
}
ndbout_c("done");
int dump[255];
dump[0] = DumpStateOrd::NdbcntrStopNodes;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
Uint32 master = restarter.getMasterNodeId();
if (restarter.dumpStateOneNode(master, dump, 1+cnt/2))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateAllNodes(val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInAllNodes(932))
return NDBT_FAILED;
dump[0] = 9000;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
return NDBT_FAILED;
dump[0] = 9000;
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
return NDBT_FAILED;
if (restarter.startNodes(partition0, cnt/2))
return NDBT_FAILED;
if (restarter.waitNodesStartPhase(partition0, cnt/2, 2))
return NDBT_FAILED;
dump[0] = 9001;
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateAllNodes(dump, 2))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.restartOneDbNode(partition0[i], true, true, true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
}
return NDBT_OK;
}
int
runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){
// Assume two replicas
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 2)
{
ctx->stopTest();
return NDBT_OK;
}
Uint32 cnt = restarter.getNumDbNodes();
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
{
int partition0[256];
int partition1[256];
bzero(partition0, sizeof(partition0));
bzero(partition1, sizeof(partition1));
Bitmask<4> nodesmask;
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
for (Uint32 i = 0; i<cnt/2; i++)
{
do {
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
if (tmp == -1)
break;
node1 = tmp;
} while(nodesmask.get(node1));
partition0[i] = node1;
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
ndbout_c("nodes %d %d", node1, partition1[i]);
assert(!nodesmask.get(node1));
assert(!nodesmask.get(partition1[i]));
nodesmask.set(node1);
nodesmask.set(partition1[i]);
}
ndbout_c("done");
if (restarter.restartAll(false, true, false))
return NDBT_FAILED;
int dump[255];
dump[0] = 9000;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
return NDBT_FAILED;
dump[0] = 9000;
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
return NDBT_FAILED;
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateAllNodes(val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInAllNodes(932))
return NDBT_FAILED;
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStartPhase(2))
return NDBT_FAILED;
dump[0] = 9001;
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateAllNodes(dump, 2))
return NDBT_FAILED;
if (restarter.waitClusterNoStart(30))
if (restarter.waitNodesNoStart(partition0, cnt/2, 10))
if (restarter.waitNodesNoStart(partition1, cnt/2, 10))
return NDBT_FAILED;
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
}
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
STEP(runBug18414);
FINALIZER(runClearTable);
}
TESTCASE("Bug18612",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612);
FINALIZER(runClearTable);
}
TESTCASE("Bug18612SR",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612SR);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){

View file

@ -433,10 +433,18 @@ args: -n Bug16772 T1
#cmd: testSystemRestart
#args: -n Bug18385 T1
#
max-time: 500
max-time: 1000
cmd: testNodeRestart
args: -n Bug18414 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612SR T1
# OLD FLEX
max-time: 500
cmd: flexBench

View file

@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
<< ") secs " << endl;
NdbSleep_SecSleep(seconds);
randomId = (rand() % _restarter.getNumDbNodes());
nodeId = _restarter.getDbNodeId(randomId);
nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand());
g_info << _restart->m_name << ": node = "<< nodeId << endl;
CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0,