ndb: new arbitrator behaviour for >=3-way: < 1/2 nodes can survive

ndb/include/kernel/signaldata/ArbitSignalData.hpp:
  new arbitrator behaviour for >=3-way: < 1/2 nodes can survive
ndb/src/common/debugger/EventLogger.cpp:
  new arbitrator behaviour for >=3-way: < 1/2 nodes can survive
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  new arbitrator behaviour for >=3-way: < 1/2 nodes can survive
This commit is contained in:
unknown 2004-11-18 16:44:50 +01:00
parent 55ddbd46bd
commit 50ef2cc1a5
3 changed files with 33 additions and 16 deletions

View file

@ -94,13 +94,14 @@ public:
// arbitration result
LoseNodes = 41, // lose on ndb node count
WinGroups = 42, // we win, no need for arbitration
LoseGroups = 43, // we lose, missing node group
Partitioning = 44, // possible network partitioning
WinChoose = 45, // positive reply
LoseChoose = 46, // negative reply
LoseNorun = 47, // arbitrator required but not running
LoseNocfg = 48, // arbitrator required but none configured
WinNodes = 42, // win on ndb node count
WinGroups = 43, // we win, no need for arbitration
LoseGroups = 44, // we lose, missing node group
Partitioning = 45, // possible network partitioning
WinChoose = 46, // positive reply
LoseChoose = 47, // negative reply
LoseNorun = 48, // arbitrator required but not running
LoseNocfg = 49, // arbitrator required but none configured
// general error codes
ErrTicket = 91, // invalid arbitrator-ticket

View file

@ -421,6 +421,11 @@ EventLogger::getText(char * m_text, size_t m_text_len,
"%sArbitration check lost - less than 1/2 nodes left",
theNodeId);
break;
case ArbitCode::WinNodes:
BaseString::snprintf(m_text, m_text_len,
"%sArbitration check won - all node groups and more than 1/2 nodes left",
theNodeId);
break;
case ArbitCode::WinGroups:
BaseString::snprintf(m_text, m_text_len,
"%sArbitration check won - node group majority",

View file

@ -2946,6 +2946,12 @@ void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
* the "handle" routines.
*/
/**
* Whether a partition left with fewer than 1/2 of the nodes should die
* unconditionally. Affects only >= 3-way replication.
*/
static const bool g_ndb_arbit_one_half_rule = false;
/**
* Config signals are logically part of CM_INIT.
*/
@ -3157,7 +3163,8 @@ Qmgr::handleArbitCheck(Signal* signal)
ndbrequire(cpresident == getOwnNodeId());
NodeBitmask ndbMask;
computeArbitNdbMask(ndbMask);
if (2 * ndbMask.count() < cnoOfNodes) {
if (g_ndb_arbit_one_half_rule &&
2 * ndbMask.count() < cnoOfNodes) {
jam();
arbitRec.code = ArbitCode::LoseNodes;
} else {
@ -3181,6 +3188,11 @@ Qmgr::handleArbitCheck(Signal* signal)
case CheckNodeGroups::Partitioning:
jam();
arbitRec.code = ArbitCode::Partitioning;
if (g_ndb_arbit_one_half_rule &&
2 * ndbMask.count() > cnoOfNodes) {
jam();
arbitRec.code = ArbitCode::WinNodes;
}
break;
default:
ndbrequire(false);
@ -3190,8 +3202,12 @@ Qmgr::handleArbitCheck(Signal* signal)
switch (arbitRec.code) {
case ArbitCode::LoseNodes:
jam();
case ArbitCode::LoseGroups:
jam();
goto crashme;
case ArbitCode::WinGroups:
case ArbitCode::WinNodes:
jam();
case ArbitCode::WinGroups:
jam();
if (arbitRec.state == ARBIT_RUN) {
jam();
@ -3200,9 +3216,6 @@ Qmgr::handleArbitCheck(Signal* signal)
arbitRec.state = ARBIT_INIT;
arbitRec.newstate = true;
break;
case ArbitCode::LoseGroups:
jam();
goto crashme;
case ArbitCode::Partitioning:
if (arbitRec.state == ARBIT_RUN) {
jam();
@ -3762,8 +3775,7 @@ Qmgr::execARBIT_CHOOSEREF(Signal* signal)
}
/**
* Handle CRASH state. We must crash immediately. But it
* would be nice to wait until event reports have been sent.
* Handle CRASH state. We must crash immediately.
* XXX tell other nodes in our party to crash too.
*/
void
@ -3773,12 +3785,11 @@ Qmgr::stateArbitCrash(Signal* signal)
if (arbitRec.newstate) {
jam();
CRASH_INSERTION((Uint32)910 + arbitRec.state);
arbitRec.setTimestamp();
arbitRec.code = 0;
arbitRec.newstate = false;
}
#if 0
#ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
if (! (arbitRec.getTimediff() > getArbitTimeout()))
return;
#endif