mirror of
https://github.com/MariaDB/server.git
synced 2026-05-06 23:25:34 +02:00
bug#15632 - ndb
Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a) by also waiting for starting nodes
ndb/include/ndb_version.h.in:
Handle upgrade of bug fix
ndb/src/kernel/blocks/ERROR_codes.txt:
New error code for delaying INCL_NODE_REQ
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a)
by also waiting for starting nodes
ndb/test/ndbapi/testNodeRestart.cpp:
Add testcase for bug#15632
This commit is contained in:
parent
fc4c198ef0
commit
5b10b7bc53
4 changed files with 100 additions and 3 deletions
|
|
@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
|
|||
*/
|
||||
/*#define NDB_VERSION_ID 0*/
|
||||
|
||||
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
|
||||
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ.
|
|||
5007:
|
||||
Delay GCP_SAVEREQ by 10 secs
|
||||
|
||||
7165: Delay INCL_NODE_REQ in starting node yielding error in GCP_PREPARE
|
||||
|
||||
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
|
||||
-----------------------------------------------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
|
|||
signal->theData[2] = c_nodeStartMaster.failNr;
|
||||
signal->theData[3] = 0;
|
||||
signal->theData[4] = currentgcp;
|
||||
sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
|
||||
sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
|
||||
}//Dbdih::sendINCL_NODEREQ()
|
||||
|
||||
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
|
||||
|
|
@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal)
|
|||
// global checkpoint id and the correct state. We do not wait for any reply
|
||||
// since the starting node will not send any.
|
||||
/*-------------------------------------------------------------------------*/
|
||||
Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
|
||||
|
||||
if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
|
||||
(getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
|
||||
{
|
||||
c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
|
||||
}
|
||||
|
||||
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
|
||||
}//Dbdih::gcpBlockedLab()
|
||||
|
||||
|
|
@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
|
|||
jamEntry();
|
||||
Uint32 retRef = signal->theData[0];
|
||||
Uint32 nodeId = signal->theData[1];
|
||||
if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
|
||||
{
|
||||
CLEAR_ERROR_INSERT_VALUE;
|
||||
sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
|
||||
return;
|
||||
}
|
||||
|
||||
Uint32 tnodeStartFailNr = signal->theData[2];
|
||||
currentgcp = signal->theData[4];
|
||||
CRASH_INSERTION(7127);
|
||||
|
|
@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
|
|||
// id's and the lcp status.
|
||||
/*-----------------------------------------------------------------------*/
|
||||
CRASH_INSERTION(7171);
|
||||
Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
|
||||
|
||||
if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
|
||||
(NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
|
||||
{
|
||||
signal->theData[0] = getOwnNodeId();
|
||||
signal->theData[1] = getOwnNodeId();
|
||||
sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
|
||||
}
|
||||
return;
|
||||
}//if
|
||||
if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
|
||||
|
|
@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
|
|||
/*------------------------------------------------------------------------*/
|
||||
// Verify that a starting node has also crashed. Reset the node start record.
|
||||
/*-------------------------------------------------------------------------*/
|
||||
if (c_nodeStartMaster.startNode != RNIL) {
|
||||
ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
|
||||
if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
|
||||
{
|
||||
BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
|
||||
SystemError * const sysErr = (SystemError*)&signal->theData[0];
|
||||
sysErr->errorCode = SystemError::StartInProgressError;
|
||||
sysErr->errorRef = reference();
|
||||
sysErr->data1= 0;
|
||||
sysErr->data2= __LINE__;
|
||||
sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
|
||||
nodeResetStart();
|
||||
}//if
|
||||
|
||||
/*--------------------------------------------------*/
|
||||
|
|
|
|||
|
|
@ -446,6 +446,56 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
|
|||
return NDBT_OK;
|
||||
}
|
||||
|
||||
/**
 * Test step for bug#15632.
 *
 * Restarts a single data node twice. Each time the node is brought down with
 * the "nostart" flag, an error insert is armed in it (7165 on the first pass,
 * 7171 on the second), and the node is then started and waited for.
 *
 * @param ctx   test context; queried for loop/record counts and used to stop
 *              the test at the end
 * @param step  not referenced in this function
 * @return NDBT_FAILED as soon as any restarter call returns non-zero,
 *         otherwise NDBT_OK
 */
int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
|
||||
// NOTE(review): result, loops and records are not referenced again in this
// function.
int result = NDBT_OK;
|
||||
int loops = ctx->getNumLoops();
|
||||
int records = ctx->getNumRecords();
|
||||
NdbRestarter restarter;
|
||||
|
||||
// The node the restarter reports for index 1 is the one restarted below.
int nodeId = restarter.getDbNodeId(1);
|
||||
|
||||
ndbout << "Restart node " << nodeId << endl;
|
||||
|
||||
// First pass: bring the node down (non-initial, nostart, abort) so that the
// error insert can be armed before the node begins its start.
if (restarter.restartOneDbNode(nodeId,
|
||||
/** initial */ false,
|
||||
/** nostart */ true,
|
||||
/** abort */ true))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesNoStart(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
// 7165: described in ERROR_codes.txt as delaying INCL_NODE_REQ in the
// starting node.
if (restarter.insertErrorInNode(nodeId, 7165))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startNodes(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
// The node must still reach the started state with 7165 armed.
if (restarter.waitNodesStarted(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
// Second pass: same restart sequence, this time with error insert 7171.
if (restarter.restartOneDbNode(nodeId,
|
||||
/** initial */ false,
|
||||
/** nostart */ true,
|
||||
/** abort */ true))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesNoStart(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
// 7171 is checked via CRASH_INSERTION in Dbdih::execINCL_NODEREQ.
// NOTE(review): presumably the node crashes there and is expected to come
// back on its own before waitNodesStarted succeeds — confirm.
if (restarter.insertErrorInNode(nodeId, 7171))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.startNodes(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
if (restarter.waitNodesStarted(&nodeId, 1))
|
||||
return NDBT_FAILED;
|
||||
|
||||
// Both passes completed; signal the test context to stop.
ctx->stopTest();
|
||||
return NDBT_OK;
|
||||
}
|
||||
|
||||
|
||||
NDBT_TESTSUITE(testNodeRestart);
|
||||
TESTCASE("NoLoad",
|
||||
|
|
@ -596,6 +646,8 @@ TESTCASE("RestartNFDuringNR",
|
|||
INITIALIZER(runCheckAllNodesStarted);
|
||||
INITIALIZER(runLoadTable);
|
||||
STEP(runRestarts);
|
||||
STEP(runPkUpdateUntilStopped);
|
||||
STEP(runScanUpdateUntilStopped);
|
||||
FINALIZER(runScanReadVerify);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
|
|
@ -685,6 +737,8 @@ TESTCASE("RestartNodeDuringLCP",
|
|||
INITIALIZER(runCheckAllNodesStarted);
|
||||
INITIALIZER(runLoadTable);
|
||||
STEP(runRestarts);
|
||||
STEP(runPkUpdateUntilStopped);
|
||||
STEP(runScanUpdateUntilStopped);
|
||||
FINALIZER(runScanReadVerify);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
|
|
@ -716,6 +770,12 @@ TESTCASE("Bug15587",
|
|||
STEP(runBug15587);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
// Registers the "Bug15632" test case in the testNodeRestart suite:
// runLoadTable as initializer, runBug15632 as the single step, and
// runClearTable as finalizer.
TESTCASE("Bug15632",
|
||||
"Test bug with NF during NR"){
|
||||
INITIALIZER(runLoadTable);
|
||||
STEP(runBug15632);
|
||||
FINALIZER(runClearTable);
|
||||
}
|
||||
NDBT_TESTSUITE_END(testNodeRestart);
|
||||
|
||||
int main(int argc, const char** argv){
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue