mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 11:01:52 +01:00
ndb - bug#25468
handle partially transferred LCP_FRAG_REP after node failure recommit to 51-work
This commit is contained in:
parent
a1fdeba328
commit
403efda294
4 changed files with 139 additions and 5 deletions
|
@ -637,6 +637,7 @@ private:
|
|||
void execTCGETOPSIZECONF(Signal *);
|
||||
void execTC_CLOPSIZECONF(Signal *);
|
||||
|
||||
int handle_invalid_lcp_no(const class LcpFragRep*, ReplicaRecordPtr);
|
||||
void execLCP_FRAG_REP(Signal *);
|
||||
void execLCP_COMPLETE_REP(Signal *);
|
||||
void execSTART_LCP_REQ(Signal *);
|
||||
|
|
|
@ -4046,6 +4046,11 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
|
|||
Uint32 newMasterId = nodeFail->masterNodeId;
|
||||
const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
|
||||
|
||||
if (ERROR_INSERTED(7179))
|
||||
{
|
||||
CLEAR_ERROR_INSERT_VALUE;
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------------------------*/
|
||||
// The first step is to convert from a bit mask to an array of failed nodes.
|
||||
/*-------------------------------------------------------------------------*/
|
||||
|
@ -10256,12 +10261,42 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal)
|
|||
Uint32 fragId = lcpReport->fragId;
|
||||
|
||||
jamEntry();
|
||||
|
||||
if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
|
||||
{
|
||||
jam();
|
||||
Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
|
||||
Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
|
||||
if (owng == nodeg)
|
||||
{
|
||||
jam();
|
||||
ndbout_c("throwing away LCP_FRAG_REP from (and killing) %d", nodeId);
|
||||
SET_ERROR_INSERT_VALUE(7179);
|
||||
signal->theData[0] = 9999;
|
||||
sendSignal(numberToRef(CMVMI, nodeId),
|
||||
GSN_NDB_TAMPER, signal, 1, JBA);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
|
||||
{
|
||||
jam();
|
||||
Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
|
||||
Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
|
||||
if (owng == nodeg)
|
||||
{
|
||||
jam();
|
||||
ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
CRASH_INSERTION2(7025, isMaster());
|
||||
CRASH_INSERTION2(7016, !isMaster());
|
||||
|
||||
|
||||
bool fromTimeQueue = (signal->senderBlockRef() == reference());
|
||||
|
||||
|
||||
TabRecordPtr tabPtr;
|
||||
tabPtr.i = tableId;
|
||||
ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
|
||||
|
@ -10463,6 +10498,37 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
|
|||
ndbrequire(false);
|
||||
}//Dbdih::findReplica()
|
||||
|
||||
|
||||
int
|
||||
Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
|
||||
ReplicaRecordPtr replicaPtr)
|
||||
{
|
||||
ndbrequire(!isMaster());
|
||||
Uint32 lcpNo = rep->lcpNo;
|
||||
Uint32 lcpId = rep->lcpId;
|
||||
Uint32 replicaLcpNo = replicaPtr.p->nextLcp;
|
||||
Uint32 prevReplicaLcpNo = prevLcpNo(replicaLcpNo);
|
||||
|
||||
warningEvent("Detected previous node failure of %d during lcp",
|
||||
rep->nodeId);
|
||||
replicaPtr.p->nextLcp = lcpNo;
|
||||
replicaPtr.p->lcpId[lcpNo] = 0;
|
||||
replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
|
||||
|
||||
for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
|
||||
{
|
||||
jam();
|
||||
if (replicaPtr.p->lcpStatus[i] == ZVALID &&
|
||||
replicaPtr.p->lcpId[i] >= lcpId)
|
||||
{
|
||||
ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
|
||||
ndbrequire(false);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
 * Return true if all fragment replicas of the table have been checkpointed
|
||||
* to disk (in all LQHs)
|
||||
|
@ -10491,9 +10557,12 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
|
|||
|
||||
ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
|
||||
if(lcpNo != replicaPtr.p->nextLcp){
|
||||
ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
|
||||
lcpNo, replicaPtr.p->nextLcp);
|
||||
ndbrequire(false);
|
||||
if (handle_invalid_lcp_no(lcpReport, replicaPtr))
|
||||
{
|
||||
ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
|
||||
lcpNo, replicaPtr.p->nextLcp);
|
||||
ndbrequire(false);
|
||||
}
|
||||
}
|
||||
ndbrequire(lcpNo == replicaPtr.p->nextLcp);
|
||||
ndbrequire(lcpNo < MAX_LCP_STORED);
|
||||
|
|
|
@ -1073,6 +1073,63 @@ int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
|
|||
return NDBT_OK;
|
||||
}
|
||||
|
||||
/**
 * Regression test for bug#25468 (partially transferred LCP_FRAG_REP after a
 * node failure).
 *
 * Each loop iteration picks two data nodes:
 *  - node1 gets error insert 7178, which in Dbdih::execLCP_FRAG_REP makes it
 *    discard an LCP_FRAG_REP coming from a node in its own node group and
 *    send NDB_TAMPER 9999 to the sender;
 *  - node2 (same node group as node1) is the node expected to go down; it is
 *    first configured with CmvmiSetRestartOnErrorInsert = 1.
 * Dump code 7099 is then sent to the master (presumably to trigger an LCP --
 * confirm against DumpStateOrd), after which the test waits for node2 to
 * reach the no-start state, starts all nodes and waits for the cluster.
 *
 * The node1/node2 roles rotate with the iteration number so that the master
 * acts as the dropping node, as the killed node, or is not involved.
 *
 * @return NDBT_OK when every iteration completes, NDBT_FAILED on the first
 *         restarter call that reports an error or when no node2 is available.
 */
int runBug25468(NDBT_Context* ctx, NDBT_Step* step){

  const int loops = ctx->getNumLoops();
  NdbRestarter restarter;

  for (int i = 0; i<loops; i++)
  {
    const int master = restarter.getMasterNodeId();
    // Initialised so that a path that fails to pick a node is detectable.
    int node1 = -1;
    int node2 = -1;
    switch(i % 5){
    case 0:
      // Master drops the report, a peer in its node group is killed.
      node1 = master;
      node2 = restarter.getRandomNodeSameNodeGroup(master, rand());
      break;
    case 1:
      // A peer in the master's node group drops the report, master is killed.
      node1 = restarter.getRandomNodeSameNodeGroup(master, rand());
      node2 = master;
      break;
    case 2:
    case 3:
    case 4:
      // Prefer a pair outside the master's node group; fall back to the
      // master when there is no other node group.
      node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
      if (node1 == -1)
        node1 = master;
      node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
      break;
    }

    ndbout_c("node1: %d node2: %d master: %d", node1, node2, master);

    // Presumably the same-node-group lookup reports "no such node" with a
    // negative id, like the other-node-group lookup checked above; fail
    // with a clear message instead of handing an invalid id to the restarter.
    if (node1 < 0 || node2 < 0)
    {
      ndbout_c("runBug25468: no usable node pair in the same node group");
      return NDBT_FAILED;
    }

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateOneNode(node2, val2, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(node1, 7178))
      return NDBT_FAILED;

    int val1 = 7099;
    if (restarter.dumpStateOneNode(master, &val1, 1))
      return NDBT_FAILED;

    if (restarter.waitNodesNoStart(&node2, 1))
      return NDBT_FAILED;

    if (restarter.startAll())
      return NDBT_FAILED;

    if (restarter.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
|
||||
|
||||
|
||||
NDBT_TESTSUITE(testNodeRestart);
|
||||
TESTCASE("NoLoad",
|
||||
|
@ -1403,6 +1460,9 @@ TESTCASE("Bug24717", ""){
|
|||
TESTCASE("Bug25364", ""){
|
||||
INITIALIZER(runBug25364);
|
||||
}
|
||||
TESTCASE("Bug25468", ""){
|
||||
INITIALIZER(runBug25468);
|
||||
}
|
||||
NDBT_TESTSUITE_END(testNodeRestart);
|
||||
|
||||
int main(int argc, const char** argv){
|
||||
|
|
|
@ -768,6 +768,10 @@ max-time: 1500
|
|||
cmd: testSystemRestart
|
||||
args: -n Bug24664
|
||||
|
||||
max-time: 1000
|
||||
cmd: testNodeRestart
|
||||
args: -n Bug25468 T1
|
||||
|
||||
# OLD FLEX
|
||||
max-time: 500
|
||||
cmd: flexBench
|
||||
|
|
Loading…
Add table
Reference in a new issue