ndb - bug#15685

Error in abort handling in TC when a timeout occurs during abort
  


ndb/src/kernel/blocks/ERROR_codes.txt:
  New error codes
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  New error codes
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Don't release transaction record too early
ndb/test/ndbapi/testNodeRestart.cpp:
  Test case
ndb/test/run-test/daily-basic-tests.txt:
  Test case
This commit is contained in:
unknown 2005-12-12 17:19:04 +01:00
parent 5b10b7bc53
commit fd80fa2d4c
5 changed files with 67 additions and 3 deletions

View file

@ -165,6 +165,7 @@ handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure
handling.
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
-------------------------------------------------
5011:
@ -201,6 +202,9 @@ Delay execution of ABORTREQ signal 2 seconds to generate time-out.
8048: Make TC not choose own node for simple/dirty read
5041: Crash if receiving simple read from other TC on different node
5100,5101: Drop ABORT req in primary replica
Crash on "next" ABORT
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
8040:

View file

@ -5870,12 +5870,21 @@ void Dblqh::execABORT(Signal* signal)
warningReport(signal, 8);
return;
}//if
TcConnectionrec * const regTcPtr = tcConnectptr.p;
if (ERROR_INSERTED(5100))
{
SET_ERROR_INSERT_VALUE(5101);
return;
}
CRASH_INSERTION2(5101, regTcPtr->nextReplica != ZNIL);
/* ------------------------------------------------------------------------- */
/*A GUIDING DESIGN PRINCIPLE IN HANDLING THESE ERROR SITUATIONS HAVE BEEN */
/*KEEP IT SIMPLE. THUS WE RATHER INSERT A WAIT AND SET THE ABORT_STATE TO */
/*ACTIVE RATHER THAN WRITE NEW CODE TO HANDLE EVERY SPECIAL SITUATION. */
/* ------------------------------------------------------------------------- */
TcConnectionrec * const regTcPtr = tcConnectptr.p;
if (regTcPtr->nextReplica != ZNIL) {
/* ------------------------------------------------------------------------- */
// We will immediately send the ABORT message also to the next LQH node in line.

View file

@ -6129,7 +6129,6 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
ndbrequire(c_apiConTimer_line[apiConnectptr.i] != 3615);
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
jam();
/*
@ -6389,8 +6388,8 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
warningEvent(buf);
ndbout_c(buf);
ndbrequire(false);
releaseAbortResources(signal);
}
releaseAbortResources(signal);
return;
}//if
TloopCount++;

View file

@ -496,6 +496,45 @@ int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
/**
 * Regression test for bug#15685 (abort handling in TC when a time-out
 * happens during abort).
 *
 * Loads 10 rows, leaves one primary-key update uncommitted, inserts error
 * code 5100 in every data node and then rolls the transaction back.
 * Afterwards it waits for the cluster to report started and clears the
 * error insert again.
 *
 * @return NDBT_OK when every checked step succeeded, NDBT_FAILED otherwise.
 */
int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
  Ndb* const ndb = GETNDB(step);
  HugoOperations ops(*ctx->getTab());
  NdbRestarter restarter;
  HugoTransactions loader(*ctx->getTab());

  if (loader.loadTable(GETNDB(step), 10) != 0)
    return NDBT_FAILED;

  // Build a transaction holding a single updated-but-uncommitted row.
  // The chain short-circuits on the first failing call.
  const bool prepared =
    ops.startTransaction(ndb) == 0 &&
    ops.pkUpdateRecord(ndb, 0, 1, rand()) == 0 &&
    ops.execute_NoCommit(ndb) == 0;
  if (!prepared) {
    ctx->stopTest();
    return NDBT_FAILED;
  }

  // 5100: documented in ERROR_codes.txt as "Drop ABORT req in primary
  // replica"; the following ABORT is then expected to hit 5101.
  if (restarter.insertErrorInAllNodes(5100) != 0)
    return NDBT_FAILED;

  // Trigger the abort path; the outcome of the rollback is not checked.
  ops.execute_Rollback(ndb);

  if (restarter.waitClusterStarted() != 0) {
    ctx->stopTest();
    return NDBT_FAILED;
  }

  // Clear the error insert in all nodes.
  if (restarter.insertErrorInAllNodes(0) != 0)
    return NDBT_FAILED;

  ctx->stopTest();
  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@ -776,6 +815,11 @@ TESTCASE("Bug15632",
STEP(runBug15632);
FINALIZER(runClearTable);
}
// Registers test case "Bug15685" in this suite: runBug15685 is its only
// step and runClearTable is run as the finalizer.
TESTCASE("Bug15685",
"Test bug with NF during abort"){
STEP(runBug15685);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){

View file

@ -438,6 +438,14 @@ max-time: 500
cmd: testNodeRestart
args: -n Bug15587 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug15632 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug15685 T1
# OLD FLEX
max-time: 500
cmd: flexBench