Merge joreland@bk-internal.mysql.com:/home/bk/mysql-4.1-wl2610

into  perch.ndb.mysql.com:/home/jonas/src/41-work
This commit is contained in:
unknown 2006-03-20 18:30:29 +01:00
commit 591aedaa2b
14 changed files with 652 additions and 223 deletions

View file

@ -44,7 +44,8 @@ private:
CHECK_WAIT_DROP_TAB_FAILED_LQH = 16,
TRIGGER_PENDING = 17,
DelayTCKEYCONF = 18
DelayTCKEYCONF = 18,
ZNF_CHECK_TRANSACTIONS = 19
};
};

View file

@ -1038,7 +1038,8 @@ private:
void prepareReplicas(FragmentstorePtr regFragptr);
void removeNodeFromStored(Uint32 nodeId,
FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr);
ReplicaRecordPtr replicaPtr,
bool temporary);
void removeOldStoredReplica(FragmentstorePtr regFragptr,
ReplicaRecordPtr replicaPtr);
void removeStoredReplica(FragmentstorePtr regFragptr,

View file

@ -5212,6 +5212,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
//const Uint32 lcpId = SYSFILE->latestLCP_ID;
const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
const bool temporary = !tabPtr.p->storedTable;
FragmentstorePtr fragPtr;
for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
@ -5232,7 +5233,7 @@ void Dbdih::removeNodeFromTable(Signal* signal,
jam();
found = true;
noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr);
removeNodeFromStored(nodeId, fragPtr, replicaPtr, temporary);
if(replicaPtr.p->lcpOngoingFlag){
jam();
/**
@ -12051,9 +12052,18 @@ void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
/*---------------------------------------------------------------*/
void Dbdih::removeNodeFromStored(Uint32 nodeId,
FragmentstorePtr fragPtr,
ReplicaRecordPtr replicatePtr)
ReplicaRecordPtr replicatePtr,
bool temporary)
{
newCrashedReplica(nodeId, replicatePtr);
if (!temporary)
{
jam();
newCrashedReplica(nodeId, replicatePtr);
}
else
{
jam();
}
removeStoredReplica(fragPtr, replicatePtr);
linkOldStoredReplica(fragPtr, replicatePtr);
ndbrequire(fragPtr.p->storedReplicas != RNIL);

View file

@ -18448,6 +18448,172 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
}
TcConnectionrec *regTcConnectionrec = tcConnectionrec;
Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize;
Uint32 arg = dumpState->args[0];
if(arg == 2306)
{
for(Uint32 i = 0; i<1024; i++)
{
TcConnectionrecPtr tcRec;
tcRec.i = ctransidHash[i];
while(tcRec.i != RNIL)
{
ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
ndbout << "TcConnectionrec " << tcRec.i;
signal->theData[0] = 2307;
signal->theData[1] = tcRec.i;
execDUMP_STATE_ORD(signal);
tcRec.i = tcRec.p->nextHashRec;
}
}
}
if(arg == 2307 || arg == 2308)
{
TcConnectionrecPtr tcRec;
tcRec.i = signal->theData[1];
ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
ndbout << " transactionState = " << tcRec.p->transactionState<<endl;
ndbout << " operation = " << tcRec.p->operation<<endl;
ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec
<< " seqNoReplica = " << tcRec.p->seqNoReplica
<< " simpleRead = " << tcRec.p->simpleRead
<< endl;
ndbout << " replicaType = " << tcRec.p->replicaType
<< " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey
<< " opExec = " << tcRec.p->opExec
<< endl;
ndbout << " opSimple = " << tcRec.p->opSimple
<< " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica
<< " lockType = " << tcRec.p->lockType
<< endl;
ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo
<< " indTakeOver = " << tcRec.p->indTakeOver
<< " dirtyOp = " << tcRec.p->dirtyOp
<< endl;
ndbout << " activeCreat = " << tcRec.p->activeCreat
<< " tcBlockref = " << hex << tcRec.p->tcBlockref
<< " reqBlockref = " << hex << tcRec.p->reqBlockref
<< " primKeyLen = " << tcRec.p->primKeyLen
<< endl;
ndbout << " nextReplica = " << tcRec.p->nextReplica
<< " tcBlockref = " << hex << tcRec.p->tcBlockref
<< " reqBlockref = " << hex << tcRec.p->reqBlockref
<< " primKeyLen = " << tcRec.p->primKeyLen
<< endl;
ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo
<< " logStartPageNo = " << tcRec.p->logStartPageNo
<< " logStartPageIndex = " << tcRec.p->logStartPageIndex
<< endl;
ndbout << " errorCode = " << tcRec.p->errorCode
<< " clientBlockref = " << hex << tcRec.p->clientBlockref
<< " applRef = " << hex << tcRec.p->applRef
<< " totSendlenAi = " << tcRec.p->totSendlenAi
<< endl;
ndbout << " totReclenAi = " << tcRec.p->totReclenAi
<< " tcScanRec = " << tcRec.p->tcScanRec
<< " tcScanInfo = " << tcRec.p->tcScanInfo
<< " tcOprec = " << hex << tcRec.p->tcOprec
<< endl;
ndbout << " tableref = " << tcRec.p->tableref
<< " simpleTcConnect = " << tcRec.p->simpleTcConnect
<< " storedProcId = " << tcRec.p->storedProcId
<< " schemaVersion = " << tcRec.p->schemaVersion
<< endl;
ndbout << " reqinfo = " << tcRec.p->reqinfo
<< " reqRef = " << tcRec.p->reqRef
<< " readlenAi = " << tcRec.p->readlenAi
<< " prevTc = " << tcRec.p->prevTc
<< endl;
ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec
<< " prevHashRec = " << tcRec.p->prevHashRec
<< " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0]
<< " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1]
<< endl;
ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec
<< " nextTc = " << tcRec.p->nextTc
<< " nextTcLogQueue = " << tcRec.p->nextTcLogQueue
<< " nextLogTcrec = " << tcRec.p->nextLogTcrec
<< endl;
ndbout << " nextHashRec = " << tcRec.p->nextHashRec
<< " logWriteState = " << tcRec.p->logWriteState
<< " logStartFileNo = " << tcRec.p->logStartFileNo
<< " listState = " << tcRec.p->listState
<< endl;
ndbout << " lastAttrinbuf = " << tcRec.p->lastAttrinbuf
<< " lastTupkeybuf = " << tcRec.p->lastTupkeybuf
<< " hashValue = " << tcRec.p->hashValue
<< endl;
ndbout << " gci = " << tcRec.p->gci
<< " fragmentptr = " << tcRec.p->fragmentptr
<< " fragmentid = " << tcRec.p->fragmentid
<< " firstTupkeybuf = " << tcRec.p->firstTupkeybuf
<< endl;
ndbout << " firstAttrinbuf = " << tcRec.p->firstAttrinbuf
<< " currTupAiLen = " << tcRec.p->currTupAiLen
<< " currReclenAi = " << tcRec.p->currReclenAi
<< endl;
ndbout << " tcTimer = " << tcRec.p->tcTimer
<< " clientConnectrec = " << tcRec.p->clientConnectrec
<< " applOprec = " << hex << tcRec.p->applOprec
<< " abortState = " << tcRec.p->abortState
<< endl;
ndbout << " transid0 = " << hex << tcRec.p->transid[0]
<< " transid1 = " << hex << tcRec.p->transid[1]
<< " tupkeyData0 = " << tcRec.p->tupkeyData[0]
<< " tupkeyData1 = " << tcRec.p->tupkeyData[1]
<< endl;
ndbout << " tupkeyData2 = " << tcRec.p->tupkeyData[2]
<< " tupkeyData3 = " << tcRec.p->tupkeyData[3]
<< endl;
switch (tcRec.p->transactionState) {
case TcConnectionrec::SCAN_STATE_USED:
if (tcRec.p->tcScanRec < cscanrecFileSize){
ScanRecordPtr TscanPtr;
c_scanRecordPool.getPtr(TscanPtr, tcRec.p->tcScanRec);
ndbout << " scanState = " << TscanPtr.p->scanState << endl;
//TscanPtr.p->scanLocalref[2];
ndbout << " copyPtr="<<TscanPtr.p->copyPtr
<< " scanAccPtr="<<TscanPtr.p->scanAccPtr
<< " scanAiLength="<<TscanPtr.p->scanAiLength
<< endl;
ndbout << " m_curr_batch_size_rows="<<
TscanPtr.p->m_curr_batch_size_rows
<< " m_max_batch_size_rows="<<
TscanPtr.p->m_max_batch_size_rows
<< " scanErrorCounter="<<TscanPtr.p->scanErrorCounter
<< endl;
ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion
<< " scanStoredProcId="<<TscanPtr.p->scanStoredProcId
<< " scanTcrec="<<TscanPtr.p->scanTcrec
<< endl;
ndbout << " scanType="<<TscanPtr.p->scanType
<< " scanApiBlockref="<<TscanPtr.p->scanApiBlockref
<< " scanNodeId="<<TscanPtr.p->scanNodeId
<< " scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus
<< endl;
ndbout << " scanFlag="<<TscanPtr.p->scanFlag
<< " scanLockHold="<<TscanPtr.p->scanLockHold
<< " scanLockMode="<<TscanPtr.p->scanLockMode
<< " scanNumber="<<TscanPtr.p->scanNumber
<< endl;
ndbout << " scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter
<< " scanTcWaiting="<<TscanPtr.p->scanTcWaiting
<< " scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag
<< endl;
} else{
ndbout << "No connected scan record found" << endl;
}
break;
default:
break;
}
ndbrequire(arg != 2308);
}
}//Dblqh::execDUMP_STATE_ORD()

View file

@ -211,14 +211,6 @@ public:
LTS_ACTIVE = 1
};
enum TakeOverState {
TOS_NOT_DEFINED = 0,
TOS_IDLE = 1,
TOS_ACTIVE = 2,
TOS_COMPLETED = 3,
TOS_NODE_FAILED = 4
};
enum FailState {
FS_IDLE = 0,
FS_LISTENING = 1,
@ -636,6 +628,7 @@ public:
ConnectionState apiConnectstate;
UintR transid[2];
UintR firstTcConnect;
NdbNodeBitmask m_transaction_nodes;
//---------------------------------------------------
// Second 16 byte cache line. Hot variables.
@ -932,7 +925,6 @@ public:
struct HostRecord {
HostState hostStatus;
LqhTransState lqhTransStatus;
TakeOverState takeOverStatus;
bool inPackedList;
UintR noOfPackedWordsLqh;
UintR packedWordsLqh[26];
@ -941,6 +933,17 @@ public:
UintR noOfWordsTCINDXCONF;
UintR packedWordsTCINDXCONF[30];
BlockReference hostLqhBlockRef;
/**
 * Bit flags tracking which node-failure handling steps are still
 * outstanding for a failed node. m_nf_bits is set to NF_NODE_FAIL_BITS
 * in execNODE_FAILREP and each step clears its own bit through
 * checkNodeFailComplete(); when no bits remain, NF_COMPLETEREP is sent.
 */
enum NodeFailBits
{
// Cleared from execTAKE_OVERTCCONF, or directly in execNODE_FAILREP
// when this node is not the master (only the master does takeover).
NF_TAKEOVER = 0x1,
// Cleared when checkScanActiveInFailedLqh has run to completion.
NF_CHECK_SCAN = 0x2,
// Cleared when nodeFailCheckTransactions has passed the last record.
NF_CHECK_TRANSACTION = 0x4,
// Cleared when checkWaitDropTabFailedLqh reports it is finished.
NF_CHECK_DROP_TAB = 0x8,
NF_NODE_FAIL_BITS = 0xF // All bits...
};
Uint32 m_nf_bits;
NdbNodeBitmask m_lqh_trans_conf;
}; /* p2c: size = 128 bytes */
typedef Ptr<HostRecord> HostRecordPtr;
@ -1578,7 +1581,7 @@ private:
void wrongSchemaVersionErrorLab(Signal* signal);
void noFreeConnectionErrorLab(Signal* signal);
void tckeyreq050Lab(Signal* signal);
void timeOutFoundLab(Signal* signal, UintR anAdd);
void timeOutFoundLab(Signal* signal, UintR anAdd, Uint32 errCode);
void completeTransAtTakeOverLab(Signal* signal, UintR TtakeOverInd);
void completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd);
void completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd);
@ -1600,6 +1603,9 @@ private:
void checkScanFragList(Signal*, Uint32 failedNodeId, ScanRecord * scanP,
LocalDLList<ScanFragRec>::Head&);
void nodeFailCheckTransactions(Signal*,Uint32 transPtrI,Uint32 failedNodeId);
void checkNodeFailComplete(Signal* signal, Uint32 failedNodeId, Uint32 bit);
// Initialisation
void initData();
void initRecords();
@ -1626,6 +1632,7 @@ private:
HostRecord *hostRecord;
HostRecordPtr hostptr;
UintR chostFilesize;
NdbNodeBitmask c_alive_nodes;
GcpRecord *gcpRecord;
GcpRecordPtr gcpPtr;

View file

@ -262,6 +262,10 @@ void Dbtc::execCONTINUEB(Signal* signal)
jam();
checkScanActiveInFailedLqh(signal, Tdata0, Tdata1);
return;
case TcContinueB::ZNF_CHECK_TRANSACTIONS:
jam();
nodeFailCheckTransactions(signal, Tdata0, Tdata1);
return;
case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:
jam();
checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1);
@ -299,8 +303,8 @@ void Dbtc::execINCL_NODEREQ(Signal* signal)
hostptr.i = signal->theData[1];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
hostptr.p->hostStatus = HS_ALIVE;
hostptr.p->takeOverStatus = TOS_IDLE;
signal->theData[0] = cownref;
c_alive_nodes.set(hostptr.i);
sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
}
@ -487,6 +491,7 @@ Dbtc::checkWaitDropTabFailedLqh(Signal* signal, Uint32 nodeId, Uint32 tableId)
* Finished
*/
jam();
checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB);
return;
}
@ -850,8 +855,6 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
hostptr.i = i;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
hostptr.p->takeOverStatus = TOS_IDLE;
if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
jam();
hostptr.p->hostStatus = HS_DEAD;
@ -859,6 +862,7 @@ void Dbtc::execREAD_NODESCONF(Signal* signal)
jam();
con_lineNodes++;
hostptr.p->hostStatus = HS_ALIVE;
c_alive_nodes.set(i);
}//if
}//if
}//for
@ -2314,6 +2318,7 @@ void Dbtc::initApiConnectRec(Signal* signal,
regApiPtr->commitAckMarker = RNIL;
regApiPtr->buddyPtr = RNIL;
regApiPtr->currSavePointId = 0;
regApiPtr->m_transaction_nodes.clear();
// Trigger data
releaseFiredTriggerData(&regApiPtr->theFiredTriggers),
// Index data
@ -2921,6 +2926,10 @@ void Dbtc::tckeyreq050Lab(Signal* signal)
signal->theData[0] = TdihConnectptr;
signal->theData[1] = Ttableref;
signal->theData[2] = TdistrHashValue;
signal->theData[3] = 0;
signal->theData[4] = 0;
signal->theData[5] = 0;
signal->theData[6] = 0;
/*-------------------------------------------------------------*/
/* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */
@ -3098,6 +3107,7 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
TcConnectRecord * const regTcPtr = tcConnectptr.p;
ApiConnectRecord * const regApiPtr = apiConnectptr.p;
CacheRecord * const regCachePtr = cachePtr.p;
UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
#ifdef ERROR_INSERT
if (ERROR_INSERTED(8002)) {
systemErrorLab(signal);
@ -3135,6 +3145,9 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd);
Tdata10 = 0;
sig0 = regCachePtr->opSimple;
sig1 = regTcPtr->operation;
bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
@ -3144,8 +3157,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
LqhKeyReq::setApplicationAddressFlag(Tdata10, 1);
LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp);
LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple);
LqhKeyReq::setOperation(Tdata10, regTcPtr->operation);
LqhKeyReq::setSimpleFlag(Tdata10, sig0);
LqhKeyReq::setOperation(Tdata10, sig1);
/* -----------------------------------------------------------------------
* Sequential Number of first LQH = 0, bit 22-23
* IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@ -3158,18 +3171,16 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
* ----------------------------------------------------------------------- */
//LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo);
Uint32 commitAckMarker = regTcPtr->commitAckMarker;
const Uint32 noOfLqhs = regTcPtr->noOfNodes;
if(commitAckMarker != RNIL){
jam();
LqhKeyReq::setMarkerFlag(Tdata10, 1);
CommitAckMarker * tmp;
tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
/**
* Populate LQH array
*/
const Uint32 noOfLqhs = regTcPtr->noOfNodes;
tmp->noOfLqhs = noOfLqhs;
for(Uint32 i = 0; i<noOfLqhs; i++){
tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i];
@ -3180,7 +3191,6 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
/* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT */
/* IS SENT TO A PRIMARY NODE. */
/* ************************************************************> */
UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend();
@ -3204,6 +3214,14 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
sig5 = regTcPtr->clientData;
sig6 = regCachePtr->scanInfo;
if (! simpleRead)
{
regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]);
regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]);
regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]);
regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]);
}
lqhKeyReq->tableSchemaVersion = sig0;
lqhKeyReq->fragmentData = sig1;
lqhKeyReq->transId1 = sig2;
@ -4587,6 +4605,7 @@ void Dbtc::copyApi(Signal* signal)
UintR TgcpPointer = regTmpApiPtr->gcpPointer;
UintR TgcpFilesize = cgcpFilesize;
UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker;
NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes;
GcpRecord *localGcpRecord = gcpRecord;
regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref;
@ -4597,6 +4616,7 @@ void Dbtc::copyApi(Signal* signal)
regApiPtr->transid[1] = Ttransid2;
regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec;
regApiPtr->commitAckMarker = TcommitAckMarker;
regApiPtr->m_transaction_nodes = Tnodes;
gcpPtr.i = TgcpPointer;
ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord);
@ -4607,6 +4627,7 @@ void Dbtc::copyApi(Signal* signal)
regTmpApiPtr->commitAckMarker = RNIL;
regTmpApiPtr->firstTcConnect = RNIL;
regTmpApiPtr->lastTcConnect = RNIL;
regTmpApiPtr->m_transaction_nodes.clear();
releaseAllSeizedIndexOperations(regTmpApiPtr);
}//Dbtc::copyApi()
@ -4865,7 +4886,7 @@ void Dbtc::releaseTransResources(Signal* signal)
TcConnectRecordPtr localTcConnectptr;
UintR TtcConnectFilesize = ctcConnectFilesize;
TcConnectRecord *localTcConnectRecord = tcConnectRecord;
apiConnectptr.p->m_transaction_nodes.clear();
localTcConnectptr.i = apiConnectptr.p->firstTcConnect;
do {
jam();
@ -5269,7 +5290,8 @@ void Dbtc::execTC_COMMITREQ(Signal* signal)
break;
case CS_ABORTING:
jam();
errorCode = ZABORTINPROGRESS;
errorCode = regApiPtr->returncode ?
regApiPtr->returncode : ZABORTINPROGRESS;
break;
case CS_START_SCAN:
jam();
@ -5808,9 +5830,9 @@ void Dbtc::abort010Lab(Signal* signal)
if (transP->firstTcConnect == RNIL) {
jam();
/*-----------------------------------------------------------------------*/
/* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
/*-----------------------------------------------------------------------*/
/*--------------------------------------------------------------------*/
/* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
/*--------------------------------------------------------------------*/
releaseAbortResources(signal);
return;
}//if
@ -6087,10 +6109,12 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
if (api_timer != 0) {
time_out_value= time_out_param + (api_con_ptr & mask_value);
time_passed= tc_timer - api_timer;
if (time_passed > time_out_value) {
if (time_passed > time_out_value)
{
jam();
timeOutFoundLab(signal, api_con_ptr);
return;
timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR);
api_con_ptr++;
break;
}
}
}
@ -6110,10 +6134,8 @@ void Dbtc::timeOutLoopStartLab(Signal* signal, Uint32 api_con_ptr)
return;
}//Dbtc::timeOutLoopStartLab()
void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode)
{
sendContinueTimeOutControl(signal, TapiConPtr + 1);
apiConnectptr.i = TapiConPtr;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
/*------------------------------------------------------------------*/
@ -6126,7 +6148,8 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
<< "Time-out in state = " << apiConnectptr.p->apiConnectstate
<< " apiConnectptr.i = " << apiConnectptr.i
<< " - exec: " << apiConnectptr.p->m_exec_flag
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]
<< " code: " << errCode);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
@ -6143,7 +6166,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
}//if
}
apiConnectptr.p->returnsignal = RS_TCROLLBACKREP;
apiConnectptr.p->returncode = ZTIME_OUT_ERROR;
apiConnectptr.p->returncode = errCode;
abort010Lab(signal);
return;
case CS_RECEIVING:
@ -6156,7 +6179,7 @@ void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
/* START ABORTING THE TRANSACTION. ALSO START CHECKING THE */
/* REMAINING TRANSACTIONS. */
/*------------------------------------------------------------------*/
terrorCode = ZTIME_OUT_ERROR;
terrorCode = errCode;
abortErrorLab(signal);
return;
case CS_COMMITTING:
@ -6800,58 +6823,44 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
const Uint32 tnewMasterId = nodeFail->masterNodeId;
arrGuard(tnoOfNodes, MAX_NDB_NODES);
Uint32 i;
int index = 0;
for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
if(NodeBitmask::get(nodeFail->theNodes, i)){
for (i = 1; i< MAX_NDB_NODES; i++)
{
if(NodeBitmask::get(nodeFail->theNodes, i))
{
cdata[index] = i;
index++;
}//if
}//for
cmasterNodeId = tnewMasterId;
tcNodeFailptr.i = 0;
ptrAss(tcNodeFailptr, tcFailRecord);
Uint32 tindex;
for (tindex = 0; tindex < tnoOfNodes; tindex++) {
for (i = 0; i < tnoOfNodes; i++)
{
jam();
hostptr.i = cdata[tindex];
hostptr.i = cdata[i];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
/*------------------------------------------------------------*/
/* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */
/* FAILED. */
/*------------------------------------------------------------*/
hostptr.p->hostStatus = HS_DEAD;
hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
c_alive_nodes.clear(hostptr.i);
if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
jam();
/*------------------------------------------------------------*/
/* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
/* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */
/* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */
/*------------------------------------------------------------*/
/* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
/* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
/* USED THEM IS COMPLETED. */
/*------------------------------------------------------------*/
{
NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
nfRep->blockNo = DBTC;
nfRep->nodeId = cownNodeid;
nfRep->failedNodeId = hostptr.i;
}
sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
NFCompleteRep::SignalLength, JBB);
} else {
ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
hostptr.p->takeOverStatus = TOS_NODE_FAILED;
}//if
if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
if (tcNodeFailptr.p->failStatus == FS_LISTENING)
{
jam();
/*------------------------------------------------------------*/
/* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */
/* FAILURE. */
/*------------------------------------------------------------*/
if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
if (hostptr.p->lqhTransStatus == LTS_ACTIVE)
{
jam();
/*------------------------------------------------------------*/
/* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@ -6863,86 +6872,46 @@ void Dbtc::execNODE_FAILREP(Signal* signal)
}//if
}//if
}//for
const bool masterFailed = (cmasterNodeId != tnewMasterId);
cmasterNodeId = tnewMasterId;
if(getOwnNodeId() == cmasterNodeId && masterFailed){
/**
* Master has failed and I'm the new master
*/
jam();
for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
if (getOwnNodeId() != tnewMasterId)
{
jam();
ptrAss(hostptr, hostRecord);
if (hostptr.p->hostStatus != HS_ALIVE) {
jam();
if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
jam();
/*------------------------------------------------------------*/
/* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */
/* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */
/* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */
/* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */
/* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */
/* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */
/* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */
/* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
/* CRASHED NODE HAVE ALREADY RECOVERED. */
/*------------------------------------------------------------*/
for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) {
jam();
ptrAss(tmpHostptr, hostRecord);
if (tmpHostptr.p->hostStatus == HS_ALIVE) {
jam();
tblockref = calcTcBlockRef(tmpHostptr.i);
signal->theData[0] = hostptr.i;
sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
}//if
}//for
}//if
}//if
}//for
}
if(getOwnNodeId() == cmasterNodeId){
jam();
for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
/**
* Only master does takeover currently
*/
hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
}
else
{
jam();
ptrAss(hostptr, hostRecord);
if (hostptr.p->hostStatus != HS_ALIVE) {
jam();
if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
jam();
/*------------------------------------------------------------*/
/* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */
/* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
/* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
/*------------------------------------------------------------*/
hostptr.p->takeOverStatus = TOS_ACTIVE;
signal->theData[0] = hostptr.i;
sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
}//if
}//if
}//for
}//if
for (tindex = 0; tindex < tnoOfNodes; tindex++) {
jam();
hostptr.i = cdata[tindex];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
/*------------------------------------------------------------*/
/* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */
/* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */
/* NODE'S LQH */
/*------------------------------------------------------------*/
signal->theData[0] = hostptr.i;
sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
}
checkScanActiveInFailedLqh(signal, 0, hostptr.i);
checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
}//for
nodeFailCheckTransactions(signal, 0, hostptr.i);
}
}//Dbtc::execNODE_FAILREP()
/**
 * Record that one node-failure handling step has finished.
 *
 * Clears `bit` in the failed node's m_nf_bits. Once no bits remain,
 * an NF_COMPLETEREP for this block (DBTC) is sent to DIH.
 *
 * @param signal        signal buffer reused for the outgoing report
 * @param failedNodeId  id of the failed node (index into hostRecord)
 * @param bit           HostRecord::NodeFailBits value to clear
 */
void
Dbtc::checkNodeFailComplete(Signal* signal,
                            Uint32 failedNodeId,
                            Uint32 bit)
{
  hostptr.i = failedNodeId;
  ptrCheckGuard(hostptr, chostFilesize, hostRecord);

  const Uint32 stillPending = hostptr.p->m_nf_bits & ~bit;
  hostptr.p->m_nf_bits = stillPending;
  if (stillPending != 0)
  {
    // Other node-failure steps are still outstanding; nothing to report yet.
    return;
  }

  // Every step is done: report completion of node-failure handling.
  NFCompleteRep * const completeRep = (NFCompleteRep *)&signal->theData[0];
  completeRep->blockNo = DBTC;
  completeRep->nodeId = cownNodeid;
  completeRep->failedNodeId = hostptr.i;
  sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
             NFCompleteRep::SignalLength, JBB);
}
void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
Uint32 scanPtrI,
Uint32 failedNodeId){
@ -6984,8 +6953,44 @@ void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
return;
}//for
checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN);
}
/**
 * Examine one ApiConnectRecord per invocation for involvement with a
 * failed node, then reschedule itself via CONTINUEB for the next record.
 *
 * A record whose m_transaction_nodes contains the failed node is pushed
 * through timeOutFoundLab() with error ZNODEFAIL_BEFORE_COMMIT. When
 * transPtrI is past the last record, the NF_CHECK_TRANSACTION step is
 * reported as complete through checkNodeFailComplete().
 *
 * @param signal        signal buffer, reused for the CONTINUEB
 * @param transPtrI     index of the ApiConnectRecord to examine
 * @param failedNodeId  id of the failed node
 */
void
Dbtc::nodeFailCheckTransactions(Signal* signal,
                                Uint32 transPtrI,
                                Uint32 failedNodeId)
{
  jam();

  if (!(transPtrI < capiConnectFilesize))
  {
    // All records have been examined: this node-failure step is done.
    checkNodeFailComplete(signal, failedNodeId,
                          HostRecord::NF_CHECK_TRANSACTION);
    return;
  }

  Ptr<ApiConnectRecord> apiPtr;
  apiPtr.i = transPtrI;
  ptrCheckGuard(apiPtr, capiConnectFilesize, apiConnectRecord);

  const bool touchesFailedNode =
    apiPtr.p->m_transaction_nodes.get(failedNodeId);
  if (touchesFailedNode)
  {
    jam();
    // Force timeout regardless of state: temporarily lower the
    // application timeout value, run the timeout handling, restore it.
    const Uint32 savedTimeout = c_appl_timeout_value;
    c_appl_timeout_value = 1;
    setApiConTimer(apiPtr.i, 0, __LINE__);
    timeOutFoundLab(signal, apiPtr.i, ZNODEFAIL_BEFORE_COMMIT);
    c_appl_timeout_value = savedTimeout;
  }

  // Continue with the next record in a later CONTINUEB.
  signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS;
  signal->theData[1] = apiPtr.i + 1;
  signal->theData[2] = failedNodeId;
  sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
}
void
Dbtc::checkScanFragList(Signal* signal,
Uint32 failedNodeId,
@ -7001,54 +7006,17 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
tfailedNodeId = signal->theData[0];
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
switch (hostptr.p->takeOverStatus) {
case TOS_IDLE:
ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
tfailedNodeId, signal->getSendersBlockRef(), reference());
if (signal->getSendersBlockRef() != reference())
{
jam();
/*------------------------------------------------------------*/
/* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */
/* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */
/* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */
/* FOR THE NODE_FAILREP MESSAGE. */
/*------------------------------------------------------------*/
hostptr.p->takeOverStatus = TOS_COMPLETED;
break;
case TOS_NODE_FAILED:
case TOS_ACTIVE:
jam();
/*------------------------------------------------------------*/
/* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */
/* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */
/* WE SET THE STATE TO TAKE_OVER_COMPLETED. */
/*------------------------------------------------------------*/
/* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
/* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
/* USED THEM IS COMPLETED. */
/*------------------------------------------------------------*/
hostptr.p->takeOverStatus = TOS_COMPLETED;
{
NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
nfRep->blockNo = DBTC;
nfRep->nodeId = cownNodeid;
nfRep->failedNodeId = hostptr.i;
}
sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
NFCompleteRep::SignalLength, JBB);
break;
case TOS_COMPLETED:
jam();
/*------------------------------------------------------------*/
/* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
/* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */
/* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
/* WE SIMPLY IGNORE THE MESSAGE. */
/*------------------------------------------------------------*/
/*empty*/;
break;
default:
jam();
systemErrorLab(signal);
return;
}//switch
}
checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
}//Dbtc::execTAKE_OVERTCCONF()
void Dbtc::execTAKE_OVERTCREQ(Signal* signal)
@ -7288,16 +7256,10 @@ void Dbtc::completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd)
/* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */
/* NODES THAT ARE ALIVE. */
/*------------------------------------------------------------*/
for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
jam();
ptrAss(hostptr, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) {
jam();
tblockref = calcTcBlockRef(hostptr.i);
signal->theData[0] = tcNodeFailptr.p->takeOverNode;
sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
}//if
}//for
NodeReceiverGroup rg(DBTC, c_alive_nodes);
signal->theData[0] = tcNodeFailptr.p->takeOverNode;
sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
if (tcNodeFailptr.p->queueIndex > 0) {
jam();
/*------------------------------------------------------------*/
@ -7979,6 +7941,7 @@ void Dbtc::initApiConnectFail(Signal* signal)
apiConnectptr.p->ndbapiBlockref = 0;
apiConnectptr.p->ndbapiConnect = 0;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->m_transaction_nodes.clear();
setApiConTimer(apiConnectptr.i, 0, __LINE__);
switch(ttransStatus){
case LqhTransConf::Committed:
@ -9756,6 +9719,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = tiacTmp - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@ -9783,6 +9747,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (2 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@ -9810,6 +9775,7 @@ void Dbtc::initApiConnect(Signal* signal)
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (3 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@ -9870,13 +9836,13 @@ void Dbtc::inithost(Signal* signal)
ptrAss(hostptr, hostRecord);
hostptr.p->hostStatus = HS_DEAD;
hostptr.p->inPackedList = false;
hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
hostptr.p->lqhTransStatus = LTS_IDLE;
hostptr.p->noOfWordsTCKEYCONF = 0;
hostptr.p->noOfWordsTCINDXCONF = 0;
hostptr.p->noOfPackedWordsLqh = 0;
hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i);
}//for
c_alive_nodes.clear();
}//Dbtc::inithost()
void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0,
@ -10126,6 +10092,7 @@ void Dbtc::releaseAbortResources(Signal* signal)
}//while
apiConnectptr.p->firstTcConnect = RNIL;
apiConnectptr.p->lastTcConnect = RNIL;
apiConnectptr.p->m_transaction_nodes.clear();
// MASV let state be CS_ABORTING until all
// signals in the "air" have been received. Reset to CS_CONNECTED
@ -10199,6 +10166,7 @@ void Dbtc::releaseApiCon(Signal* signal, UintR TapiConnectPtr)
cfirstfreeApiConnect = TlocalApiConnectptr.i;
setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__);
TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED;
ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear());
ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL);
TlocalApiConnectptr.p->ndbapiBlockref = 0;
}//Dbtc::releaseApiCon()
@ -10734,6 +10702,34 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
c_theIndexOperationPool.getSize(),
c_theIndexOperationPool.getNoOfFree());
}
// Dump code 2514: dump state on all alive nodes, then schedule dump code
// 2515 (below) to this block.
if (dumpState->args[0] == 2514)
{
// When the signal carries two words, first re-dispatch it locally as
// TcDumpOneApiConnectRec (the second word is left untouched).
if (signal->getLength() == 2)
{
dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec;
execDUMP_STATE_ORD(signal);
}
// Send DUMP_STATE_ORD with code 15 to CMVMI on every node in c_alive_nodes.
NodeReceiverGroup rg(CMVMI, c_alive_nodes);
dumpState->args[0] = 15;
sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
// Queue dump code 2515 to ourselves with a delay argument of 1000
// (presumably milliseconds - confirm against sendSignalWithDelay).
signal->theData[0] = 2515;
sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1);
return;
}
// Dump code 2515: send SYSTEM_ERROR to NDBCNTR on every alive node except
// our own, then a delayed SYSTEM_ERROR to cownref.
// NOTE(review): looks intended to bring the whole cluster down after the
// state dump triggered by 2514 - confirm.
if (dumpState->args[0] == 2515)
{
NdbNodeBitmask mask = c_alive_nodes;
mask.clear(getOwnNodeId());
NodeReceiverGroup rg(NDBCNTR, mask);
sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB);
sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1);
return;
}
}//Dbtc::execDUMP_STATE_ORD()
void Dbtc::execSET_VAR_REQ(Signal* signal)

View file

@ -257,6 +257,7 @@ void Qmgr::setArbitTimeout(UintR aArbitTimeout)
void Qmgr::execCONNECT_REP(Signal* signal)
{
jamEntry();
const Uint32 nodeId = signal->theData[0];
c_connectedNodes.set(nodeId);
NodeRecPtr nodePtr;
@ -264,9 +265,13 @@ void Qmgr::execCONNECT_REP(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
switch(nodePtr.p->phase){
case ZSTARTING:
jam();
break;
case ZRUNNING:
jam();
if(!c_start.m_nodes.isWaitingFor(nodeId)){
jam();
return;
}
break;
case ZPREPARE_FAIL:
case ZFAIL_CLOSING:
jam();
@ -277,21 +282,28 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case ZAPI_INACTIVE:
return;
}
if(!c_start.m_nodes.isWaitingFor(nodeId)){
jam();
return;
}
switch(c_start.m_gsn){
case GSN_CM_REGREQ:
jam();
sendCmRegReq(signal, nodeId);
return;
case GSN_CM_NODEINFOREQ:{
case GSN_CM_NODEINFOREQ:
jam();
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
return;
case GSN_CM_ADD:{
jam();
ndbrequire(getOwnNodeId() != cpresident);
c_start.m_nodes.clearWaitingFor(nodeId);
c_start.m_gsn = RNIL;
NodeRecPtr addNodePtr;
addNodePtr.i = nodeId;
ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
cmAddPrepare(signal, addNodePtr, nodePtr.p);
return;
}
default:
return;
@ -924,15 +936,27 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
return;
case ZFAIL_CLOSING:
jam();
#ifdef VM_TRACE
ndbout_c("Enabling communication to CM_ADD node state=%d",
nodePtr.p->phase);
#endif
#if 1
warningEvent("Recieved request to incorperate node %u, "
"while error handling has not yet completed",
nodePtr.i);
ndbrequire(getOwnNodeId() != cpresident);
ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
c_start.m_nodes.clearWaitingFor();
c_start.m_nodes.setWaitingFor(nodePtr.i);
c_start.m_gsn = GSN_CM_ADD;
#else
warningEvent("Enabling communication to CM_ADD node %u state=%d",
nodePtr.i,
nodePtr.p->phase);
nodePtr.p->phase = ZSTARTING;
nodePtr.p->failState = NORMAL;
signal->theData[0] = 0;
signal->theData[1] = nodePtr.i;
sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
#endif
return;
case ZSTARTING:
break;
@ -1766,11 +1790,27 @@ void Qmgr::execNDB_FAILCONF(Signal* signal)
jamEntry();
failedNodePtr.i = signal->theData[0];
if (ERROR_INSERTED(930))
{
CLEAR_ERROR_INSERT_VALUE;
infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
return;
}
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
failedNodePtr.p->failState = NORMAL;
} else {
jam();
char buf[100];
BaseString::snprintf(buf, 100,
"Received NDB_FAILCONF for node %u with state: %d %d",
failedNodePtr.i,
failedNodePtr.p->phase,
failedNodePtr.p->failState);
progError(__LINE__, 0, buf);
systemErrorLab(signal, __LINE__);
}//if
if (cpresident == getOwnNodeId()) {
@ -2077,10 +2117,42 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
if (failedNodePtr.i == getOwnNodeId()) {
jam();
systemErrorLab(signal, __LINE__);
// We ourselves have been reported as failed. Translate the fail cause into
// a readable text and stop through progError() with a descriptive message.
const char * msg = 0;
switch(aFailCause){
case FailRep::ZOWN_FAILURE:
  msg = "Own failure";
  break;
case FailRep::ZOTHER_NODE_WHEN_WE_START:
case FailRep::ZOTHERNODE_FAILED_DURING_START:
  msg = "Other node died during start";
  break;
case FailRep::ZIN_PREP_FAIL_REQ:
  msg = "Prep fail";
  break;
case FailRep::ZSTART_IN_REGREQ:
  msg = "Start timeout";
  break;
case FailRep::ZHEARTBEAT_FAILURE:
  msg = "Heartbeat failure";
  break;
case FailRep::ZLINK_FAILURE:
  msg = "Connection failure";
  break;
default:
  // Unknown cause: msg stays 0 and is reported as "<Unknown>" below.
  break;
}

char buf[100];
// The argument order must follow the format string: "%s" takes the text,
// the trailing "(%u)" takes the numeric cause. Passing them the other way
// round hands an integer to "%s", which is undefined behaviour.
BaseString::snprintf(buf, sizeof(buf),
                     "We(%u) have been declared dead by %u reason: %s(%u)",
                     getOwnNodeId(),
                     refToNode(signal->getSendersBlockRef()),
                     msg ? msg : "<Unknown>",
                     static_cast<unsigned>(aFailCause));

progError(__LINE__, 0, buf);
return;
}//if
myNodePtr.i = getOwnNodeId();
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
if (myNodePtr.p->phase != ZRUNNING) {
@ -2791,6 +2863,7 @@ void Qmgr::failReport(Signal* signal,
cfailureNr = cprepareFailureNr;
ctoFailureNr = 0;
ctoStatus = Q_ACTIVE;
c_start.reset(); // Don't take over nodes being started
if (cnoCommitFailedNodes > 0) {
jam();
/**-----------------------------------------------------------------

View file

@ -450,12 +450,12 @@ NdbConnection::executeNoBlobs(ExecType aTypeOfExec,
//------------------------------------------------------------------------
Ndb* tNdb = theNdb;
Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
m_waitForReply = false;
executeAsynchPrepare(aTypeOfExec, NULL, NULL, abortOption);
if (m_waitForReply){
while (1) {
int noOfComp = tNdb->sendPollNdb((3 * WAITFOR_RESPONSE_TIMEOUT),
1, forceSend);
int noOfComp = tNdb->sendPollNdb(3 * timeout, 1, forceSend);
if (noOfComp == 0) {
/**
* This timeout situation can occur if NDB crashes.

View file

@ -954,23 +954,25 @@ Ndb::pollCompleted(NdbConnection** aCopyArray)
void
Ndb::check_send_timeout()
{
Uint32 timeout = TransporterFacade::instance()->m_waitfor_timeout;
NDB_TICKS current_time = NdbTick_CurrentMillisecond();
if (current_time - the_last_check_time > 1000) {
the_last_check_time = current_time;
Uint32 no_of_sent = theNoOfSentTransactions;
for (Uint32 i = 0; i < no_of_sent; i++) {
NdbConnection* a_con = theSentTransactionsArray[i];
if ((current_time - a_con->theStartTransTime) >
WAITFOR_RESPONSE_TIMEOUT) {
if ((current_time - a_con->theStartTransTime) > timeout)
{
#ifdef VM_TRACE
a_con->printState();
Uint32 t1 = a_con->theTransactionId;
Uint32 t2 = a_con->theTransactionId >> 32;
ndbout_c("[%.8x %.8x]", t1, t2);
abort();
ndbout_c("4012 [%.8x %.8x]", t1, t2);
//abort();
#endif
a_con->theReleaseOnClose = true;
a_con->setOperationErrorCodeAbort(4012);
a_con->theCommitStatus = NdbConnection::Aborted;
a_con->theCommitStatus = NdbConnection::NeedAbort;
a_con->theCompletionStatus = NdbConnection::CompletedFailure;
a_con->handleExecuteCompletion();
remove_sent_list(i);

View file

@ -567,6 +567,19 @@ TransporterFacade::init(Uint32 nodeId, const ndb_mgm_configuration* props)
}
#endif
// Compute m_waitfor_timeout (milliseconds): start from 120000 and raise it
// to the largest (transaction check interval + transaction deadlock
// timeout) found among the configuration sections visited by iter.
// iter is declared earlier in init(); the for statement positions it on
// the first section itself, so no separate iter.first() call is needed.
Uint32 timeout = 120000;
for (iter.first(); iter.valid(); iter.next())
{
  // Stay 0 when a section does not define the parameter.
  Uint32 tmp1 = 0, tmp2 = 0;
  iter.get(CFG_DB_TRANSACTION_CHECK_INTERVAL, &tmp1);
  iter.get(CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, &tmp2);
  tmp1 += tmp2;
  if (tmp1 > timeout)
    timeout = tmp1;
}
m_waitfor_timeout = timeout;
if (!theTransporterRegistry->start_service(m_socket_server)){
ndbout_c("Unable to start theTransporterRegistry->start_service");
DBUG_RETURN(false);

View file

@ -172,6 +172,7 @@ private:
*/
public:
STATIC_CONST( MAX_NO_THREADS = 4711 );
Uint32 m_waitfor_timeout; // in milli seconds...
private:
struct ThreadData {

View file

@ -535,6 +535,52 @@ err:
return NDBT_FAILED;
}
/**
 * Bug#16772: restart a node before node-failure handling has completed.
 *
 * Error insert 930 is set in a surviving non-master node, another node is
 * restarted with abort, and the restarted node is expected NOT to reach
 * start phase 3 until an NDB_FAILCONF is injected into the surviving node.
 *
 * @return NDBT_OK if the start was blocked as expected and the node came up
 *         after the injected signal, NDBT_FAILED otherwise. With fewer than
 *         two data nodes the test is stopped and NDBT_OK is returned.
 */
int
runBug16772(NDBT_Context* ctx, NDBT_Step* step){

  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    // Nothing to test on a single-node cluster.
    ctx->stopTest();
    return NDBT_OK;
  }

  const int aliveNodeId = restarter.getRandomNotMasterNodeId(rand());

  // Pick any data node other than the one carrying the error insert.
  int deadNodeId;
  do
  {
    deadNodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
  } while (deadNodeId == aliveNodeId);

  // Arm the error insert, abort-restart the victim into "no start" state,
  // then ask it to start. Any failing step fails the test.
  if (restarter.insertErrorInNode(aliveNodeId, 930) ||
      restarter.restartOneDbNode(deadNodeId,
                                 /** initial */ false,
                                 /** nostart */ true,
                                 /** abort   */ true) ||
      restarter.waitNodesNoStart(&deadNodeId, 1) ||
      restarter.startNodes(&deadNodeId, 1))
  {
    return NDBT_FAILED;
  }

  // The start should now hang since NDB_FAILCONF was thrown away, so this
  // wait is expected to fail, i.e. start phase 3 must not be reached.
  const int phaseWait = restarter.waitNodesStartPhase(&deadNodeId, 1, 3, 10);

  // Now send an NDB_FAILCONF for the dead node to let the start proceed.
  int dumpArgs[] = { 7020, 323, 252, deadNodeId };
  if (restarter.dumpStateOneNode(aliveNodeId, dumpArgs, 4))
    return NDBT_FAILED;

  if (restarter.waitNodesStarted(&deadNodeId, 1))
    return NDBT_FAILED;

  // A zero result from the phase wait means the node was never blocked.
  if (phaseWait == 0)
    return NDBT_FAILED;
  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@ -820,6 +866,10 @@ TESTCASE("Bug15685",
STEP(runBug15685);
FINALIZER(runClearTable);
}
// Regression test for bug #16772; runBug16772 is its only step.
TESTCASE("Bug16772",
"Test bug with restarting before NF handling is complete"){
STEP(runBug16772);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){

View file

@ -24,6 +24,7 @@
#define TIMEOUT (Uint32)3000
Uint32 g_org_timeout = 3000;
Uint32 g_org_deadlock = 3000;
int
setTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
@ -59,6 +60,60 @@ resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
/**
 * Initializer: remember the configured deadlock timeout of the master node
 * in g_org_deadlock, then send a TcSetTransactionTimeout dump to all nodes
 * with the value of the "TransactionDeadlockTimeout" test property
 * (TIMEOUT if the property is not set).
 *
 * @return NDBT_OK on success, NDBT_FAILED if the configuration value cannot
 *         be read or the dump command fails.
 */
int
setDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);

  NdbConfig conf(GETNDB(step)->getNodeId()+1);
  unsigned int masterId = conf.getMasterNodeId();

  // Save the original value so it can be restored later.
  const bool haveOriginal = conf.getProperty(masterId,
                                             NODE_TYPE_DB,
                                             CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
                                             &g_org_deadlock);
  if (!haveOriginal)
  {
    return NDBT_FAILED;
  }

  g_err << "Setting timeout: " << timeout << endl;

  int dumpArgs[] = { DumpStateOrd::TcSetTransactionTimeout, timeout };
  return (restarter.dumpStateAllNodes(dumpArgs, 2) == 0) ? NDBT_OK
                                                         : NDBT_FAILED;
}
/**
 * Initializer: read the master node's configured deadlock timeout, clamp it
 * to at least 120000 and store four times that value in the
 * "TransactionDeadlockTimeout" test property.
 *
 * @return NDBT_OK on success, NDBT_FAILED if the configuration value cannot
 *         be read.
 */
int
getDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  Uint32 configured = 0;
  NdbConfig conf(GETNDB(step)->getNodeId()+1);
  unsigned int masterId = conf.getMasterNodeId();
  const bool found = conf.getProperty(masterId,
                                      NODE_TYPE_DB,
                                      CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
                                      &configured);
  if (!found)
  {
    return NDBT_FAILED;
  }

  // Never go below the 120000 lower bound.
  const Uint32 lowerBound = 120000;
  const Uint32 effective = (configured < lowerBound) ? lowerBound : configured;

  ctx->setProperty("TransactionDeadlockTimeout", 4*effective);

  return NDBT_OK;
}
/**
 * Finalizer: restore the deadlock timeout saved in g_org_deadlock by sending
 * a TcSetTransactionTimeout dump to all nodes.
 *
 * @return NDBT_OK on success, NDBT_FAILED if the dump command fails.
 */
int
resetDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  // g_org_deadlock is a Uint32; an implicit Uint32 -> int conversion inside
  // a brace initializer is a narrowing conversion (ill-formed since C++11),
  // so convert explicitly.
  int val[] = { DumpStateOrd::TcSetTransactionTimeout,
                static_cast<int>(g_org_deadlock) };
  if(restarter.dumpStateAllNodes(val, 2) != 0){
    return NDBT_FAILED;
  }

  return NDBT_OK;
}
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@ -374,6 +429,43 @@ int runBuddyTransNoTimeout(NDBT_Context* ctx, NDBT_Step* step){
return result;
}
/**
 * Step: update record 0 inside a transaction and execute without commit.
 * If that succeeds, sleep for the "TransactionDeadlockTimeout" property
 * value (TIMEOUT if unset) and require the commit to succeed afterwards;
 * otherwise the only accepted outcome of the execute is 4012.
 *
 * @return NDBT_OK, or NDBT_FAILED as set by a failing CHECK.
 */
int
runError4012(NDBT_Context* ctx, NDBT_Step* step){
  int result = NDBT_OK;
  int loops = ctx->getNumLoops();
  int stepNo = step->getStepNo();

  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);

  HugoOperations hugoOps(*ctx->getTab());
  Ndb* pNdb = GETNDB(step);

  // Single-pass loop: CHECK leaves it early on failure.
  do{
    CHECK(hugoOps.startTransaction(pNdb) == 0);
    CHECK(hugoOps.pkUpdateRecord(pNdb, 0) == 0);

    const int rc = hugoOps.execute_NoCommit(pNdb);
    if (rc != 0)
    {
      // The only tolerated failure of the execute.
      CHECK(rc == 4012);
    }
    else
    {
      const int waitMs = timeout;
      ndbout << "Sleeping for " << waitMs << " milliseconds" << endl;
      NdbSleep_MilliSleep(waitMs);

      // Expect that transaction has NOT timed-out
      CHECK(hugoOps.execute_Commit(pNdb) == 0);
    }
  } while(false);

  hugoOps.closeTransaction(pNdb);

  return result;
}
NDBT_TESTSUITE(testTimeout);
TESTCASE("DontTimeoutTransaction",
"Test that the transaction does not timeout "\
@ -465,6 +557,15 @@ TESTCASE("BuddyTransNoTimeout5",
FINALIZER(resetTransactionTimeout);
FINALIZER(runClearTable);
}
// Error4012: load the table, raise the deadlock timeout on all nodes
// (getDeadlockTimeout computes the value, setDeadlockTimeout applies it and
// saves the original), then run two parallel runError4012 steps.
// The finalizers restore the saved deadlock timeout so the raised value does
// not leak into later tests, and then clear the table.
TESTCASE("Error4012", ""){
  TC_PROPERTY("TransactionDeadlockTimeout", 120000);
  INITIALIZER(runLoadTable);
  INITIALIZER(getDeadlockTimeout);
  INITIALIZER(setDeadlockTimeout);
  STEPS(runError4012, 2);
  FINALIZER(resetDeadlockTimeout);
  FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testTimeout);
int main(int argc, const char** argv){

View file

@ -236,6 +236,10 @@ max-time: 500
cmd: testTimeout
args: -n TimeoutRandTransaction T1
max-time: 600
cmd: testTimeout
args: -n Error4012 T1
# SCAN TESTS
#
max-time: 500
@ -446,6 +450,10 @@ max-time: 500
cmd: testNodeRestart
args: -n Bug15685 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug16772 T1
# OLD FLEX
max-time: 500
cmd: flexBench