diff --git a/ndb/include/kernel/signaldata/BackupImpl.hpp b/ndb/include/kernel/signaldata/BackupImpl.hpp index 2ac91570aad..2032e2347b5 100644 --- a/ndb/include/kernel/signaldata/BackupImpl.hpp +++ b/ndb/include/kernel/signaldata/BackupImpl.hpp @@ -75,7 +75,7 @@ class DefineBackupRef { friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); enum ErrorCode { Undefined = 1340, @@ -92,6 +92,7 @@ private: Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class DefineBackupConf { @@ -158,7 +159,7 @@ class StartBackupRef { friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 5 ); enum ErrorCode { FailedToAllocateTriggerRecord = 1 @@ -168,6 +169,7 @@ private: Uint32 backupPtr; Uint32 signalNo; Uint32 errorCode; + Uint32 nodeId; }; class StartBackupConf { @@ -232,9 +234,8 @@ public: private: Uint32 backupId; Uint32 backupPtr; - Uint32 tableId; - Uint32 fragmentNo; Uint32 errorCode; + Uint32 nodeId; }; class BackupFragmentConf { @@ -296,12 +297,13 @@ class StopBackupRef { friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); private: Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class StopBackupConf { diff --git a/ndb/include/kernel/signaldata/BackupSignalData.hpp b/ndb/include/kernel/signaldata/BackupSignalData.hpp index fb018026a49..b38dd8d14b2 100644 --- a/ndb/include/kernel/signaldata/BackupSignalData.hpp +++ b/ndb/include/kernel/signaldata/BackupSignalData.hpp @@ -240,6 +240,9 @@ public: FileOrScanError = 1325, // slave -> coordinator BackupFailureDueToNodeFail = 1326, // slave -> slave OkToClean = 1327 // master -> slave + + ,AbortScan = 1328 + ,IncompatibleVersions = 1329 }; private: Uint32 requestType; diff --git a/ndb/src/common/debugger/signaldata/BackupImpl.cpp b/ndb/src/common/debugger/signaldata/BackupImpl.cpp index bdc34d614cf..e9b0188d93b 100644 --- a/ndb/src/common/debugger/signaldata/BackupImpl.cpp +++ b/ndb/src/common/debugger/signaldata/BackupImpl.cpp @@ -90,10 +90,8 @@ printBACKUP_FRAGMENT_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ bool printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ BackupFragmentRef* sig = (BackupFragmentRef*)data; - fprintf(out, " backupPtr: %d backupId: %d\n", - sig->backupPtr, sig->backupId); - fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n", - sig->tableId, sig->fragmentNo, sig->errorCode); + fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n", + sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode); return true; } diff --git a/ndb/src/kernel/blocks/backup/Backup.cpp b/ndb/src/kernel/blocks/backup/Backup.cpp index 2e62979ce8e..713991a4f58 100644 --- a/ndb/src/kernel/blocks/backup/Backup.cpp +++ b/ndb/src/kernel/blocks/backup/Backup.cpp @@ -67,31 +67,6 @@ static const Uint32 BACKUP_SEQUENCE = 0x1F000000; //#define DEBUG_ABORT -//--------------------------------------------------------- -// Ignore this since a completed abort could have preceded -// this message. -//--------------------------------------------------------- -#define slaveAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->slaveState.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define masterAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->masterData.state.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define defineSlaveAbortCheck() \ - if (ptr.p->slaveState.getState() == ABORTING) { \ - jam(); \ - closeFiles(signal, ptr); \ - return; \ - } - static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE; void @@ -221,12 +196,7 @@ Backup::execCONTINUEB(Signal* signal) jam(); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, Tdata1); - - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - closeFiles(signal, ptr); - return; - }//if + BackupFilePtr filePtr; ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); FsBuffer & buf = filePtr.p->operation.dataBuffer; @@ -324,13 +294,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal) for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){ infoEvent("BackupRecord %d: BackupId: %d MasterRef: %x ClientRef: %x", ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef); - if(ptr.p->masterRef == reference()){ - infoEvent(" MasterState: %d State: %d", - ptr.p->masterData.state.getState(), - ptr.p->slaveState.getState()); - } else { - infoEvent(" State: %d", ptr.p->slaveState.getState()); - } + infoEvent(" State: %d", ptr.p->slaveState.getState()); BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); filePtr.i != RNIL; ptr.p->files.next(filePtr)){ @@ -338,7 +302,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal) infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d", filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened, filePtr.p->fileRunning, - filePtr.p->fileDone, filePtr.p->scanRunning); + filePtr.p->fileClosing, filePtr.p->scanRunning); } } } @@ -356,6 +320,17 @@ Backup::execDUMP_STATE_ORD(Signal* signal) infoEvent("PagePool: %d", c_pagePool.getSize()); + + if(signal->getLength() == 2 && signal->theData[1] == 2424) + { + ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree()); + ndbrequire(c_attributePool.getSize() == c_attributePool.getNoOfFree()); + ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree()); + ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree()); + ndbrequire(c_pagePool.getSize() == c_pagePool.getNoOfFree()); + ndbrequire(c_fragmentPool.getSize() == c_fragmentPool.getNoOfFree()); + ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree()); + } } } @@ -511,27 +486,6 @@ const char* triggerNameFormat[] = { "NDB$BACKUP_%d_%d_DELETE" }; -const Backup::State -Backup::validMasterTransitions[] = { - INITIAL, DEFINING, - DEFINING, DEFINED, - DEFINED, STARTED, - STARTED, SCANNING, - SCANNING, STOPPING, - STOPPING, INITIAL, - - DEFINING, ABORTING, - DEFINED, ABORTING, - STARTED, ABORTING, - SCANNING, ABORTING, - STOPPING, ABORTING, - ABORTING, ABORTING, - - DEFINING, INITIAL, - ABORTING, INITIAL, - INITIAL, INITIAL -}; - const Backup::State Backup::validSlaveTransitions[] = { INITIAL, DEFINING, @@ -561,10 +515,6 @@ const Uint32 Backup::validSlaveTransitionsCount = sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State); -const Uint32 -Backup::validMasterTransitionsCount = -sizeof(Backup::validMasterTransitions) / sizeof(Backup::State); - void Backup::CompoundState::setState(State newState){ bool found = false; @@ -578,7 +528,8 @@ Backup::CompoundState::setState(State newState){ break; } } - ndbrequire(found); + + //ndbrequire(found); if (newState == INITIAL) abortState = INITIAL; @@ -647,8 +598,7 @@ Backup::execNODE_FAILREP(Signal* signal) Uint32 theFailedNodes[NodeBitmask::Size]; for (Uint32 i = 0; i < NodeBitmask::Size; i++) theFailedNodes[i] = rep->theNodes[i]; - -// NodeId old_master_node_id = getMasterNodeId(); + c_masterNodeId = new_master_node_id; NodePtr nodePtr; @@ -686,15 +636,24 @@ Backup::execNODE_FAILREP(Signal* signal) } bool -Backup::verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask) +Backup::verifyNodesAlive(BackupRecordPtr ptr, + const NdbNodeBitmask& aNodeBitMask) { + Uint32 version = getNodeInfo(getOwnNodeId()).m_version; for (Uint32 i = 0; i < MAX_NDB_NODES; i++) { jam(); if(aNodeBitMask.get(i)) { if(!c_aliveNodes.get(i)){ jam(); + ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); return false; }//if + if(getNodeInfo(i).m_version != version) + { + jam(); + ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions); + return false; + } }//if }//for return true; @@ -709,6 +668,10 @@ Backup::checkNodeFail(Signal* signal, ndbrequire( ptr.p->nodes.get(newCoord)); /* just to make sure newCoord * is part of the backup */ + + NdbNodeBitmask mask; + mask.assign(2, theFailedNodes); + /* Update ptr.p->nodes to be up to date with current alive nodes */ NodePtr nodePtr; @@ -730,26 +693,42 @@ Backup::checkNodeFail(Signal* signal, return; // failed node is not part of backup process, safe to continue } - bool doMasterTakeover = false; - if(NodeBitmask::get(theFailedNodes, refToNode(ptr.p->masterRef))){ - jam(); - doMasterTakeover = true; - }; - - if (newCoord == getOwnNodeId()){ - jam(); - if (doMasterTakeover) { - /** - * I'm new master - */ - CRASH_INSERTION((10002)); -#ifdef DEBUG_ABORT - ndbout_c("**** Master Takeover: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); -#endif - masterTakeOver(signal, ptr); + if(mask.get(refToNode(ptr.p->masterRef))) + { + /** + * Master died...abort + */ + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); + switch(ptr.p->m_gsn){ + case GSN_DEFINE_BACKUP_REQ: + case GSN_START_BACKUP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: + case GSN_STOP_BACKUP_REQ: + // I'm currently processing...reply to self and abort... + ptr.p->masterData.gsn = ptr.p->m_gsn; + ptr.p->masterData.sendCounter = ptr.p->nodes; return; - }//if + case GSN_DEFINE_BACKUP_REF: + case GSN_DEFINE_BACKUP_CONF: + case GSN_START_BACKUP_REF: + case GSN_START_BACKUP_CONF: + case GSN_BACKUP_FRAGMENT_REF: + case GSN_BACKUP_FRAGMENT_CONF: + case GSN_STOP_BACKUP_REF: + case GSN_STOP_BACKUP_CONF: + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; + masterAbort(signal, ptr); + return; + case GSN_ABORT_BACKUP_ORD: + // Already aborting + return; + } + } + else if (newCoord == getOwnNodeId()) + { /** * I'm master for this backup */ @@ -759,61 +738,81 @@ Backup::checkNodeFail(Signal* signal, ndbout_c("**** Master: Node failed: Master id = %u", refToNode(ptr.p->masterRef)); #endif - masterAbort(signal, ptr, false); + + Uint32 gsn, len, pos; + ptr.p->nodes.bitANDC(mask); + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + { + DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_DEFINE_BACKUP_REF; + len= DefineBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_START_BACKUP_REQ: + { + StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + ref->signalNo = ptr.p->masterData.startBackup.signalNo; + gsn= GSN_START_BACKUP_REF; + len= StartBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_BACKUP_FRAGMENT_REQ: + { + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_BACKUP_FRAGMENT_REF; + len= BackupFragmentRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_STOP_BACKUP_REQ: + { + StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_STOP_BACKUP_REF; + len= StopBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_CREATE_TRIG_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + return; + } + + for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; ) + { + signal->theData[pos] = i; + sendSignal(reference(), gsn, signal, len, JBB); +#ifdef DEBUG_ABORT + ndbout_c("sending %d to self from %d", gsn, i); +#endif + } return; }//if - - /** - * If there's a new master, (it's not me) - * but remember who it is - */ - ptr.p->masterRef = calcBackupBlockRef(newCoord); -#ifdef DEBUG_ABORT - ndbout_c("**** Slave: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); -#endif + /** * I abort myself as slave if not master */ CRASH_INSERTION((10021)); - // slaveAbort(signal, ptr); } -void -Backup::masterTakeOver(Signal* signal, BackupRecordPtr ptr) -{ - ptr.p->masterRef = reference(); - ptr.p->masterData.gsn = MAX_GSN + 1; - - switch(ptr.p->slaveState.getState()){ - case INITIAL: - jam(); - ptr.p->masterData.state.forceState(INITIAL); - break; - case ABORTING: - jam(); - case DEFINING: - jam(); - case DEFINED: - jam(); - case STARTED: - jam(); - case SCANNING: - jam(); - ptr.p->masterData.state.forceState(STARTED); - break; - case STOPPING: - jam(); - case CLEANING: - jam(); - ptr.p->masterData.state.forceState(STOPPING); - break; - default: - ndbrequire(false); - } - masterAbort(signal, ptr, false); -} - void Backup::execINCL_NODEREQ(Signal* signal) { @@ -895,8 +894,8 @@ Backup::execBACKUP_REQ(Signal* signal) ndbrequire(ptr.p->pages.empty()); ndbrequire(ptr.p->tables.isEmpty()); - ptr.p->masterData.state.forceState(INITIAL); - ptr.p->masterData.state.setState(DEFINING); + ptr.p->m_gsn = 0; + ptr.p->errorCode = 0; ptr.p->clientRef = senderRef; ptr.p->clientData = senderData; ptr.p->masterRef = reference(); @@ -905,6 +904,7 @@ Backup::execBACKUP_REQ(Signal* signal) ptr.p->backupKey[0] = 0; ptr.p->backupKey[1] = 0; ptr.p->backupDataLen = 0; + ptr.p->masterData.errorCode = 0; ptr.p->masterData.dropTrig.tableId = RNIL; ptr.p->masterData.alterTrig.tableId = RNIL; @@ -928,7 +928,6 @@ Backup::execUTIL_SEQUENCE_REF(Signal* signal) ndbrequire(ptr.i == RNIL); c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; sendBackupRef(signal, ptr, BackupRef::SequenceFailure); }//execUTIL_SEQUENCE_REF() @@ -938,8 +937,7 @@ Backup::sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode) { jam(); sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, errorCode); - // ptr.p->masterData.state.setState(INITIAL); - cleanupSlaveResources(ptr); + cleanup(signal, ptr); } void @@ -968,7 +966,8 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal) UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr(); - if(conf->requestType == UtilSequenceReq::Create) { + if(conf->requestType == UtilSequenceReq::Create) + { jam(); sendSTTORRY(signal); // At startup in NDB return; @@ -979,18 +978,20 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal) c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; - if (ptr.p->masterData.state.getState() == ABORTING) { + + if (ptr.p->checkError()) + { jam(); sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - if (ERROR_INSERTED(10023)) { - ptr.p->masterData.state.setState(ABORTING); + + if (ERROR_INSERTED(10023)) + { sendBackupRef(signal, ptr, 323); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); + { Uint64 backupId; @@ -1018,7 +1019,6 @@ Backup::defineBackupMutex_locked(Signal* signal, Uint32 ptrI, Uint32 retVal){ c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ; Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); @@ -1040,14 +1040,13 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; if (ERROR_INSERTED(10031)) { - ptr.p->masterData.state.setState(ABORTING); ptr.p->setErrorCode(331); }//if - if (ptr.p->masterData.state.getState() == ABORTING) { + if (ptr.p->checkError()) + { jam(); /** @@ -1062,13 +1061,11 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); jam(); mutex2.unlock(); // ignore response - + sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); - sendDefineBackupReq(signal, ptr); } @@ -1078,33 +1075,6 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal) * *****************************************************************************/ -void -Backup::sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal, - Uint32 signalLength, bool executeDirect) -{ - jam(); - ptr.p->masterData.gsn = gsn; - ptr.p->masterData.sendCounter.clearWaitingFor(); - NodePtr node; - for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){ - jam(); - const Uint32 nodeId = node.p->nodeId; - if(node.p->alive && ptr.p->nodes.get(nodeId)){ - jam(); - - ptr.p->masterData.sendCounter.setWaitingFor(nodeId); - - const BlockReference ref = numberToRef(BACKUP, nodeId); - if (!executeDirect || ref != reference()) { - sendSignal(ref, gsn, signal, signalLength, JBB); - }//if - }//if - }//for - if (executeDirect) { - EXECUTE_DIRECT(BACKUP, gsn, signal, signalLength); - } -} - bool Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId) { @@ -1114,10 +1084,6 @@ Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId) ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId)); ptr.p->masterData.sendCounter.clearWaitingFor(nodeId); - - if (ptr.p->masterData.sendCounter.done()) - ptr.p->masterData.gsn = 0; - return ptr.p->masterData.sendCounter.done(); } @@ -1138,11 +1104,12 @@ Backup::sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr) req->nodes = ptr.p->nodes; req->backupDataLen = ptr.p->backupDataLen; - ptr.p->masterData.errorCode = 0; - ptr.p->okToCleanMaster = false; // master must wait with cleaning to last - sendSignalAllWait(ptr, GSN_DEFINE_BACKUP_REQ, signal, - DefineBackupReq::SignalLength, - true /* do execute direct on oneself */); + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal, + DefineBackupReq::SignalLength, JBB); + /** * Now send backup data */ @@ -1167,17 +1134,15 @@ Backup::execDEFINE_BACKUP_REF(Signal* signal) jamEntry(); DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr(); - + const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); - + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->masterData.errorCode = ref->errorCode; + ptr.p->setErrorCode(ref->errorCode); defineBackupReply(signal, ptr, nodeId); } @@ -1188,17 +1153,16 @@ Backup::execDEFINE_BACKUP_CONF(Signal* signal) DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - - if (ERROR_INSERTED(10024)) { - ptr.p->masterData.errorCode = 324; - }//if + if (ERROR_INSERTED(10024)) + { + ptr.p->setErrorCode(324); + } defineBackupReply(signal, ptr, nodeId); } @@ -1210,6 +1174,7 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) jam(); return; } + /** * Unlock mutexes */ @@ -1223,16 +1188,10 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) jam(); mutex2.unlock(); // ignore response - if(ptr.p->errorCode) { + if(ptr.p->checkError()) + { jam(); - ptr.p->masterData.errorCode = ptr.p->errorCode; - } - - if(ptr.p->masterData.errorCode){ - jam(); - ptr.p->setErrorCode(ptr.p->masterData.errorCode); - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - masterSendAbortBackup(signal, ptr); + masterAbort(signal, ptr); return; } @@ -1252,7 +1211,6 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB); - ptr.p->masterData.state.setState(DEFINED); /** * Prepare Trig */ @@ -1286,7 +1244,6 @@ Backup::sendCreateTrig(Signal* signal, { CreateTrigReq * req =(CreateTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_CREATE_TRIG_REQ; ptr.p->masterData.sendCounter = 3; ptr.p->masterData.createTrig.tableId = tabPtr.p->tableId; @@ -1395,17 +1352,14 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; + if (ERROR_INSERTED(10025)) + { + ptr.p->errorCode = 325; + } if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); - return; - }//if - - if (ERROR_INSERTED(10025)) { - ptr.p->errorCode = 325; - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1425,10 +1379,7 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr) /** * Finished with all tables, send StartBackupReq */ - ptr.p->masterData.state.setState(STARTED); - ptr.p->tables.first(tabPtr); - ptr.p->errorCode = 0; ptr.p->masterData.startBackup.signalNo = 0; ptr.p->masterData.startBackup.noOfSignals = (ptr.p->tables.noOfElements() + StartBackupReq::MaxTableTriggers - 1) / @@ -1467,9 +1418,12 @@ Backup::sendStartBackup(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr) }//for req->noOfTableTriggers = i; - sendSignalAllWait(ptr, GSN_START_BACKUP_REQ, signal, - StartBackupReq::HeaderLength + - (i * StartBackupReq::TableTriggerLength)); + ptr.p->masterData.gsn = GSN_START_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_START_BACKUP_REQ, signal, + StartBackupReq::HeaderLength + + (i * StartBackupReq::TableTriggerLength), JBB); } void @@ -1479,15 +1433,13 @@ Backup::execSTART_BACKUP_REF(Signal* signal) StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; const Uint32 signalNo = ref->signalNo; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); + const Uint32 nodeId = ref->nodeId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->setErrorCode(ref->errorCode); startBackupReply(signal, ptr, nodeId, signalNo); } @@ -1499,15 +1451,13 @@ Backup::execSTART_BACKUP_CONF(Signal* signal) StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 signalNo = conf->signalNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - startBackupReply(signal, ptr, nodeId, signalNo); } @@ -1524,17 +1474,16 @@ Backup::startBackupReply(Signal* signal, BackupRecordPtr ptr, return; } + if (ERROR_INSERTED(10026)) + { + ptr.p->errorCode = 326; + } + if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; } - - if (ERROR_INSERTED(10026)) { - ptr.p->errorCode = 326; - masterAbort(signal, ptr, true); - return; - }//if TablePtr tabPtr; c_tablePool.getPtr(tabPtr, ptr.p->masterData.startBackup.tablePtr); @@ -1566,7 +1515,6 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr) { AlterTrigReq * req =(AlterTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_ALTER_TRIG_REQ; ptr.p->masterData.sendCounter = 0; @@ -1608,6 +1556,7 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr) return; }//if ptr.p->masterData.alterTrig.tableId = RNIL; + /** * Finished with all tables */ @@ -1669,11 +1618,9 @@ Backup::alterTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; - if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1719,11 +1666,10 @@ Backup::execWAIT_GCP_CONF(Signal* signal){ ndbrequire(ptr.p->masterRef == reference()); ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ); - ptr.p->masterData.gsn = 0; if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1731,13 +1677,13 @@ Backup::execWAIT_GCP_CONF(Signal* signal){ jam(); CRASH_INSERTION((10008)); ptr.p->startGCP = gcp; - ptr.p->masterData.state.setState(SCANNING); + ptr.p->masterData.sendCounter= 0; + ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ; nextFragment(signal, ptr); } else { jam(); CRASH_INSERTION((10009)); ptr.p->stopGCP = gcp; - ptr.p->masterData.state.setState(STOPPING); sendDropTrig(signal, ptr); // regular dropping of triggers }//if } @@ -1787,6 +1733,7 @@ Backup::nextFragment(Signal* signal, BackupRecordPtr ptr) req->fragmentNo = i; req->count = 0; + ptr.p->masterData.sendCounter++; const BlockReference ref = numberToRef(BACKUP, nodeId); sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal, BackupFragmentReq::SignalLength, JBB); @@ -1824,7 +1771,7 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 tableId = conf->tableId; const Uint32 fragmentNo = conf->fragmentNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); @@ -1834,10 +1781,9 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfBytes += noOfBytes; ptr.p->noOfRecords += noOfRecords; + ptr.p->masterData.sendCounter--; TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -1852,17 +1798,24 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal) fragPtr.p->scanned = 1; fragPtr.p->scanning = 0; - if(ptr.p->checkError()) { - jam(); - masterAbort(signal, ptr, true); - return; - }//if - if (ERROR_INSERTED(10028)) { + if (ERROR_INSERTED(10028)) + { ptr.p->errorCode = 328; - masterAbort(signal, ptr, true); - return; - }//if - nextFragment(signal, ptr); + } + + if(ptr.p->checkError()) + { + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + } + else + { + nextFragment(signal, ptr); + } } void @@ -1874,15 +1827,52 @@ Backup::execBACKUP_FRAGMENT_REF(Signal* signal) BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { + jam(); + FragmentPtr fragPtr; + Array & frags = tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; ifragments.getPtr(fragPtr, i); + if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node) + { + jam(); + ndbrequire(fragPtr.p->scanned == 0); + fragPtr.p->scanned = 1; + fragPtr.p->scanning = 0; + goto done; + } + } + } + ndbrequire(false); +done: + ptr.p->masterData.sendCounter--; ptr.p->setErrorCode(ref->errorCode); - masterAbort(signal, ptr, true); + + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + execABORT_BACKUP_ORD(signal); } /***************************************************************************** @@ -1910,15 +1900,7 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr) jam(); ptr.p->masterData.dropTrig.tableId = RNIL; - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - - if(ptr.p->masterData.state.getState() == STOPPING) { - jam(); - sendStopBackup(signal, ptr); - return; - }//if - ndbrequire(ptr.p->masterData.state.getState() == ABORTING); - masterSendAbortBackup(signal, ptr); + sendStopBackup(signal, ptr); }//if } @@ -2010,7 +1992,6 @@ Backup::dropTrigReply(Signal* signal, BackupRecordPtr ptr) return; }//if - ptr.p->masterData.gsn = 0; sendDropTrig(signal, ptr); // recursive next } @@ -2023,14 +2004,23 @@ void Backup::execSTOP_BACKUP_REF(Signal* signal) { jamEntry(); - ndbrequire(0); + + StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->backupPtr; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); + + ptr.p->setErrorCode(ref->errorCode); + stopBackupReply(signal, ptr, nodeId); } void Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr) { jam(); - ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend(); stop->backupPtr = ptr.i; @@ -2038,8 +2028,11 @@ Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr) stop->startGCP = ptr.p->startGCP; stop->stopGCP = ptr.p->stopGCP; - sendSignalAllWait(ptr, GSN_STOP_BACKUP_REQ, signal, - StopBackupReq::SignalLength); + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_STOP_BACKUP_REQ, signal, + StopBackupReq::SignalLength, JBB); } void @@ -2049,14 +2042,12 @@ Backup::execSTOP_BACKUP_CONF(Signal* signal) StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfLogBytes += conf->noOfLogBytes; ptr.p->noOfLogRecords += conf->noOfLogRecords; @@ -2073,35 +2064,39 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) return; } - // ptr.p->masterData.state.setState(INITIAL); - - // send backup complete first to slaves so that they know sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete); - - BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->startGCP = ptr.p->startGCP; - rep->stopGCP = ptr.p->stopGCP; - rep->noOfBytes = ptr.p->noOfBytes; - rep->noOfRecords = ptr.p->noOfRecords; - rep->noOfLogBytes = ptr.p->noOfLogBytes; - rep->noOfLogRecords = ptr.p->noOfLogRecords; - rep->nodes = ptr.p->nodes; - sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, - BackupCompleteRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupCompleted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->startGCP; - signal->theData[4] = ptr.p->stopGCP; - signal->theData[5] = ptr.p->noOfBytes; - signal->theData[6] = ptr.p->noOfRecords; - signal->theData[7] = ptr.p->noOfLogBytes; - signal->theData[8] = ptr.p->noOfLogRecords; - ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + + if(!ptr.p->checkError()) + { + BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->startGCP = ptr.p->startGCP; + rep->stopGCP = ptr.p->stopGCP; + rep->noOfBytes = ptr.p->noOfBytes; + rep->noOfRecords = ptr.p->noOfRecords; + rep->noOfLogBytes = ptr.p->noOfLogBytes; + rep->noOfLogRecords = ptr.p->noOfLogRecords; + rep->nodes = ptr.p->nodes; + sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, + BackupCompleteRep::SignalLength, JBB); + + signal->theData[0] = EventReport::BackupCompleted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->startGCP; + signal->theData[4] = ptr.p->stopGCP; + signal->theData[5] = ptr.p->noOfBytes; + signal->theData[6] = ptr.p->noOfRecords; + signal->theData[7] = ptr.p->noOfLogBytes; + signal->theData[8] = ptr.p->noOfLogRecords; + ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + } + else + { + masterAbort(signal, ptr); + } } /***************************************************************************** @@ -2110,199 +2105,96 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId) * *****************************************************************************/ void -Backup::masterAbort(Signal* signal, BackupRecordPtr ptr, bool controlledAbort) +Backup::masterAbort(Signal* signal, BackupRecordPtr ptr) { - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Master already aborting"); -#endif - jam(); - return; - } jam(); #ifdef DEBUG_ABORT ndbout_c("************ masterAbort"); #endif - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - if (!ptr.p->checkError()) - ptr.p->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; - - const State s = ptr.p->masterData.state.getState(); - - ptr.p->masterData.state.setState(ABORTING); - - ndbrequire(s == INITIAL || - s == STARTED || - s == DEFINING || - s == DEFINED || - s == SCANNING || - s == STOPPING || - s == ABORTING); - if(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ) { + if(ptr.p->masterData.errorCode != 0) + { jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_SEQUENCE_REQ"); - //------------------------------------------------------- - // We are waiting for UTIL_SEQUENCE response. We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for Sequence Id - * @see execUTIL_SEQUENCE_CONF - */ return; - }//if + } + + BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->reason = ptr.p->errorCode; + sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, + BackupAbortRep::SignalLength, JBB); - if(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_LOCK_REQ"); - //------------------------------------------------------- - // We are waiting for UTIL_LOCK response (mutex). We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for lock - * @see execUTIL_LOCK_CONF - */ + signal->theData[0] = EventReport::BackupAborted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->errorCode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + ndbrequire(ptr.p->errorCode); + ptr.p->masterData.errorCode = ptr.p->errorCode; + + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + ord->requestType = AbortBackupOrd::BackupFailure; + sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); return; - }//if - - /** - * Unlock mutexes only at master - */ - jam(); - Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); - jam(); - mutex1.unlock(); // ignore response - - jam(); - Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); - jam(); - mutex2.unlock(); // ignore response - - if (!controlledAbort) { + case GSN_CREATE_TRIG_REQ: + case GSN_START_BACKUP_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: jam(); - if (s == DEFINING) { - jam(); -//------------------------------------------------------- -// If we are in the defining phase all work is done by -// slaves. No triggers have been allocated thus slaves -// may free all "Master" resources, let them know... -//------------------------------------------------------- - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - return; - }//if - if (s == DEFINED) { - jam(); -//------------------------------------------------------- -// DEFINED is the state when triggers are created. We rely -// on that DICT will report create trigger failure in case -// of node failure. Thus no special action is needed here. -// We will check for errorCode != 0 when receiving -// replies on create trigger. -//------------------------------------------------------- - return; - }//if - if(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_WAIT_GCP_REQ"); -//------------------------------------------------------- -// We are waiting for WAIT_GCP response. We rely on -// this to arrive and check for ABORTING in response. -//------------------------------------------------------- - - /** - * Waiting for GCP - * @see execWAIT_GCP_CONF - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_ALTER_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_ALTER_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for ALTER_TRIG response. We rely on -// this to arrive and check for ABORTING in response. -//------------------------------------------------------- - - /** - * All triggers haven't been created yet - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_DROP_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_DROP_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for DROP_TRIG response. We rely on -// this to arrive and will continue dropping triggers -// until completed. -//------------------------------------------------------- - - /** - * I'm currently dropping the trigger - */ - return; - }//if - }//if - -//------------------------------------------------------- -// If we are waiting for START_BACKUP responses we can -// safely start dropping triggers (state == STARTED). -// We will ignore any START_BACKUP responses after this. -//------------------------------------------------------- - DEBUG_OUT("masterAbort: sendDropTrig"); - sendDropTrig(signal, ptr); // dropping due to error + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); // dropping due to error + return; + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + ndbrequire(false); + return; + case GSN_STOP_BACKUP_REQ: + return; + } } void -Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr) +Backup::abort_scan(Signal * signal, BackupRecordPtr ptr) { - if (ptr.p->masterData.state.getState() != ABORTING) { - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - ptr.p->masterData.state.setState(ABORTING); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + ord->requestType = AbortBackupOrd::AbortScan; + + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { + jam(); + FragmentPtr fragPtr; + Array & frags = tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; ifragments.getPtr(fragPtr, i); + const Uint32 nodeId = fragPtr.p->node; + if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) { + jam(); + + const BlockReference ref = numberToRef(BACKUP, nodeId); + sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + + } + } } - const State s = ptr.p->masterData.state.getAbortState(); - - /** - * First inform to client - */ - if(s == DEFINING) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_REF to mgmtsrvr"); -#endif - sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, - ptr.p->errorCode); - - } else { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_ABORT_REP to mgmtsrvr"); -#endif - BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->reason = ptr.p->errorCode; - sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, - BackupAbortRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupAborted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->errorCode; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - }//if - - // ptr.p->masterData.state.setState(INITIAL); - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); } /***************************************************************************** @@ -2313,26 +2205,17 @@ Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr) void Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode) { - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - return; - } - ptr.p->slaveState.setState(ABORTING); - - if (errCode != 0) { - jam(); - ptr.p->setErrorCode(errCode); - }//if + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF; + ptr.p->setErrorCode(errCode); ndbrequire(ptr.p->errorCode != 0); - + DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend(); ref->backupId = ptr.p->backupId; ref->backupPtr = ptr.i; ref->errorCode = ptr.p->errorCode; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal, DefineBackupRef::SignalLength, JBB); - - closeFiles(signal, ptr); } void @@ -2366,6 +2249,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) CRASH_INSERTION((10014)); + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ; ptr.p->slaveState.forceState(INITIAL); ptr.p->slaveState.setState(DEFINING); ptr.p->errorCode = 0; @@ -2432,7 +2316,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) files[i].p->tableId = RNIL; files[i].p->backupPtr = ptr.i; files[i].p->filePointer = RNIL; - files[i].p->fileDone = 0; + files[i].p->fileClosing = 0; files[i].p->fileOpened = 0; files[i].p->fileRunning = 0; files[i].p->scanRunning = 0; @@ -2468,17 +2352,14 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal) ptr.p->logFilePtr = files[1].i; ptr.p->dataFilePtr = files[2].i; - if (!verifyNodesAlive(ptr.p->nodes)) { + if (!verifyNodesAlive(ptr, ptr.p->nodes)) { jam(); defineBackupRef(signal, ptr, DefineBackupRef::Undefined); - // sendBackupRef(signal, ptr, - // ptr.p->errorCode?ptr.p->errorCode:BackupRef::Undefined); return; }//if if (ERROR_INSERTED(10027)) { jam(); defineBackupRef(signal, ptr, 327); - // sendBackupRef(signal, ptr, 327); return; }//if @@ -2546,8 +2427,6 @@ Backup::execLIST_TABLES_CONF(Signal* signal) return; }//if - defineSlaveAbortCheck(); - /** * All tables fetched */ @@ -2679,8 +2558,6 @@ Backup::openFilesReply(Signal* signal, }//if }//for - defineSlaveAbortCheck(); - /** * Did open succeed for all files */ @@ -2810,8 +2687,6 @@ Backup::execGET_TABINFOREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - defineBackupRef(signal, ptr, ref->errorCode); } @@ -2833,8 +2708,6 @@ Backup::execGET_TABINFO_CONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - SegmentedSectionPtr dictTabInfoPtr; signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO); ndbrequire(dictTabInfoPtr.sz == len); @@ -3047,8 +2920,6 @@ Backup::execDI_FCOUNTCONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3127,8 +2998,6 @@ Backup::execDIGETPRIMCONF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3143,9 +3012,7 @@ Backup::execDIGETPRIMCONF(Signal* signal) void Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr) { - // Slave must now hold on to master data until - // AbortBackupOrd::OkToClean signal - ptr.p->okToCleanMaster = false; + ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF; ptr.p->slaveState.setState(DEFINED); DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr(); conf->backupPtr = ptr.i; @@ -3169,16 +3036,15 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) StartBackupReq* req = (StartBackupReq*)signal->getDataPtr(); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 signalNo = req->signalNo; - + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - slaveAbortCheck(); // macro will do return if ABORTING ptr.p->slaveState.setState(STARTED); - + ptr.p->m_gsn = GSN_START_BACKUP_REQ; + for(Uint32 i = 0; inoOfTableTriggers; i++) { jam(); TablePtr tabPtr; @@ -3191,11 +3057,13 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) TriggerPtr trigPtr; if(!ptr.p->triggers.seizeId(trigPtr, triggerId)) { jam(); + ptr.p->m_gsn = GSN_START_BACKUP_REF; StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend(); ref->backupPtr = ptr.i; ref->backupId = ptr.p->backupId; ref->signalNo = signalNo; ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal, StartBackupRef::SignalLength, JBB); return; @@ -3233,6 +3101,7 @@ Backup::execSTART_BACKUP_REQ(Signal* signal) }//if }//for + ptr.p->m_gsn = GSN_START_BACKUP_CONF; StartBackupConf* conf = (StartBackupConf*)signal->getDataPtrSend(); conf->backupPtr = ptr.i; conf->backupId = ptr.p->backupId; @@ -3255,7 +3124,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) CRASH_INSERTION((10016)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 tableId = req->tableId; const Uint32 fragNo = req->fragmentNo; const Uint32 count = req->count; @@ -3266,10 +3135,9 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - slaveAbortCheck(); // macro will do return if ABORTING - ptr.p->slaveState.setState(SCANNING); - + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ; + /** * Get file */ @@ -3280,7 +3148,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 1); ndbrequire(filePtr.p->scanRunning == 0); - ndbrequire(filePtr.p->fileDone == 0); + ndbrequire(filePtr.p->fileClosing == 0); /** * Get table @@ -3350,7 +3218,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal) req->transId1 = 0; req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->clientOpPtr= filePtr.i; - req->batch_size_rows= 16; + req->batch_size_rows= parallelism; req->batch_size_bytes= 0; sendSignal(DBLQH_REF, GSN_SCAN_FRAGREQ, signal, ScanFragReq::SignalLength, JBB); @@ -3572,6 +3440,13 @@ Backup::OperationRecord::newScan() return false; } +bool +Backup::OperationRecord::closeScan() +{ + opNoDone = opNoConf = opLen = 0; + return true; +} + bool Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len) { @@ -3600,11 +3475,9 @@ Backup::execSCAN_FRAGREF(Signal* signal) c_backupFilePool.getPtr(filePtr, filePtrI); filePtr.p->errorCode = ref->errorCode; + filePtr.p->scanRunning = 0; - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + backupFragmentRef(signal, filePtr); } void @@ -3639,9 +3512,11 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) { jam(); - if(filePtr.p->errorCode != 0){ + if(filePtr.p->errorCode != 0) + { jam(); - abortFileHook(signal, filePtr, true); // Scan completed + filePtr.p->scanRunning = 0; + backupFragmentRef(signal, filePtr); // Scan completed return; }//if @@ -3669,20 +3544,51 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr) sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal, BackupFragmentConf::SignalLength, JBB); + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF; ptr.p->slaveState.setState(STARTED); return; } + +void +Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr) +{ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF; + + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend(); + ref->backupId = ptr.p->backupId; + ref->backupPtr = ptr.i; + ref->nodeId = getOwnNodeId(); + ref->errorCode = ptr.p->errorCode; + sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal, + BackupFragmentRef::SignalLength, JBB); +} void Backup::checkScan(Signal* signal, BackupFilePtr filePtr) { - if(filePtr.p->errorCode != 0){ + OperationRecord & op = filePtr.p->operation; + + if(filePtr.p->errorCode != 0) + { jam(); - abortFileHook(signal, filePtr, false); // Scan not completed + + /** + * Close scan + */ + op.closeScan(); + ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); + req->senderData = filePtr.i; + req->closeFlag = 1; + req->transId1 = 0; + req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if - - OperationRecord & op = filePtr.p->operation; + if(op.newScan()) { jam(); @@ -3693,8 +3599,28 @@ Backup::checkScan(Signal* signal, BackupFilePtr filePtr) req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->batch_size_rows= 16; req->batch_size_bytes= 0; - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); + if(ERROR_INSERTED(10032)) + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 100, ScanFragNextReq::SignalLength); + else if(ERROR_INSERTED(10033)) + { + SET_ERROR_INSERT_VALUE(10032); + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 10000, ScanFragNextReq::SignalLength); + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::FileOrScanError; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + } + else + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if @@ -3718,11 +3644,8 @@ Backup::execFSAPPENDREF(Signal* signal) filePtr.p->fileRunning = 0; filePtr.p->errorCode = errCode; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + + checkFile(signal, filePtr); } void @@ -3738,12 +3661,6 @@ Backup::execFSAPPENDCONF(Signal* signal) BackupFilePtr filePtr; c_backupFilePool.getPtr(filePtr, filePtrI); - - if (ERROR_INSERTED(10029)) { - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - abortFile(signal, ptr, filePtr); - }//if OperationRecord & op = filePtr.p->operation; @@ -3761,30 +3678,25 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) #endif OperationRecord & op = filePtr.p->operation; - + Uint32 * tmp, sz; bool eof; - if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) { + if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) + { jam(); - if(filePtr.p->errorCode == 0) { - jam(); - FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->varIndex = 0; - req->offset = tmp - c_startOfPages; - req->size = sz; - - sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, - FsAppendReq::SignalLength, JBA); - return; - } else { - jam(); - if (filePtr.p->scanRunning == 1) - eof = false; - }//if - }//if + jam(); + FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); + req->filePointer = filePtr.p->filePointer; + req->userPointer = filePtr.i; + req->userReference = reference(); + req->varIndex = 0; + req->offset = tmp - c_startOfPages; + req->size = sz; + + sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, + FsAppendReq::SignalLength, JBA); + return; + } if(!eof) { jam(); @@ -3794,9 +3706,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) return; }//if - ndbrequire(filePtr.p->fileDone == 1); - - if(sz > 0 && filePtr.p->errorCode == 0) { + if(sz > 0) { jam(); FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3812,6 +3722,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) }//if filePtr.p->fileRunning = 0; + filePtr.p->fileClosing = 1; FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3819,64 +3730,11 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr) req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA); } -void -Backup::abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr) -{ - jam(); - - if(ptr.p->slaveState.getState() != ABORTING) { - /** - * Inform master of failure - */ - jam(); - ptr.p->slaveState.setState(ABORTING); - ptr.p->setErrorCode(AbortBackupOrd::FileOrScanError); - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::FileOrScanError); - return; - }//if - - - for(ptr.p->files.first(filePtr); - filePtr.i!=RNIL; - ptr.p->files.next(filePtr)){ - jam(); - filePtr.p->errorCode = 1; - }//for - - closeFiles(signal, ptr); -} - -void -Backup::abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanComplete) -{ - jam(); - - if(!scanComplete) { - jam(); - - ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); - req->senderData = filePtr.i; - req->closeFlag = 1; - req->transId1 = 0; - req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); - return; - }//if - - filePtr.p->scanRunning = 0; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); -} /**************************************************************************** * @@ -3953,27 +3811,30 @@ Backup::execTRIG_ATTRINFO(Signal* signal) { }//if BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry; - if(logEntry == 0) { + if(logEntry == 0) + { jam(); Uint32 * dst; FsBuffer & buf = trigPtr.p->operation->dataBuffer; ndbrequire(trigPtr.p->maxRecordSize <= buf.getMaxWrite()); - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) { + if(ERROR_INSERTED(10030) || + !buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) + { jam(); + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); return; }//if - if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { - jam(); - trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); - return; - }//if - + logEntry = (BackupFormat::LogFile::LogEntry *)dst; trigPtr.p->logEntry = logEntry; logEntry->Length = 0; @@ -4015,9 +3876,10 @@ Backup::execFIRE_TRIG_ORD(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(gci != ptr.p->currGCP) { + if(gci != ptr.p->currGCP) + { jam(); - + trigPtr.p->logEntry->TriggerEvent = htonl(trigPtr.p->event | 0x10000); trigPtr.p->logEntry->Data[len] = htonl(gci); len ++; @@ -4035,20 +3897,6 @@ Backup::execFIRE_TRIG_ORD(Signal* signal) trigPtr.p->operation->noOfRecords += 1; } -void -Backup::sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, - Uint32 requestType) -{ - jam(); - AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); - ord->backupId = ptr.p->backupId; - ord->backupPtr = ptr.i; - ord->requestType = requestType; - ord->senderData= ptr.i; - sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, - AbortBackupOrd::SignalLength, JBB); -} - void Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 requestType) @@ -4085,7 +3933,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) CRASH_INSERTION((10020)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 startGCP = req->startGCP; const Uint32 stopGCP = req->stopGCP; @@ -4101,7 +3949,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) c_backupPool.getPtr(ptr, ptrI); ptr.p->slaveState.setState(STOPPING); - slaveAbortCheck(); // macro will do return if ABORTING + ptr.p->m_gsn = GSN_STOP_BACKUP_REQ; /** * Insert footers @@ -4140,12 +3988,6 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal) void Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) { - if (ptr.p->closingFiles) { - jam(); - return; - } - ptr.p->closingFiles = true; - /** * Close all files */ @@ -4161,12 +4003,12 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) jam(); openCount++; - if(filePtr.p->fileDone == 1){ + if(filePtr.p->fileClosing == 1){ jam(); continue; }//if - filePtr.p->fileDone = 1; + filePtr.p->fileClosing = 1; if(filePtr.p->fileRunning == 1){ jam(); @@ -4183,7 +4025,7 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig, FsCloseReq::SignalLength, JBA); @@ -4210,11 +4052,6 @@ Backup::execFSCLOSEREF(Signal* signal) BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - /** - * This should only happen during abort of backup - */ - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - filePtr.p->fileOpened = 1; FsConf * conf = (FsConf*)signal->getDataPtr(); conf->userPointer = filePtrI; @@ -4237,7 +4074,7 @@ Backup::execFSCLOSECONF(Signal* signal) ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI); #endif - ndbrequire(filePtr.p->fileDone == 1); + ndbrequire(filePtr.p->fileClosing == 1); ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 0); ndbrequire(filePtr.p->scanRunning == 0); @@ -4265,25 +4102,20 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr) { jam(); - if(ptr.p->slaveState.getState() == STOPPING) { - jam(); - BackupFilePtr filePtr; - ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - - StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); - conf->backupId = ptr.p->backupId; - conf->backupPtr = ptr.i; - conf->noOfLogBytes = filePtr.p->operation.noOfBytes; - conf->noOfLogRecords = filePtr.p->operation.noOfRecords; - sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, - StopBackupConf::SignalLength, JBB); - - ptr.p->slaveState.setState(CLEANING); - return; - }//if + jam(); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - removeBackup(signal, ptr); + StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); + conf->backupId = ptr.p->backupId; + conf->backupPtr = ptr.i; + conf->noOfLogBytes = filePtr.p->operation.noOfBytes; + conf->noOfLogRecords = filePtr.p->operation.noOfRecords; + sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, + StopBackupConf::SignalLength, JBB); + + ptr.p->m_gsn = GSN_STOP_BACKUP_CONF; + ptr.p->slaveState.setState(CLEANING); } /***************************************************************************** @@ -4291,57 +4123,6 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr) * Slave functionallity: Abort backup * *****************************************************************************/ -void -Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) -{ - jam(); - - FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); - req->userReference = reference(); - req->userPointer = ptr.i; - req->directory = 1; - req->ownDirectory = 1; - FsOpenReq::setVersion(req->fileNumber, 2); - FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); - FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); - FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); - sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, - FsRemoveReq::SignalLength, JBA); -} - -void -Backup::execFSREMOVEREF(Signal* signal) -{ - jamEntry(); - ndbrequire(0); -} - -void -Backup::execFSREMOVECONF(Signal* signal){ - jamEntry(); - - FsConf * conf = (FsConf*)signal->getDataPtr(); - const Uint32 ptrI = conf->userPointer; - - /** - * Get backup record - */ - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, ptrI); - - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - if (ptr.p->masterRef == reference()) { - if (ptr.p->masterData.state.getAbortState() == DEFINING) { - jam(); - sendBackupRef(signal, ptr, ptr.p->errorCode); - return; - } else { - jam(); - }//if - }//if - cleanupSlaveResources(ptr); -} - /***************************************************************************** * * Slave functionallity: Abort backup @@ -4394,8 +4175,7 @@ Backup::execABORT_BACKUP_ORD(Signal* signal) if (c_backupPool.findId(senderData)) { jam(); c_backupPool.getPtr(ptr, senderData); - } else { // TODO might be abort sent to not master, - // or master aborting too early + } else { jam(); #ifdef DEBUG_ABORT ndbout_c("Backup: abort request type=%u on id=%u,%u not found", @@ -4405,15 +4185,15 @@ Backup::execABORT_BACKUP_ORD(Signal* signal) } }//if + ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD; const bool isCoordinator = (ptr.p->masterRef == reference()); - + bool ok = false; switch(requestType){ /** * Requests sent to master */ - case AbortBackupOrd::ClientAbort: jam(); // fall through @@ -4422,113 +4202,61 @@ Backup::execABORT_BACKUP_ORD(Signal* signal) // fall through case AbortBackupOrd::FileOrScanError: jam(); - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Already aborting"); -#endif - jam(); - return; - } + ndbrequire(isCoordinator); ptr.p->setErrorCode(requestType); - ndbrequire(isCoordinator); // Sent from slave to coordinator - masterAbort(signal, ptr, false); + if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ) + { + /** + * Only scans are actively aborted + */ + abort_scan(signal, ptr); + } return; - - /** - * Info sent to slave - */ - - case AbortBackupOrd::OkToClean: - jam(); - cleanupMasterResources(ptr); - return; - + /** * Requests sent to slave */ - + case AbortBackupOrd::AbortScan: + jam(); + ptr.p->setErrorCode(requestType); + return; + case AbortBackupOrd::BackupComplete: jam(); - if (ptr.p->slaveState.getState() == CLEANING) { // TODO what if state is - // not CLEANING? - jam(); - cleanupSlaveResources(ptr); - }//if + cleanup(signal, ptr); return; - break; - case AbortBackupOrd::BackupFailureDueToNodeFail: - jam(); - ok = true; - if (ptr.p->errorCode != 0) - ptr.p->setErrorCode(requestType); - break; case AbortBackupOrd::BackupFailure: - jam(); - ok = true; - break; + case AbortBackupOrd::BackupFailureDueToNodeFail: + case AbortBackupOrd::OkToClean: + case AbortBackupOrd::IncompatibleVersions: +#ifndef VM_TRACE + default: +#endif + ptr.p->setErrorCode(requestType); + ok= true; } ndbrequire(ok); - /** - * Slave abort - */ - slaveAbort(signal, ptr); -} - -void -Backup::slaveAbort(Signal* signal, BackupRecordPtr ptr) -{ - if(ptr.p->slaveState.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Slave already aborting"); -#endif - jam(); - return; + Uint32 ref= ptr.p->masterRef; + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + + if(ref == reference()) + { + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); } -#ifdef DEBUG_ABORT - ndbout_c("************* slaveAbort"); -#endif - - State slaveState = ptr.p->slaveState.getState(); - ptr.p->slaveState.setState(ABORTING); - switch(slaveState) { - case DEFINING: - jam(); - return; -//------------------------------------------ -// Will watch for the abort at various places -// in the defining phase. -//------------------------------------------ - case ABORTING: - jam(); - //Fall through - case DEFINED: - jam(); - //Fall through - case STOPPING: - jam(); + else + { + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter.clearWaitingFor(); + ptr.p->masterData.sendCounter.setWaitingFor(getOwnNodeId()); closeFiles(signal, ptr); - return; - case STARTED: - jam(); - //Fall through - case SCANNING: - jam(); - BackupFilePtr filePtr; - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); - return; - case CLEANING: - jam(); - cleanupSlaveResources(ptr); - return; - case INITIAL: - jam(); - ndbrequire(false); - return; } } + void Backup::dumpUsedResources() { @@ -4576,12 +4304,8 @@ Backup::dumpUsedResources() } void -Backup::cleanupMasterResources(BackupRecordPtr ptr) +Backup::cleanup(Signal* signal, BackupRecordPtr ptr) { -#ifdef DEBUG_ABORT - ndbout_c("******** Cleanup Master Resources *********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif TablePtr tabPtr; for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL;ptr.p->tables.next(tabPtr)) @@ -4601,20 +4325,6 @@ Backup::cleanupMasterResources(BackupRecordPtr ptr) tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID; }//for }//for - ptr.p->tables.release(); - ptr.p->triggers.release(); - ptr.p->okToCleanMaster = true; - - cleanupFinalResources(ptr); -} - -void -Backup::cleanupSlaveResources(BackupRecordPtr ptr) -{ -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Slave Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); @@ -4626,35 +4336,65 @@ Backup::cleanupSlaveResources(BackupRecordPtr ptr) ndbrequire(filePtr.p->scanRunning == 0); filePtr.p->pages.release(); }//for - ptr.p->files.release(); - cleanupFinalResources(ptr); + ptr.p->files.release(); + ptr.p->tables.release(); + ptr.p->triggers.release(); + + ptr.p->tables.release(); + ptr.p->triggers.release(); + ptr.p->pages.release(); + ptr.p->backupId = ~0; + + if(ptr.p->checkError()) + removeBackup(signal, ptr); + else + c_backups.release(ptr); +} + + +void +Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) +{ + jam(); + + FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); + req->userReference = reference(); + req->userPointer = ptr.i; + req->directory = 1; + req->ownDirectory = 1; + FsOpenReq::setVersion(req->fileNumber, 2); + FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); + FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); + FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); + sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, + FsRemoveReq::SignalLength, JBA); } void -Backup::cleanupFinalResources(BackupRecordPtr ptr) +Backup::execFSREMOVEREF(Signal* signal) { -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Final Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif + jamEntry(); + FsRef * ref = (FsRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->userPointer; - // if (!ptr.p->tables.empty() || !ptr.p->files.empty()) { - if (!ptr.p->okToCleanMaster || !ptr.p->files.empty()) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("******** Waiting to do final cleanup"); -#endif - return; - } - ptr.p->pages.release(); - ptr.p->masterData.state.setState(INITIAL); - ptr.p->slaveState.setState(INITIAL); - ptr.p->backupId = 0; - - ptr.p->closingFiles = false; - ptr.p->okToCleanMaster = true; - - c_backups.release(ptr); - // ndbrequire(false); + FsConf * conf = (FsConf*)signal->getDataPtr(); + conf->userPointer = ptrI; + execFSREMOVECONF(signal); } + +void +Backup::execFSREMOVECONF(Signal* signal){ + jamEntry(); + + FsConf * conf = (FsConf*)signal->getDataPtr(); + const Uint32 ptrI = conf->userPointer; + + /** + * Get backup record + */ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); + c_backups.release(ptr); +} + diff --git a/ndb/src/kernel/blocks/backup/Backup.hpp b/ndb/src/kernel/blocks/backup/Backup.hpp index 1a5d6c7a925..7bcea5655b4 100644 --- a/ndb/src/kernel/blocks/backup/Backup.hpp +++ b/ndb/src/kernel/blocks/backup/Backup.hpp @@ -232,6 +232,7 @@ public: */ bool newScan(); bool scanConf(Uint32 noOfOps, Uint32 opLen); + bool closeScan(); /** * Per record @@ -330,7 +331,7 @@ public: Uint8 fileOpened; Uint8 fileRunning; - Uint8 fileDone; + Uint8 fileClosing; Uint8 scanRunning; }; typedef Ptr BackupFilePtr; @@ -403,13 +404,11 @@ public: ArrayPool & trp) : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1) , tables(tp), triggers(trp), files(bp), pages(pp) - , masterData(b, validMasterTransitions, validMasterTransitionsCount) - , backup(b) - { - closingFiles = false; - okToCleanMaster = true; - } + , masterData(b), backup(b) + { + } + Uint32 m_gsn; CompoundState slaveState; Uint32 clientRef; @@ -420,9 +419,6 @@ public: Uint32 errorCode; NdbNodeBitmask nodes; - bool okToCleanMaster; - bool closingFiles; - Uint64 noOfBytes; Uint64 noOfRecords; Uint64 noOfLogBytes; @@ -444,15 +440,13 @@ public: SimpleProperties props;// Used for (un)packing backup request struct MasterData { - MasterData(Backup & b, const State valid[], Uint32 count) - : state(b, valid, count, 0) - { - } + MasterData(Backup & b) + { + } MutexHandle2 m_defineBackupMutex; MutexHandle2 m_dictCommitTableMutex; Uint32 gsn; - CompoundState state; SignalCounter sendCounter; Uint32 errorCode; struct { @@ -557,7 +551,8 @@ public: void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId); void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0); - + void backupFragmentRef(Signal * signal, BackupFilePtr filePtr); + void nextFragment(Signal*, BackupRecordPtr); void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr); @@ -578,14 +573,14 @@ public: void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); - void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort); + void masterAbort(Signal*, BackupRecordPtr ptr); void masterSendAbortBackup(Signal*, BackupRecordPtr ptr); void slaveAbort(Signal*, BackupRecordPtr ptr); void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr); void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone); - bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask); + bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask); bool checkAbort(BackupRecordPtr ptr); void checkNodeFail(Signal* signal, BackupRecordPtr ptr, @@ -603,9 +598,8 @@ public: void sendBackupRef(BlockReference ref, Signal *signal, Uint32 senderData, Uint32 errorCode); void dumpUsedResources(); - void cleanupMasterResources(BackupRecordPtr ptr); - void cleanupSlaveResources(BackupRecordPtr ptr); - void cleanupFinalResources(BackupRecordPtr ptr); + void cleanup(Signal*, BackupRecordPtr ptr); + void abort_scan(Signal*, BackupRecordPtr ptr); void removeBackup(Signal*, BackupRecordPtr ptr); void sendSTTORRY(Signal*); diff --git a/ndb/src/kernel/blocks/backup/Backup.txt b/ndb/src/kernel/blocks/backup/Backup.txt index ee5e02bb549..73942c6ebdc 100644 --- a/ndb/src/kernel/blocks/backup/Backup.txt +++ b/ndb/src/kernel/blocks/backup/Backup.txt @@ -341,3 +341,28 @@ start backup (ERROR_INSERTED(10022))) { if (ERROR_INSERTED(10029)) { if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { + +----- XXX --- + +DEFINE_BACKUP_REF -> + ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies has arrived + +START_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies has arrived + +BACKUP_FRAGMENT_REF + ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT + + When all nodes has replied BACKUP_FRAGMENT + ABORT_BACKUP_ORD(no reply) + +STOP_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies has arrived + +NF_COMPLETE_REP + slave dies + master sends OUTSTANDING_REF to self + slave does nothing + + master dies + slave elects self as master and sets only itself as participant diff --git a/ndb/src/kernel/blocks/backup/BackupInit.cpp b/ndb/src/kernel/blocks/backup/BackupInit.cpp index 08fa089a9c0..eae72f43db5 100644 --- a/ndb/src/kernel/blocks/backup/BackupInit.cpp +++ b/ndb/src/kernel/blocks/backup/BackupInit.cpp @@ -175,7 +175,7 @@ Backup::Backup(const Configuration & conf) : addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ); - //addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); + addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF); addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ); diff --git a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index dfae180ae71..0274ef4af3e 100644 --- a/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -126,6 +126,7 @@ Cmvmi::Cmvmi(const Configuration & conf) : } setNodeInfo(getOwnNodeId()).m_connected = true; + setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion(); } Cmvmi::~Cmvmi() diff --git a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp index 64b947b5462..246afc5ceb8 100644 --- a/ndb/src/kernel/blocks/dbacc/Dbacc.hpp +++ b/ndb/src/kernel/blocks/dbacc/Dbacc.hpp @@ -1022,7 +1022,7 @@ private: Uint32 placeReadInLockQueue(Signal* signal); void placeSerialQueueRead(Signal* signal); void checkOnlyReadEntry(Signal* signal); - void getNoParallelTransaction(Signal* signal); + Uint32 getNoParallelTransaction(const Operationrec*); void moveLastParallelQueue(Signal* signal); void moveLastParallelQueueWrite(Signal* signal); Uint32 placeWriteInLockQueue(Signal* signal); @@ -1100,6 +1100,8 @@ private: Uint32 executeNextOperation(Signal* signal); void releaselock(Signal* signal); void takeOutFragWaitQue(Signal* signal); + void check_lock_upgrade(Signal* signal, OperationrecPtr lock_owner, + OperationrecPtr release_op); void allocOverflowPage(Signal* signal); bool getrootfragmentrec(Signal* signal, RootfragmentrecPtr&, Uint32 fragId); void insertLockOwnersList(Signal* signal, const OperationrecPtr&); @@ -1263,7 +1265,6 @@ private: OperationrecPtr mlpqOperPtr; OperationrecPtr queOperPtr; OperationrecPtr readWriteOpPtr; - OperationrecPtr tgnptMainOpPtr; Uint32 cfreeopRec; Uint32 coprecsize; /* --------------------------------------------------------------------------------- */ @@ -1514,7 +1515,6 @@ private: Uint32 turlIndex; Uint32 tlfrTmp1; Uint32 tlfrTmp2; - Uint32 tgnptNrTransaction; Uint32 tudqeIndex; Uint32 tscanTrid1; Uint32 tscanTrid2; diff --git a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp index 17c5a31cbed..d566639489c 100644 --- a/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp +++ b/ndb/src/kernel/blocks/dbacc/DbaccMain.cpp @@ -1936,9 +1936,7 @@ void Dbacc::insertelementLab(Signal* signal) /* --------------------------------------------------------------------------------- */ Uint32 Dbacc::placeReadInLockQueue(Signal* signal) { - tgnptMainOpPtr = queOperPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(queOperPtr.p) == 1) { if ((queOperPtr.p->transId1 == operationRecPtr.p->transId1) && (queOperPtr.p->transId2 == operationRecPtr.p->transId2)) { /* --------------------------------------------------------------------------------- */ @@ -2021,9 +2019,7 @@ void Dbacc::placeSerialQueueRead(Signal* signal) checkOnlyReadEntry(signal); return; }//if - tgnptMainOpPtr = readWriteOpPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(readWriteOpPtr.p) == 1) { jam(); /* --------------------------------------------------------------------------------- */ /* THERE WAS ONLY ONE TRANSACTION INVOLVED IN THE PARALLEL QUEUE. IF THIS IS OUR */ @@ -2104,24 +2100,23 @@ void Dbacc::checkOnlyReadEntry(Signal* signal) /* --------------------------------------------------------------------------------- */ /* GET_NO_PARALLEL_TRANSACTION */ /* --------------------------------------------------------------------------------- */ -void Dbacc::getNoParallelTransaction(Signal* signal) +Uint32 +Dbacc::getNoParallelTransaction(const Operationrec * op) { - OperationrecPtr tnptOpPtr; - - tgnptNrTransaction = 1; - tnptOpPtr.i = tgnptMainOpPtr.p->nextParallelQue; - while ((tnptOpPtr.i != RNIL) && - (tgnptNrTransaction == 1)) { + OperationrecPtr tmp; + + tmp.i= op->nextParallelQue; + Uint32 transId[2] = { op->transId1, op->transId2 }; + while (tmp.i != RNIL) + { jam(); - ptrCheckGuard(tnptOpPtr, coprecsize, operationrec); - if ((tnptOpPtr.p->transId1 == tgnptMainOpPtr.p->transId1) && - (tnptOpPtr.p->transId2 == tgnptMainOpPtr.p->transId2)) { - tnptOpPtr.i = tnptOpPtr.p->nextParallelQue; - } else { - jam(); - tgnptNrTransaction++; - }//if - }//while + ptrCheckGuard(tmp, coprecsize, operationrec); + if (tmp.p->transId1 == transId[0] && tmp.p->transId2 == transId[1]) + tmp.i = tmp.p->nextParallelQue; + else + return 2; + } + return 1; }//Dbacc::getNoParallelTransaction() void Dbacc::moveLastParallelQueue(Signal* signal) @@ -2162,9 +2157,7 @@ void Dbacc::moveLastParallelQueueWrite(Signal* signal) /* --------------------------------------------------------------------------------- */ Uint32 Dbacc::placeWriteInLockQueue(Signal* signal) { - tgnptMainOpPtr = queOperPtr; - getNoParallelTransaction(signal); - if (!((tgnptNrTransaction == 1) && + if (!((getNoParallelTransaction(queOperPtr.p) == 1) && (queOperPtr.p->transId1 == operationRecPtr.p->transId1) && (queOperPtr.p->transId2 == operationRecPtr.p->transId2))) { jam(); @@ -2215,9 +2208,7 @@ void Dbacc::placeSerialQueueWrite(Signal* signal) }//if readWriteOpPtr.i = readWriteOpPtr.p->nextSerialQue; ptrCheckGuard(readWriteOpPtr, coprecsize, operationrec); - tgnptMainOpPtr = readWriteOpPtr; - getNoParallelTransaction(signal); - if (tgnptNrTransaction == 1) { + if (getNoParallelTransaction(readWriteOpPtr.p) == 1) { /* --------------------------------------------------------------------------------- */ /* THERE WAS ONLY ONE TRANSACTION INVOLVED IN THE PARALLEL QUEUE. IF THIS IS OUR */ /* TRANSACTION WE CAN STILL GET HOLD OF THE LOCK. */ @@ -5802,9 +5793,154 @@ void Dbacc::commitOperation(Signal* signal) ptrCheckGuard(tolqTmpPtr, coprecsize, operationrec); tolqTmpPtr.p->prevParallelQue = operationRecPtr.p->prevParallelQue; }//if - }//if + + /** + * Check possible lock upgrade + * 1) Find lock owner + * 2) Count transactions in parallel que + * 3) If count == 1 and TRANSID(next serial) == TRANSID(lock owner) + * upgrade next serial + */ + if(operationRecPtr.p->lockMode) + { + jam(); + /** + * Committing a non shared operation can't lead to lock upgrade + */ + return; + } + + OperationrecPtr lock_owner; + lock_owner.i = operationRecPtr.p->prevParallelQue; + ptrCheckGuard(lock_owner, coprecsize, operationrec); + Uint32 transid[2] = { lock_owner.p->transId1, + lock_owner.p->transId2 }; + + + while(lock_owner.p->prevParallelQue != RNIL) + { + lock_owner.i = lock_owner.p->prevParallelQue; + ptrCheckGuard(lock_owner, coprecsize, operationrec); + + if(lock_owner.p->transId1 != transid[0] || + lock_owner.p->transId2 != transid[1]) + { + jam(); + /** + * If more than 1 trans in lock queue -> no lock upgrade + */ + return; + } + } + + check_lock_upgrade(signal, lock_owner, operationRecPtr); + } }//Dbacc::commitOperation() +void +Dbacc::check_lock_upgrade(Signal* signal, + OperationrecPtr lock_owner, + OperationrecPtr release_op) +{ + if((lock_owner.p->transId1 == release_op.p->transId1 && + lock_owner.p->transId2 == release_op.p->transId2) || + release_op.p->lockMode || + lock_owner.p->nextSerialQue == RNIL) + { + jam(); + /** + * No lock upgrade if same trans or lock owner has no serial queue + * or releasing non shared op + */ + return; + } + + OperationrecPtr next; + next.i = lock_owner.p->nextSerialQue; + ptrCheckGuard(next, coprecsize, operationrec); + + if(lock_owner.p->transId1 != next.p->transId1 || + lock_owner.p->transId2 != next.p->transId2) + { + jam(); + /** + * No lock upgrad if !same trans in serial queue + */ + return; + } + + if (getNoParallelTransaction(lock_owner.p) > 1) + { + jam(); + /** + * No lock upgrade if more than 1 transaction in parallell queue + */ + return; + } + + if (getNoParallelTransaction(next.p) > 1) + { + jam(); + /** + * No lock upgrade if more than 1 transaction in next's parallell queue + */ + return; + } + + OperationrecPtr tmp; + tmp.i = lock_owner.p->nextSerialQue = next.p->nextSerialQue; + if(tmp.i != RNIL) + { + ptrCheckGuard(tmp, coprecsize, operationrec); + ndbassert(tmp.p->prevSerialQue == next.i); + tmp.p->prevSerialQue = lock_owner.i; + } + next.p->nextSerialQue = next.p->prevSerialQue = RNIL; + + // Find end of parallell que + tmp = lock_owner; + while(tmp.p->nextParallelQue != RNIL) + { + jam(); + tmp.i = tmp.p->nextParallelQue; + ptrCheckGuard(tmp, coprecsize, operationrec); + } + + next.p->prevParallelQue = tmp.i; + tmp.p->nextParallelQue = next.i; + + OperationrecPtr save = operationRecPtr; + Uint32 lockMode = lock_owner.p->lockMode; + + Uint32 TelementIsDisappeared = 0; // lock upgrade = all reads + Uint32 ThashValue = lock_owner.p->hashValue; + Uint32 localdata[2]; + localdata[0] = lock_owner.p->localdata[0]; + localdata[1] = lock_owner.p->localdata[1]; + do { + next.p->elementIsDisappeared = TelementIsDisappeared; + next.p->hashValue = ThashValue; + next.p->localdata[0] = localdata[0]; + next.p->localdata[1] = localdata[1]; + + operationRecPtr = next; + next.p->lockMode = lockMode; + TelementIsDisappeared = executeNextOperation(signal); + if (next.p->nextParallelQue != RNIL) + { + jam(); + next.i = next.p->nextParallelQue; + ptrCheckGuard(next, coprecsize, operationrec); + } else { + jam(); + break; + }//if + } while (1); + + operationRecPtr = save; + +} + /* ------------------------------------------------------------------------- */ /* RELEASELOCK */ /* RESETS LOCK OF AN ELEMENT. */ @@ -5841,6 +5977,8 @@ void Dbacc::releaselock(Signal* signal) ptrCheckGuard(trlTmpOperPtr, coprecsize, operationrec); trlTmpOperPtr.p->prevSerialQue = trlOperPtr.i; }//if + + check_lock_upgrade(signal, copyInOperPtr, operationRecPtr); /* --------------------------------------------------------------------------------- */ /* SINCE THERE ARE STILL ITEMS IN THE PARALLEL QUEUE WE NEED NOT WORRY ABOUT */ /* STARTING QUEUED OPERATIONS. THUS WE CAN END HERE. */ @@ -8348,7 +8486,7 @@ void Dbacc::checkSendLcpConfLab(Signal* signal) break; }//switch lcpConnectptr.p->noOfLcpConf++; - ndbrequire(lcpConnectptr.p->noOfLcpConf <= 2); + ndbrequire(lcpConnectptr.p->noOfLcpConf <= 4); fragrecptr.p->fragState = ACTIVEFRAG; rlpPageptr.i = fragrecptr.p->zeroPagePtr; ptrCheckGuard(rlpPageptr, cpagesize, page8); @@ -8366,7 +8504,7 @@ void Dbacc::checkSendLcpConfLab(Signal* signal) }//for signal->theData[0] = fragrecptr.p->lcpLqhPtr; sendSignal(lcpConnectptr.p->lcpUserblockref, GSN_ACC_LCPCONF, signal, 1, JBB); - if (lcpConnectptr.p->noOfLcpConf == 2) { + if (lcpConnectptr.p->noOfLcpConf == 4) { jam(); releaseLcpConnectRec(signal); rootfragrecptr.i = fragrecptr.p->myroot; @@ -8397,6 +8535,13 @@ void Dbacc::execACC_CONTOPREQ(Signal* signal) /* LOCAL FRAG ID */ tresult = 0; ptrCheckGuard(lcpConnectptr, clcpConnectsize, lcpConnectrec); + if(ERROR_INSERTED(3002) && lcpConnectptr.p->noOfLcpConf < 2) + { + sendSignalWithDelay(cownBlockref, GSN_ACC_CONTOPREQ, signal, 300, + signal->getLength()); + return; + } + ndbrequire(lcpConnectptr.p->lcpstate == LCP_ACTIVE); rootfragrecptr.i = lcpConnectptr.p->rootrecptr; ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); @@ -8430,6 +8575,15 @@ void Dbacc::execACC_CONTOPREQ(Signal* signal) }//while signal->theData[0] = fragrecptr.p->lcpLqhPtr; sendSignal(lcpConnectptr.p->lcpUserblockref, GSN_ACC_CONTOPCONF, signal, 1, JBA); + + lcpConnectptr.p->noOfLcpConf++; + if (lcpConnectptr.p->noOfLcpConf == 4) { + jam(); + releaseLcpConnectRec(signal); + rootfragrecptr.i = fragrecptr.p->myroot; + ptrCheckGuard(rootfragrecptr, crootfragmentsize, rootfragmentrec); + rootfragrecptr.p->rootState = ACTIVEROOT; + }//if return; /* ALL QUEUED OPERATION ARE RESTARTED IF NEEDED. */ }//Dbacc::execACC_CONTOPREQ() diff --git a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp index 7247b7e2b9c..184db794057 100644 --- a/ndb/src/kernel/blocks/dbdict/Dbdict.cpp +++ b/ndb/src/kernel/blocks/dbdict/Dbdict.cpp @@ -9811,11 +9811,20 @@ Dbdict::execBUILDINDXREQ(Signal* signal) requestType == BuildIndxReq::RT_ALTER_INDEX || requestType == BuildIndxReq::RT_SYSTEMRESTART) { jam(); + + const bool isLocal = req->getRequestFlag() & RequestFlag::RF_LOCAL; + NdbNodeBitmask receiverNodes = c_aliveNodes; + if (isLocal) { + receiverNodes.clear(); + receiverNodes.set(getOwnNodeId()); + } + if (signal->getLength() == BuildIndxReq::SignalLength) { jam(); - if (getOwnNodeId() != c_masterNodeId) { + + if (!isLocal && getOwnNodeId() != c_masterNodeId) { jam(); - + releaseSections(signal); OpBuildIndex opBad; opPtr.p = &opBad; @@ -9828,9 +9837,9 @@ Dbdict::execBUILDINDXREQ(Signal* signal) } // forward initial request plus operation key to all req->setOpKey(++c_opRecordSequence); - NodeReceiverGroup rg(DBDICT, c_aliveNodes); + NodeReceiverGroup rg(DBDICT, receiverNodes); sendSignal(rg, GSN_BUILDINDXREQ, - signal, BuildIndxReq::SignalLength + 1, JBB); + signal, BuildIndxReq::SignalLength + 1, JBB); return; } // seize operation record @@ -9853,7 +9862,7 @@ Dbdict::execBUILDINDXREQ(Signal* signal) } c_opBuildIndex.add(opPtr); // master expects to hear from all - opPtr.p->m_signalCounter = c_aliveNodes; + opPtr.p->m_signalCounter = receiverNodes; buildIndex_sendReply(signal, opPtr, false); return; } @@ -10208,10 +10217,20 @@ Dbdict::buildIndex_sendSlaveReq(Signal* signal, OpBuildIndexPtr opPtr) req->setConnectionPtr(opPtr.p->key); req->setRequestType(opPtr.p->m_requestType); req->addRequestFlag(opPtr.p->m_requestFlag); - opPtr.p->m_signalCounter = c_aliveNodes; - NodeReceiverGroup rg(DBDICT, c_aliveNodes); - sendSignal(rg, GSN_BUILDINDXREQ, - signal, BuildIndxReq::SignalLength, JBB); + if(opPtr.p->m_requestFlag & RequestFlag::RF_LOCAL) + { + opPtr.p->m_signalCounter.clearWaitingFor(); + opPtr.p->m_signalCounter.setWaitingFor(getOwnNodeId()); + sendSignal(reference(), GSN_BUILDINDXREQ, + signal, BuildIndxReq::SignalLength, JBB); + } + else + { + opPtr.p->m_signalCounter = c_aliveNodes; + NodeReceiverGroup rg(DBDICT, c_aliveNodes); + sendSignal(rg, GSN_BUILDINDXREQ, + signal, BuildIndxReq::SignalLength, JBB); + } } void diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 0c63cb5fe17..19e055a3011 100644 --- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -968,7 +968,6 @@ public: enum LcpState { LCP_IDLE = 0, - LCP_STARTED = 1, LCP_COMPLETED = 2, LCP_WAIT_FRAGID = 3, LCP_WAIT_TUP_PREPLCP = 4, @@ -2266,7 +2265,7 @@ private: void sendCopyActiveConf(Signal* signal,Uint32 tableId); void checkLcpCompleted(Signal* signal); void checkLcpHoldop(Signal* signal); - void checkLcpStarted(Signal* signal); + bool checkLcpStarted(Signal* signal); void checkLcpTupprep(Signal* signal); void getNextFragForLcp(Signal* signal); void initLcpLocAcc(Signal* signal, Uint32 fragId); diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 3358593b150..3967098b8e9 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -10351,8 +10351,8 @@ void Dblqh::execTUP_LCPSTARTED(Signal* signal) void Dblqh::lcpStartedLab(Signal* signal) { - checkLcpStarted(signal); - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { + if (checkLcpStarted(signal)) + { jam(); /* ---------------------------------------------------------------------- * THE LOCAL CHECKPOINT HAS BEEN STARTED. IT IS NOW TIME TO @@ -10432,26 +10432,7 @@ void Dblqh::execLQH_RESTART_OP(Signal* signal) lcpPtr.i = signal->theData[1]; ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord); ndbrequire(fragptr.p->fragStatus == Fragrecord::BLOCKED); - if (lcpPtr.p->lcpState == LcpRecord::LCP_STARTED) { - jam(); - /***********************************************************************/ - /* THIS SIGNAL CAN ONLY BE RECEIVED WHEN FRAGMENT IS BLOCKED AND - * THE LOCAL CHECKPOINT HAS BEEN STARTED. THE BLOCKING WILL BE - * REMOVED AS SOON AS ALL OPERATIONS HAVE BEEN STARTED. - ***********************************************************************/ - restartOperationsLab(signal); - } else if (lcpPtr.p->lcpState == LcpRecord::LCP_BLOCKED_COMP) { - jam(); - /*******************************************************************> - * THE CHECKPOINT IS COMPLETED BUT HAS NOT YET STARTED UP - * ALL OPERATIONS AGAIN. - * WE PERFORM THIS START-UP BEFORE CONTINUING WITH THE NEXT - * FRAGMENT OF THE LOCAL CHECKPOINT TO AVOID ANY STRANGE ERRORS. - *******************************************************************> */ - restartOperationsLab(signal); - } else { - ndbrequire(false); - } + restartOperationsLab(signal); }//Dblqh::execLQH_RESTART_OP() void Dblqh::restartOperationsLab(Signal* signal) @@ -11000,7 +10981,8 @@ void Dblqh::checkLcpHoldop(Signal* signal) * * SUBROUTINE SHORT NAME = CLS * ========================================================================== */ -void Dblqh::checkLcpStarted(Signal* signal) +bool +Dblqh::checkLcpStarted(Signal* signal) { LcpLocRecordPtr clsLcpLocptr; @@ -11010,7 +10992,7 @@ void Dblqh::checkLcpStarted(Signal* signal) do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_WAIT_STARTED){ - return; + return false; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; i++; @@ -11021,12 +11003,13 @@ void Dblqh::checkLcpStarted(Signal* signal) do { ptrCheckGuard(clsLcpLocptr, clcpLocrecFileSize, lcpLocRecord); if (clsLcpLocptr.p->lcpLocstate == LcpLocRecord::TUP_WAIT_STARTED){ - return; + return false; }//if clsLcpLocptr.i = clsLcpLocptr.p->nextLcpLoc; i++; } while (clsLcpLocptr.i != RNIL); - lcpPtr.p->lcpState = LcpRecord::LCP_STARTED; + + return true; }//Dblqh::checkLcpStarted() /* ========================================================================== @@ -11187,20 +11170,12 @@ void Dblqh::sendAccContOp(Signal* signal) do { ptrCheckGuard(sacLcpLocptr, clcpLocrecFileSize, lcpLocRecord); sacLcpLocptr.p->accContCounter = 0; - if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_STARTED){ - /* ------------------------------------------------------------------- */ - /*SEND START OPERATIONS TO ACC AGAIN */ - /* ------------------------------------------------------------------- */ - signal->theData[0] = lcpPtr.p->lcpAccptr; - signal->theData[1] = sacLcpLocptr.p->locFragid; - sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); - count++; - } else if(sacLcpLocptr.p->lcpLocstate == LcpLocRecord::ACC_COMPLETED){ - signal->theData[0] = sacLcpLocptr.i; - sendSignal(reference(), GSN_ACC_CONTOPCONF, signal, 1, JBB); - } else { - ndbrequire(false); - } + /* ------------------------------------------------------------------- */ + /*SEND START OPERATIONS TO ACC AGAIN */ + /* ------------------------------------------------------------------- */ + signal->theData[0] = lcpPtr.p->lcpAccptr; + signal->theData[1] = sacLcpLocptr.p->locFragid; + sendSignal(fragptr.p->accBlockref, GSN_ACC_CONTOPREQ, signal, 2, JBA); sacLcpLocptr.i = sacLcpLocptr.p->nextLcpLoc; } while (sacLcpLocptr.i != RNIL); @@ -11236,9 +11211,18 @@ void Dblqh::sendStartLcp(Signal* signal) signal->theData[0] = stlLcpLocptr.i; signal->theData[1] = cownref; signal->theData[2] = stlLcpLocptr.p->tupRef; - sendSignal(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, signal, 3, JBA); + if(ERROR_INSERTED(5077)) + sendSignalWithDelay(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, + signal, 5000, 3); + else + sendSignal(fragptr.p->tupBlockref, GSN_TUP_LCPREQ, signal, 3, JBA); stlLcpLocptr.i = stlLcpLocptr.p->nextLcpLoc; } while (stlLcpLocptr.i != RNIL); + + if(ERROR_INSERTED(5077)) + { + ndbout_c("Delayed TUP_LCPREQ with 5 sec"); + } }//Dblqh::sendStartLcp() /* ------------------------------------------------------------------------- */ diff --git a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp index b944bb5485b..0fee687f1bc 100644 --- a/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp +++ b/ndb/src/kernel/blocks/ndbfs/OpenFiles.hpp @@ -82,8 +82,14 @@ inline bool OpenFiles::insert(AsyncFile* file, Uint16 id){ continue; if(strcmp(m_files[i].m_file->theFileName.c_str(), - file->theFileName.c_str()) == 0){ - ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN,"","OpenFiles::insert()"); + file->theFileName.c_str()) == 0) + { + BaseString names; + names.assfmt("open: >%s< existing: >%s<", + file->theFileName.c_str(), + m_files[i].m_file->theFileName.c_str()); + ERROR_SET(fatal, AFS_ERROR_ALLREADY_OPEN, names.c_str(), + "OpenFiles::insert()"); } } diff --git a/ndb/src/mgmapi/mgmapi.cpp b/ndb/src/mgmapi/mgmapi.cpp index 68106c4689d..a8931fb32ea 100644 --- a/ndb/src/mgmapi/mgmapi.cpp +++ b/ndb/src/mgmapi/mgmapi.cpp @@ -1565,9 +1565,9 @@ ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed, { // start backup can take some time, set timeout high Uint64 old_timeout= handle->read_timeout; if (wait_completed == 2) - handle->read_timeout= 30*60*1000; // 30 minutes + handle->read_timeout= 48*60*60*1000; // 48 hours else if (wait_completed == 1) - handle->read_timeout= 5*60*1000; // 5 minutes + handle->read_timeout= 10*60*1000; // 10 minutes reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args); handle->read_timeout= old_timeout; } diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index fb05e57e138..ceaedc9955b 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -791,7 +791,7 @@ MgmtSrvr::restartNode(int processId, bool nostart, result = sendSignal(processId, NO_WAIT, signal, true); } - if (result == -1) { + if (result == -1 && theWaitState != WAIT_NODEFAILURE) { m_stopRec.inUse = false; return SEND_OR_RECEIVE_FAILED; } @@ -1920,6 +1920,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", aNodeId); #endif + theWaitNode= aNodeId; NdbApiSignal aSignal(_ownReference); BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend()); aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ, @@ -1947,6 +1948,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) event.Event = BackupEvent::BackupAborted; event.Aborted.Reason = rep->reason; event.Aborted.BackupId = rep->backupId; + event.Aborted.ErrorCode = rep->reason; backupCallback(event); } break; @@ -2076,6 +2078,13 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete) handleStopReply(nodeId, 0); DBUG_VOID_RETURN; } + + if(theWaitNode == nodeId && + theWaitState != NO_WAIT && theWaitState != WAIT_STOP) + { + theWaitState = WAIT_NODEFAILURE; + NdbCondition_Signal(theMgmtWaitForResponseCondPtr); + } } eventReport(_ownNodeId, theData); @@ -2427,7 +2436,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) int result; if (waitCompleted == 2) { result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED, - signal, true, 30*60*1000 /*30 secs*/); + signal, true, 48*60*60*1000 /* 48 hours */); } else if (waitCompleted == 1) { result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED, @@ -2456,22 +2465,6 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) return -1; break; } - } else { - switch(m_lastBackupEvent.Event){ - case BackupEvent::BackupCompleted: - backupId = m_lastBackupEvent.Completed.BackupId; - break; - case BackupEvent::BackupStarted: - backupId = m_lastBackupEvent.Started.BackupId; - break; - case BackupEvent::BackupFailedToStart: - return m_lastBackupEvent.FailedToStart.ErrorCode; - case BackupEvent::BackupAborted: - return m_lastBackupEvent.Aborted.ErrorCode; - default: - return -1; - break; - } } return 0; diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index a05b29b7f31..ce78983b3c3 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -611,7 +611,8 @@ private: WAIT_STOP, WAIT_BACKUP_STARTED, WAIT_BACKUP_COMPLETED, - WAIT_VERSION + WAIT_VERSION, + WAIT_NODEFAILURE }; /** @@ -695,6 +696,7 @@ private: NdbApiSignal* theSignalIdleList; // List of unused signals + Uint32 theWaitNode; WaitSignalType theWaitState; // State denoting a set of signals we accept to recieve. diff --git a/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp b/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp index 2126c9d358d..f93948abc75 100644 --- a/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp +++ b/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp @@ -108,6 +108,7 @@ MgmtSrvr::sendRecSignal(Uint16 aNodeId, return -1; } theWaitState = aWaitState; + theWaitNode = aNodeId; return receiveOptimisedResponse(waitTime); } @@ -119,11 +120,12 @@ MgmtSrvr::receiveOptimisedResponse(int waitTime) theFacade->checkForceSend(_blockNumber); NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime; - while (theWaitState != NO_WAIT && waitTime > 0) { + while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE + && waitTime > 0) { NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr, theFacade->theMutexPtr, waitTime); - if(theWaitState == NO_WAIT) + if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE) break; waitTime = (maxTime - NdbTick_CurrentMillisecond()); }//while diff --git a/ndb/src/ndbapi/Ndbif.cpp b/ndb/src/ndbapi/Ndbif.cpp index a4f233709c4..1caebe436ef 100644 --- a/ndb/src/ndbapi/Ndbif.cpp +++ b/ndb/src/ndbapi/Ndbif.cpp @@ -453,7 +453,7 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) tFirstDataPtr = int2void(tFirstData); if(tFirstDataPtr != 0){ tOp = void2rec_op(tFirstDataPtr); - if (tOp->checkMagicNumber() == 0) { + if (tOp->checkMagicNumber(false) == 0) { tCon = tOp->theNdbCon; if (tCon != NULL) { if ((tCon->theSendStatus == NdbConnection::sendTC_OP) || @@ -466,11 +466,11 @@ Ndb::handleReceivedSignal(NdbApiSignal* aSignal, LinearSectionPtr ptr[3]) }//if }//if }//if - } else { -#ifdef VM_TRACE - ndbout_c("Recevied TCKEY_FAILREF wo/ operation"); -#endif } +#ifdef VM_TRACE + ndbout_c("Recevied TCKEY_FAILREF wo/ operation"); +#endif + return; break; } case GSN_TCKEYREF: diff --git a/ndb/src/ndbapi/ndberror.c b/ndb/src/ndbapi/ndberror.c index 94e1aeb5545..cf1fd2ffdd0 100644 --- a/ndb/src/ndbapi/ndberror.c +++ b/ndb/src/ndbapi/ndberror.c @@ -140,10 +140,10 @@ ErrorBundle ErrorCodes[] = { { 4008, UR, "Receive from NDB failed" }, { 4009, UR, "Cluster Failure" }, { 4012, UR, - "Time-out, most likely caused by simple read or cluster failure" }, + "Request ndbd time-out, maybe due to high load or communication problems"}, { 4024, UR, - "Time-out, most likely caused by simple read or cluster failure" }, - + "Time-out, most likely caused by simple read or cluster failure" }, + /** * TemporaryResourceError */ @@ -347,7 +347,7 @@ ErrorBundle ErrorCodes[] = { { 1325, IE, "File or scan error" }, { 1326, IE, "Backup abortet due to node failure" }, { 1327, IE, "1327" }, - + { 1340, IE, "Backup undefined error" }, { 1342, AE, "Backup failed to allocate buffers (check configuration)" }, { 1343, AE, "Backup failed to setup fs buffers (check configuration)" }, @@ -357,7 +357,8 @@ ErrorBundle ErrorCodes[] = { { 1347, AE, "Backup failed to allocate table memory (check configuration)" }, { 1348, AE, "Backup failed to allocate file record (check configuration)" }, { 1349, AE, "Backup failed to allocate attribute record (check configuration)" }, - + { 1329, AE, "Backup during software upgrade not supported" }, + /** * Still uncategorized */ diff --git a/ndb/test/ndbapi/testBackup.cpp b/ndb/test/ndbapi/testBackup.cpp index 77b9d0a4baa..bea5d5307e2 100644 --- a/ndb/test/ndbapi/testBackup.cpp +++ b/ndb/test/ndbapi/testBackup.cpp @@ -74,20 +74,20 @@ int runAbort(NDBT_Context* ctx, NDBT_Step* step){ if (testMaster) { if (testSlave) { - if (backup.NFMasterAsSlave(restarter) == -1){ + if (backup.NFMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.NFMaster(restarter) == -1){ + if (backup.NFMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.NFSlave(restarter) == -1){ + if (backup.NFSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } - + return NDBT_OK; } @@ -108,16 +108,16 @@ int runFail(NDBT_Context* ctx, NDBT_Step* step){ if (testMaster) { if (testSlave) { - if (backup.FailMasterAsSlave(restarter) == -1){ + if (backup.FailMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.FailMaster(restarter) == -1){ + if (backup.FailMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.FailSlave(restarter) == -1){ + if (backup.FailSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } diff --git a/ndb/test/ndbapi/testIndex.cpp b/ndb/test/ndbapi/testIndex.cpp index 6623ad35a7f..d359f83257f 100644 --- a/ndb/test/ndbapi/testIndex.cpp +++ b/ndb/test/ndbapi/testIndex.cpp @@ -1329,7 +1329,7 @@ TESTCASE("NFNR2_O", INITIALIZER(runLoadTable); STEP(runRestarts); STEP(runTransactions2); - STEP(runTransactions2); + //STEP(runTransactions2); FINALIZER(runVerifyIndex); FINALIZER(createRandomIndex_Drop); FINALIZER(createPkIndex_Drop); diff --git a/ndb/test/ndbapi/testOperations.cpp b/ndb/test/ndbapi/testOperations.cpp index 9f1d5ee1191..773511a0475 100644 --- a/ndb/test/ndbapi/testOperations.cpp +++ b/ndb/test/ndbapi/testOperations.cpp @@ -547,21 +547,64 @@ runLockUpgrade1(NDBT_Context* ctx, NDBT_Step* step){ do { CHECK(hugoOps.startTransaction(pNdb) == 0); - CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); - CHECK(hugoOps.execute_NoCommit(pNdb) == 0); + if(ctx->getProperty("LOCK_UPGRADE", 1) == 1) + { + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); + CHECK(hugoOps.execute_NoCommit(pNdb) == 0); - ctx->setProperty("READ_DONE", 1); - ctx->broadcast(); - ndbout_c("wait 2"); - ctx->getPropertyWait("READ_DONE", 2); - ndbout_c("wait 2 - done"); + ctx->setProperty("READ_DONE", 1); + ctx->broadcast(); + ndbout_c("wait 2"); + ctx->getPropertyWait("READ_DONE", 2); + ndbout_c("wait 2 - done"); + } + else + { + ctx->setProperty("READ_DONE", 1); + ctx->broadcast(); + ctx->getPropertyWait("READ_DONE", 2); + ndbout_c("wait 2 - done"); + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1, NdbOperation::LM_Read) == 0); + CHECK(hugoOps.execute_NoCommit(pNdb) == 0); + } + if(ctx->getProperty("LU_OP", o_INS) == o_INS) + { + CHECK(hugoOps.pkDeleteRecord(pNdb, 0, 1) == 0); + CHECK(hugoOps.pkInsertRecord(pNdb, 0, 1, 2) == 0); + } + else if(ctx->getProperty("LU_OP", o_UPD) == o_UPD) + { + CHECK(hugoOps.pkUpdateRecord(pNdb, 0, 1, 2) == 0); + } + else + { + CHECK(hugoOps.pkDeleteRecord(pNdb, 0, 1) == 0); + } ctx->setProperty("READ_DONE", 3); ctx->broadcast(); ndbout_c("before update"); - CHECK(hugoOps.pkUpdateRecord(pNdb, 0, 1, 2) == 0); ndbout_c("wait update"); - CHECK(hugoOps.execute_NoCommit(pNdb) == 0); - CHECK(hugoOps.closeTransaction(pNdb)); + CHECK(hugoOps.execute_Commit(pNdb) == 0); + CHECK(hugoOps.closeTransaction(pNdb) == 0); + + CHECK(hugoOps.startTransaction(pNdb) == 0); + CHECK(hugoOps.pkReadRecord(pNdb, 0, 1) == 0); + int res= hugoOps.execute_Commit(pNdb); + if(ctx->getProperty("LU_OP", o_INS) == o_INS) + { + CHECK(res == 0); + CHECK(hugoOps.verifyUpdatesValue(2) == 0); + } + else if(ctx->getProperty("LU_OP", o_UPD) == o_UPD) + { + CHECK(res == 0); + CHECK(hugoOps.verifyUpdatesValue(2) == 0); + } + else + { + CHECK(res == 626); + } + } while(0); return result; @@ -592,10 +635,10 @@ runLockUpgrade2(NDBT_Context* ctx, NDBT_Step* step){ ndbout_c("wait 3 - done"); NdbSleep_MilliSleep(200); - CHECK(hugoOps.execute_Commit(pNdb) == 0); + CHECK(hugoOps.execute_Commit(pNdb) == 0); } while(0); - return NDBT_FAILED; + return result; } int @@ -607,11 +650,16 @@ main(int argc, const char** argv){ NDBT_TestSuite ts("testOperations"); + for(Uint32 i = 0; i <6; i++) { BaseString name("bug_9749"); + name.appfmt("_%d", i); NDBT_TestCaseImpl1 *pt = new NDBT_TestCaseImpl1(&ts, name.c_str(), ""); + pt->setProperty("LOCK_UPGRADE", 1 + (i & 1)); + pt->setProperty("LU_OP", 1 + (i >> 1)); + pt->addInitializer(new NDBT_Initializer(pt, "runClearTable", runClearTable)); diff --git a/ndb/test/run-test/daily-basic-tests.txt b/ndb/test/run-test/daily-basic-tests.txt index 453fe1ad7ae..2d7c435e8b4 100644 --- a/ndb/test/run-test/daily-basic-tests.txt +++ b/ndb/test/run-test/daily-basic-tests.txt @@ -2,6 +2,30 @@ max-time: 3600 cmd: atrt-mysql-test-run args: --force +max-time: 600 +cmd: atrt-testBackup +args: -n NFMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailSlave T1 + max-time: 600 cmd: atrt-testBackup args: -n BackupOne T1 T6 T3 I3 diff --git a/ndb/test/src/NdbBackup.cpp b/ndb/test/src/NdbBackup.cpp index 5e22468692e..28724323bd7 100644 --- a/ndb/test/src/NdbBackup.cpp +++ b/ndb/test/src/NdbBackup.cpp @@ -244,7 +244,11 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){ int NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){ + int nNodes = _restarter.getNumDbNodes(); { + if(nNodes == 1) + return NDBT_OK; + int nodeId = _restarter.getMasterNodeId(); CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, @@ -255,15 +259,11 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); - - NdbSleep_SecSleep(10); } - + CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); - - int nNodes = _restarter.getNumDbNodes(); - + myRandom48Init(NdbTick_CurrentMillisecond()); for(int i = 0; i