From 2609e057eb74a3a7ca314c6f4e7a113beb34894b Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 25 Jun 2007 07:05:25 +0200 Subject: [PATCH 1/2] ndb - bug#29057 bug#29067 and bug#29118 Fix 3 problems in REDO validation that could(will) lead to "Error while reading REDO log" --- ndb/src/kernel/blocks/dblqh/Dblqh.hpp | 5 +- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 341 +++++++++++++++++----- 2 files changed, 263 insertions(+), 83 deletions(-) diff --git a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 3c0faff7236..77df76ba462 100644 --- a/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -1617,7 +1617,8 @@ public: ACTIVE_WRITE_LOG = 17, ///< A write operation during ///< writing of log READ_SR_INVALIDATE_PAGES = 18, - WRITE_SR_INVALIDATE_PAGES = 19 + WRITE_SR_INVALIDATE_PAGES = 19, + WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0 = 20 }; /** * We have to remember the log pages read. @@ -2437,7 +2438,7 @@ private: void errorReport(Signal* signal, int place); void warningReport(Signal* signal, int place); void invalidateLogAfterLastGCI(Signal *signal); - void readFileInInvalidate(Signal *signal); + void readFileInInvalidate(Signal *signal, bool stepNext); void exitFromInvalidate(Signal* signal); Uint32 calcPageCheckSum(LogPageRecordPtr logP); diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index f4b99e8be2f..9c6242b3600 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -110,6 +110,10 @@ operator<<(NdbOut& out, Dblqh::ScanRecord::ScanType state){ //#define MARKER_TRACE 1 //#define TRACE_SCAN_TAKEOVER 1 +#ifndef DEBUG_REDO +#define DEBUG_REDO 0 +#endif + const Uint32 NR_ScanNo = 0; void Dblqh::execACC_COM_BLOCK(Signal* signal) @@ -11987,9 +11991,6 @@ void Dblqh::execFSCLOSECONF(Signal* signal) case LogFileRecord::CLOSE_SR_INVALIDATE_PAGES: jam(); logFilePtr.p->logFileStatus = LogFileRecord::CLOSED; - // Set the prev file to check if we shall close it. - logFilePtr.i = logFilePtr.p->prevLogFile; - ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); logPartPtr.i = logFilePtr.p->logPartRec; ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord); @@ -12039,7 +12040,7 @@ void Dblqh::execFSOPENCONF(Signal* signal) case LogFileRecord::OPEN_SR_INVALIDATE_PAGES: jam(); logFilePtr.p->logFileStatus = LogFileRecord::OPEN; - readFileInInvalidate(signal); + readFileInInvalidate(signal, false); return; case LogFileRecord::OPENING_INIT: jam(); @@ -12219,6 +12220,7 @@ void Dblqh::execFSWRITECONF(Signal* signal) case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES: jam(); invalidateLogAfterLastGCI(signal); + CRASH_INSERTION(5047); return; case LogFileOperationRecord::WRITE_PAGE_ZERO: jam(); @@ -12256,6 +12258,14 @@ void Dblqh::execFSWRITECONF(Signal* signal) jam(); firstPageWriteLab(signal); return; + case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0: + jam(); + // We are done...send completed signal and exit this phase. + releaseLfo(signal); + signal->theData[0] = ZSR_FOURTH_COMP; + signal->theData[1] = logPartPtr.i; + sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); + return; default: jam(); systemErrorLab(signal, __LINE__); @@ -13326,6 +13336,12 @@ void Dblqh::writeSinglePage(Signal* signal, Uint32 pageNo, signal->theData[6] = logPagePtr.i; signal->theData[7] = pageNo; sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA); + + if (DEBUG_REDO) + ndbout_c("writeSingle 1 page at part: %u file: %u pos: %u", + logPartPtr.i, + logFilePtr.p->fileNo, + pageNo); }//Dblqh::writeSinglePage() /* ########################################################################## @@ -13391,6 +13407,12 @@ void Dblqh::openSrLastFileLab(Signal* signal) void Dblqh::readSrLastFileLab(Signal* signal) { logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP]; + if (DEBUG_REDO) + ndbout_c("readSrLastFileLab part: %u logExecState: %u logPartState: %u logLap: %u", + logPartPtr.i, + logPartPtr.p->logExecState, + logPartPtr.p->logPartState, + logPartPtr.p->logLap); if (logPartPtr.p->noLogFiles > ZMAX_LOG_FILES_IN_PAGE_ZERO) { jam(); initGciInLogFileRec(signal, ZMAX_LOG_FILES_IN_PAGE_ZERO); @@ -14608,6 +14630,20 @@ void Dblqh::srLogLimits(Signal* signal) break; }//if }//while + + if (DEBUG_REDO) + { + LogFileRecordPtr tmp; + tmp.i = logPartPtr.p->stopLogfile; + ptrCheckGuard(tmp, clogFileFileSize, logFileRecord); + ndbout_c("srLogLimits part: %u start file: %u mb: %u stop file: %u mb: %u", + logPartPtr.i, + tlastPrepRef >> 16, + tlastPrepRef & 65535, + tmp.p->fileNo, + logPartPtr.p->stopMbyte); + } + /* ------------------------------------------------------------------------ * WE HAVE NOW FOUND BOTH THE START AND THE STOP OF THE LOG. NOW START * EXECUTING THE LOG. THE FIRST ACTION IS TO OPEN THE LOG FILE WHERE TO @@ -15034,6 +15070,12 @@ void Dblqh::execSr(Signal* signal) case ZCOMPLETED_GCI_TYPE: jam(); logWord = readLogword(signal); + if (DEBUG_REDO) + ndbout_c("found gci: %u part: %u file: %u page: %u", + logWord, + logPartPtr.i, + logFilePtr.p->fileNo, + logFilePtr.p->currentFilepage); if (logWord == logPartPtr.p->logLastGci) { jam(); /*---------------------------------------------------------------------------*/ @@ -15050,6 +15092,10 @@ void Dblqh::execSr(Signal* signal) logPartPtr.p->headPageNo = logFilePtr.p->currentFilepage; logPartPtr.p->headPageIndex = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; + logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP]; + if (DEBUG_REDO) + ndbout_c("execSr part: %u logLap: %u", + logPartPtr.i, logPartPtr.p->logLap); }//if /*---------------------------------------------------------------------------*/ /* THERE IS NO NEED OF EXECUTING PAST THIS LINE SINCE THERE WILL ONLY BE LOG */ @@ -15212,67 +15258,140 @@ void Dblqh::invalidateLogAfterLastGCI(Signal* signal) { } switch (lfoPtr.p->lfoState) { - case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES: - jam(); - releaseLfo(signal); - releaseLogpage(signal); - if (logPartPtr.p->invalidatePageNo < (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1)) { - // We continue in this file. - logPartPtr.p->invalidatePageNo++; - } else { - // We continue in the next file. - logFilePtr.i = logFilePtr.p->nextLogFile; - ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); - logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo; - // Page 0 is used for file descriptors. - logPartPtr.p->invalidatePageNo = 1; - if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN) { - jam(); - logFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_INVALIDATE_PAGES; - openFileRw(signal, logFilePtr); - return; - break; - } - } - // Read a page from the log file. - readFileInInvalidate(signal); - return; - break; - case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES: jam(); - releaseLfo(signal); // Check if this page must be invalidated. // If the log lap number on a page after the head of the tail is the same // as the actual log lap number we must invalidate this page. Otherwise it // could be impossible to find the end of the log in a later system/node // restart. - if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] == logPartPtr.p->logLap) { + if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] == logPartPtr.p->logLap) + { // This page must be invalidated. - logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 0; - // Contact NDBFS. Real time break. - writeSinglePage(signal, logPartPtr.p->invalidatePageNo, - ZPAGE_SIZE - 1, __LINE__); - lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES; - } else { - // We are done with invalidating. Finish start phase 3.4. - exitFromInvalidate(signal); + // We search for end + // read next + releaseLfo(signal); + releaseLogpage(signal); + readFileInInvalidate(signal, true); + lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES; + return; } - return; - break; + /** + * We found the "last" page to invalidate... + * Invalidate backwards until head... + */ + + // Fall through... + case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES: + jam(); + + releaseLfo(signal); + releaseLogpage(signal); + + // Step backwards... + logPartPtr.p->invalidatePageNo--; + + if (logPartPtr.p->invalidatePageNo == 0) + { + jam(); + + if (logFilePtr.p->fileNo == 0) + { + /** + * We're wrapping in the log... + * update logLap + */ + logPartPtr.p->logLap--; + ndbrequire(logPartPtr.p->logLap); // Should always be > 0 + if (DEBUG_REDO) + ndbout_c("invalidateLogAfterLastGCI part: %u wrap from file 0 -> logLap: %u", + logPartPtr.i, logPartPtr.p->logLap); + } + + /** + * Move to prev file + */ + logFilePtr.i = logFilePtr.p->prevLogFile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo; + logPartPtr.p->invalidatePageNo = ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE - 1; + } + + if (logPartPtr.p->invalidateFileNo == logPartPtr.p->headFileNo && + logPartPtr.p->invalidatePageNo == logPartPtr.p->headPageNo) + { + /** + * Done... + */ + logFilePtr.i = logPartPtr.p->currentLogfile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + + logFilePtr.i = logFilePtr.p->nextLogFile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + + // Close files if necessary. Current file and the next file should be + // left open. + exitFromInvalidate(signal); + return; + } + + seizeLogpage(signal); + + /** + * Make page really empty + */ + bzero(logPagePtr.p, sizeof(LogPageRecord)); + writeSinglePage(signal, logPartPtr.p->invalidatePageNo, + ZPAGE_SIZE - 1, __LINE__); + + lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES; + return; default: jam(); systemError(signal, __LINE__); return; break; } - - return; }//Dblqh::invalidateLogAfterLastGCI -void Dblqh::readFileInInvalidate(Signal* signal) { +void Dblqh::readFileInInvalidate(Signal* signal, bool stepNext) +{ jam(); + + if (stepNext) + { + logPartPtr.p->invalidatePageNo++; + if (logPartPtr.p->invalidatePageNo == (ZNO_MBYTES_IN_FILE * ZPAGES_IN_MBYTE)) + { + // We continue in the next file. + logFilePtr.i = logFilePtr.p->nextLogFile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo; + // Page 0 is used for file descriptors. + logPartPtr.p->invalidatePageNo = 1; + + if (logFilePtr.p->fileNo == 0) + { + /** + * We're wrapping in the log... + * update logLap + */ + logPartPtr.p->logLap++; + if (DEBUG_REDO) + ndbout_c("readFileInInvalidate part: %u wrap to file 0 -> logLap: %u", + logPartPtr.i, logPartPtr.p->logLap); + } + if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN) + { + jam(); + logFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_INVALIDATE_PAGES; + openFileRw(signal, logFilePtr); + return; + } + } + } + // Contact NDBFS. Real time break. readSinglePage(signal, logPartPtr.p->invalidatePageNo); lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES; @@ -15280,35 +15399,58 @@ void Dblqh::readFileInInvalidate(Signal* signal) { void Dblqh::exitFromInvalidate(Signal* signal) { jam(); - // Close files if necessary. Current file and the next file should be - // left open. - if (logFilePtr.i != logPartPtr.p->currentLogfile) { - LogFileRecordPtr currentLogFilePtr; - LogFileRecordPtr nextAfterCurrentLogFilePtr; - currentLogFilePtr.i = logPartPtr.p->currentLogfile; - ptrCheckGuard(currentLogFilePtr, clogFileFileSize, logFileRecord); +loop: + logFilePtr.i = logFilePtr.p->nextLogFile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); - nextAfterCurrentLogFilePtr.i = currentLogFilePtr.p->nextLogFile; + if (logFilePtr.i == logPartPtr.p->currentLogfile) + { + jam(); + goto done; + } + + if (logFilePtr.p->fileNo == 0) + { + jam(); + /** + * Logfile 0 shoult *not* be closed + */ + goto loop; + } + + if (logFilePtr.p->logFileStatus == LogFileRecord::CLOSED) + { + jam(); + goto done; + } - if (logFilePtr.i != nextAfterCurrentLogFilePtr.i) { - // This file should be closed. - logFilePtr.p->logFileStatus = LogFileRecord::CLOSE_SR_INVALIDATE_PAGES; - closeFile(signal, logFilePtr); - // Return from this function and wait for close confirm. Then come back - // and test the previous file for closing. - return; - } - } + jam(); + ndbrequire(logFilePtr.p->logFileStatus == LogFileRecord::OPEN); + logFilePtr.p->logFileStatus = LogFileRecord::CLOSE_SR_INVALIDATE_PAGES; + closeFile(signal, logFilePtr); + return; - // We are done with closing files, send completed signal and exit this phase. - signal->theData[0] = ZSR_FOURTH_COMP; - signal->theData[1] = logPartPtr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB); +done: + if (DEBUG_REDO) + ndbout_c("exitFromInvalidate part: %u head file: %u page: %u", + logPartPtr.i, + logPartPtr.p->headFileNo, + logPartPtr.p->headPageNo); + + logFilePtr.i = logPartPtr.p->firstLogfile; + ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); + logPagePtr.i = logFilePtr.p->logPageZero; + ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord); + logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] = + logPartPtr.p->headFileNo; + writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__); + + lfoPtr.p->logFileRec = logFilePtr.i; + lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0; return; } - /*---------------------------------------------------------------------------*/ /* THE EXECUTION OF A LOG RECORD IS COMPLETED. RELEASE PAGES IF THEY WERE */ /* READ FROM DISK FOR THIS PARTICULAR OPERATION. */ @@ -15694,20 +15836,10 @@ void Dblqh::readSrFourthZeroLab(Signal* signal) ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord); logPartPtr.p->invalidateFileNo = logPartPtr.p->headFileNo; logPartPtr.p->invalidatePageNo = logPartPtr.p->headPageNo; - logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_INVALIDATE; - seizeLfo(signal); - initLfo(signal); - // The state here is a little confusing, but simulates that we return - // to invalidateLogAfterLastGCI() from an invalidate write and are ready - // to read a page from file. - lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES; - - /** - * Make sure we dont release zero page - */ - seizeLogpage(signal); - invalidateLogAfterLastGCI(signal); + + readFileInInvalidate(signal, true); + lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES; return; }//Dblqh::readSrFourthZeroLab() @@ -16279,6 +16411,14 @@ void Dblqh::completedLogPage(Signal* signal, Uint32 clpType, Uint32 place) signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD; signal->theData[5] = twlpNoPages; sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 15, JBA); + + if (DEBUG_REDO) + ndbout_c("writing %d pages at part: %u file: %u pos: %u", + twlpNoPages, + logPartPtr.i, + logFilePtr.p->fileNo, + logFilePtr.p->filePosition); + if (twlpType == ZNORMAL) { jam(); lfoPtr.p->lfoState = LogFileOperationRecord::ACTIVE_WRITE_LOG; @@ -17518,6 +17658,14 @@ void Dblqh::readExecLog(Signal* signal) signal->theData[14] = lfoPtr.p->logPageArray[8]; signal->theData[15] = lfoPtr.p->logPageArray[9]; sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 16, JBA); + + if (DEBUG_REDO) + ndbout_c("readExecLog %u page at part: %u file: %u pos: %u", + lfoPtr.p->noPagesRw, + logPartPtr.i, + logFilePtr.p->fileNo, + logPartPtr.p->execSrStartPageNo); + }//Dblqh::readExecLog() /* ------------------------------------------------------------------------- */ @@ -17580,6 +17728,14 @@ void Dblqh::readExecSr(Signal* signal) signal->theData[13] = lfoPtr.p->logPageArray[7]; signal->theData[14] = tresPageid; sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 15, JBA); + + if (DEBUG_REDO) + ndbout_c("readExecSr %u page at part: %u file: %u pos: %u", + 8, + logPartPtr.i, + logFilePtr.p->fileNo, + tresPageid); + }//Dblqh::readExecSr() /* ------------------------------------------------------------------------- */ @@ -17729,6 +17885,13 @@ void Dblqh::readSinglePage(Signal* signal, Uint32 pageNo) signal->theData[6] = logPagePtr.i; signal->theData[7] = pageNo; sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA); + + if (DEBUG_REDO) + ndbout_c("readSinglePage 1 page at part: %u file: %u pos: %u", + logPartPtr.i, + logFilePtr.p->fileNo, + pageNo); + }//Dblqh::readSinglePage() /* -------------------------------------------------------------------------- @@ -18311,8 +18474,17 @@ void Dblqh::writeCompletedGciLog(Signal* signal) jam(); changeMbyte(signal); }//if + logFilePtr.p->remainingWordsInMbyte = logFilePtr.p->remainingWordsInMbyte - ZCOMPLETED_GCI_LOG_SIZE; + + if (DEBUG_REDO) + ndbout_c("writeCompletedGciLog gci: %u part: %u file: %u page: %u", + cnewestCompletedGci, + logPartPtr.i, + logFilePtr.p->fileNo, + logFilePtr.p->currentFilepage); + writeLogWord(signal, ZCOMPLETED_GCI_TYPE); writeLogWord(signal, cnewestCompletedGci); logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci; @@ -18349,6 +18521,13 @@ void Dblqh::writeDirty(Signal* signal, Uint32 place) signal->theData[6] = logPagePtr.i; signal->theData[7] = logPartPtr.p->prevFilepage; sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA); + + if (DEBUG_REDO) + ndbout_c("writeDirty 1 page at part: %u file: %u pos: %u", + logPartPtr.i, + logFilePtr.p->fileNo, + logPartPtr.p->prevFilepage); + }//Dblqh::writeDirty() /* -------------------------------------------------------------------------- From 259810d4586b4572fbd578d7c7549a7dc49a7dad Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 25 Jun 2007 07:12:27 +0200 Subject: [PATCH 2/2] ndb - post merge 50/51 - bug#29057 bug#29067 and bug#29118 --- storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index d5f95ea9cac..47640df8d5c 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -15605,7 +15605,7 @@ loop: jam(); ndbrequire(logFilePtr.p->logFileStatus == LogFileRecord::OPEN); logFilePtr.p->logFileStatus = LogFileRecord::CLOSE_SR_INVALIDATE_PAGES; - closeFile(signal, logFilePtr); + closeFile(signal, logFilePtr, __LINE__); return; done: