From b60be73461bb295b8b797a49c1a1b8b97baa55fe Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Thu, 24 Aug 2006 07:14:46 +0200 Subject: [PATCH 1/3] ndb - bug#21800 read TransactionDeadlockTimeout (for scans) to cater for insane settings --- ndb/src/ndbapi/NdbScanOperation.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ndb/src/ndbapi/NdbScanOperation.cpp b/ndb/src/ndbapi/NdbScanOperation.cpp index 0a39651ce28..469231512f5 100644 --- a/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/ndb/src/ndbapi/NdbScanOperation.cpp @@ -504,6 +504,8 @@ int NdbScanOperation::nextResult(bool fetchAllowed, bool forceSend) idx = m_current_api_receiver; last = m_api_receivers_count; + + Uint32 timeout = tp->m_waitfor_timeout; do { if(theError.code){ @@ -531,7 +533,7 @@ int NdbScanOperation::nextResult(bool fetchAllowed, bool forceSend) */ theNdb->theImpl->theWaiter.m_node = nodeId; theNdb->theImpl->theWaiter.m_state = WAIT_SCAN; - int return_code = theNdb->receiveResponse(WAITFOR_SCAN_TIMEOUT); + int return_code = theNdb->receiveResponse(3*timeout); if (return_code == 0 && seq == tp->getNodeSequence(nodeId)) { continue; } else { @@ -1372,6 +1374,7 @@ NdbIndexScanOperation::next_result_ordered(bool fetchAllowed, return -1; Uint32 seq = theNdbCon->theNodeSequence; Uint32 nodeId = theNdbCon->theDBnode; + Uint32 timeout = tp->m_waitfor_timeout; if(seq == tp->getNodeSequence(nodeId) && !send_next_scan_ordered(s_idx, forceSend)){ Uint32 tmp = m_sent_receivers_count; @@ -1379,7 +1382,7 @@ NdbIndexScanOperation::next_result_ordered(bool fetchAllowed, while(m_sent_receivers_count > 0 && !theError.code){ theNdb->theImpl->theWaiter.m_node = nodeId; theNdb->theImpl->theWaiter.m_state = WAIT_SCAN; - int return_code = theNdb->receiveResponse(WAITFOR_SCAN_TIMEOUT); + int return_code = theNdb->receiveResponse(3*timeout); if (return_code == 0 && seq == tp->getNodeSequence(nodeId)) { continue; } @@ -1520,6 +1523,8 @@ NdbScanOperation::close_impl(TransporterFacade* tp, bool forceSend){ return -1; } + Uint32 timeout = tp->m_waitfor_timeout; + /** * Wait for outstanding */ @@ -1527,7 +1532,7 @@ NdbScanOperation::close_impl(TransporterFacade* tp, bool forceSend){ { theNdb->theImpl->theWaiter.m_node = nodeId; theNdb->theImpl->theWaiter.m_state = WAIT_SCAN; - int return_code = theNdb->receiveResponse(WAITFOR_SCAN_TIMEOUT); + int return_code = theNdb->receiveResponse(3*timeout); switch(return_code){ case 0: break; @@ -1597,7 +1602,7 @@ NdbScanOperation::close_impl(TransporterFacade* tp, bool forceSend){ { theNdb->theImpl->theWaiter.m_node = nodeId; theNdb->theImpl->theWaiter.m_state = WAIT_SCAN; - int return_code = theNdb->receiveResponse(WAITFOR_SCAN_TIMEOUT); + int return_code = theNdb->receiveResponse(3*timeout); switch(return_code){ case 0: break; From 67a5d98c1d7aa2485e649c44a274b9365df2994b Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 28 Aug 2006 10:26:21 +0200 Subject: [PATCH 2/3] ndb - bug#21615 Improve error message when detecting corrupted REDO log --- ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 78 ++++++++++++++--------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index b5cfd4aae6d..7286481002f 100644 --- a/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -14622,6 +14622,8 @@ void Dblqh::execSr(Signal* signal) LogFileRecordPtr nextLogFilePtr; LogPageRecordPtr tmpLogPagePtr; Uint32 logWord; + Uint32 line; + const char * crash_msg = 0; jamEntry(); logPartPtr.i = signal->theData[0]; @@ -14832,8 +14834,14 @@ void Dblqh::execSr(Signal* signal) /* PLACE THAN IN THE FIRST PAGE OF A NEW FILE IN THE FIRST POSITION AFTER THE*/ /* HEADER. */ /*---------------------------------------------------------------------------*/ - ndbrequire(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] == - (ZPAGE_HEADER_SIZE + ZPOS_NO_FD)); + if (unlikely(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] != + (ZPAGE_HEADER_SIZE + ZPOS_NO_FD))) + { + line = __LINE__; + logWord = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; + crash_msg = "ZFD_TYPE at incorrect position!"; + goto crash; + } { Uint32 noFdDescriptors = logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD]; @@ -14870,19 +14878,10 @@ void Dblqh::execSr(Signal* signal) /*---------------------------------------------------------------------------*/ /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM. */ /*---------------------------------------------------------------------------*/ - signal->theData[0] = RNIL; - signal->theData[1] = logPartPtr.i; - Uint32 tmp = logFilePtr.p->fileName[3]; - tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. - signal->theData[2] = tmp; - signal->theData[3] = logFilePtr.p->fileNo; - signal->theData[4] = logFilePtr.p->currentFilepage; - signal->theData[5] = logFilePtr.p->currentMbyte; - signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; - signal->theData[7] = ~0; - signal->theData[8] = __LINE__; - sendSignal(cownref, GSN_DEBUG_SIG, signal, 9, JBA); - return; + line = __LINE__; + logWord = ZNEXT_MBYTE_TYPE; + crash_msg = "end of log wo/ having found last GCI"; + goto crash; }//if }//if /*---------------------------------------------------------------------------*/ @@ -14937,19 +14936,9 @@ void Dblqh::execSr(Signal* signal) /*---------------------------------------------------------------------------*/ /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM. */ /*---------------------------------------------------------------------------*/ - signal->theData[0] = RNIL; - signal->theData[1] = logPartPtr.i; - Uint32 tmp = logFilePtr.p->fileName[3]; - tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. - signal->theData[2] = tmp; - signal->theData[3] = logFilePtr.p->fileNo; - signal->theData[4] = logFilePtr.p->currentMbyte; - signal->theData[5] = logFilePtr.p->currentFilepage; - signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; - signal->theData[7] = logWord; - signal->theData[8] = __LINE__; - sendSignal(cownref, GSN_DEBUG_SIG, signal, 9, JBA); - return; + line = __LINE__; + crash_msg = "Invalid logword"; + goto crash; break; }//switch /*---------------------------------------------------------------------------*/ @@ -14957,6 +14946,35 @@ void Dblqh::execSr(Signal* signal) // that we reach a new page. /*---------------------------------------------------------------------------*/ } while (1); + return; + +crash: + signal->theData[0] = RNIL; + signal->theData[1] = logPartPtr.i; + Uint32 tmp = logFilePtr.p->fileName[3]; + tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX. + signal->theData[2] = tmp; + signal->theData[3] = logFilePtr.p->fileNo; + signal->theData[4] = logFilePtr.p->currentMbyte; + signal->theData[5] = logFilePtr.p->currentFilepage; + signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]; + signal->theData[7] = logWord; + signal->theData[8] = line; + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Error while reading REDO log. from %d\n" + "D=%d, F=%d Mb=%d FP=%d W1=%d W2=%d : %s", + signal->theData[8], + signal->theData[2], + signal->theData[3], + signal->theData[4], + signal->theData[5], + signal->theData[6], + signal->theData[7], + crash_msg ? crash_msg : ""); + + progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf); }//Dblqh::execSr() /*---------------------------------------------------------------------------*/ @@ -14972,8 +14990,8 @@ void Dblqh::execDEBUG_SIG(Signal* signal) UintR tdebug; jamEntry(); - logPagePtr.i = signal->theData[0]; - tdebug = logPagePtr.p->logPageWord[0]; + //logPagePtr.i = signal->theData[0]; + //tdebug = logPagePtr.p->logPageWord[0]; char buf[100]; BaseString::snprintf(buf, 100, From 96ff8b4c521aba696ba62154605ea2a04a927c6e Mon Sep 17 00:00:00 2001 From: "jonas@perch.ndb.mysql.com" <> Date: Mon, 4 Sep 2006 13:43:34 +0200 Subject: [PATCH 3/3] bug#21965 - replication fix deadlock if master switches log file in parallell with "show master logs" --- sql/log.cc | 11 ++++++++--- sql/sql_class.h | 1 + sql/sql_repl.cc | 8 ++++++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index c530f15a84f..5ae89dfeb50 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -409,12 +409,17 @@ shutdown the MySQL server and restart it.", log_name, errno); int MYSQL_LOG::get_current_log(LOG_INFO* linfo) { pthread_mutex_lock(&LOCK_log); - strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1); - linfo->pos = my_b_tell(&log_file); + int ret = raw_get_current_log(linfo); pthread_mutex_unlock(&LOCK_log); - return 0; + return ret; } +int MYSQL_LOG::raw_get_current_log(LOG_INFO* linfo) +{ + strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1); + linfo->pos = my_b_tell(&log_file); + return 0; +} /* Move all data up in a file in an filename index file diff --git a/sql/sql_class.h b/sql/sql_class.h index e8fe175cd7c..a995a492bc8 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -177,6 +177,7 @@ public: bool need_mutex); int find_next_log(LOG_INFO* linfo, bool need_mutex); int get_current_log(LOG_INFO* linfo); + int raw_get_current_log(LOG_INFO* linfo); uint next_file_id(); inline bool is_open() { return log_type != LOG_CLOSED; } inline char* get_index_fname() { return index_file_name;} diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 963c4ccf5a6..2a7ab55b8c4 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1359,10 +1359,14 @@ int show_binlogs(THD* thd) MYSQL_TYPE_LONGLONG)); if (protocol->send_fields(&field_list, 1)) DBUG_RETURN(1); + + pthread_mutex_lock(mysql_bin_log.get_log_lock()); mysql_bin_log.lock_index(); index_file=mysql_bin_log.get_index_file(); - - mysql_bin_log.get_current_log(&cur); + + mysql_bin_log.raw_get_current_log(&cur); // dont take mutex + pthread_mutex_unlock(mysql_bin_log.get_log_lock()); // lockdep, OK + cur_dir_len= dirname_length(cur.log_file_name); reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0);