Bug #19148 Backup causes cluster down if _really_ early abort happens

An early abort caused by failing to allocate buffers would go down the abort
track and end up calling sendDropTrig (as the standard stop backup does).
However, it is possible that no tables have been defined in the backup yet
(because buffers are allocated before that happens). A simple check in
sendDropTrig now detects this case and continues to the next step. Files may
be open, so we continue on to close them.

Also updated ERROR_codes.txt (and added an error insert code to test this bug).


storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  update BACKUP error insert codes to reflect current reality.
storage/ndb/src/kernel/blocks/backup/Backup.cpp:
  Allow for early abort when no tables have been included in backup.
  
  Allow for abort when file records have been seized but not yet allocated
  to specific uses (as is the case when buffer allocation fails)
storage/ndb/src/kernel/blocks/backup/Backup.hpp:
  initialize BackupRecord file Ptrs
storage/ndb/test/src/NdbBackup.cpp:
  Add extra failure codes for buffer allocation failure in backup
This commit is contained in:
unknown 2006-10-18 18:19:16 +10:00
parent 675557c844
commit 7968c44c0e
4 changed files with 43 additions and 13 deletions

View file

@ -8,7 +8,7 @@ Next DBDICT 6007
Next DBDIH 7178
Next DBTC 8038
Next CMVMI 9000
Next BACKUP 10022
Next BACKUP 10036
Next DBUTIL 11002
Next DBTUX 12008
Next SUMA 13001
@ -411,6 +411,11 @@ Backup Stuff:
10028: Abort backup by error at reception of BACKUP_FRAGMENT_CONF at master (code 305)
10029: Abort backup by error at reception of FSAPPENDCONF in slave (FileOrScanError = 5)
10030: Simulate buffer full from trigger execution => abort backup
10031: Error 331 for dictCommitTableMutex_locked
10032: backup checkscan
10033: backup checkscan
10034: define backup reply error
10035: Fail to allocate buffers
11001: Send UTIL_SEQUENCE_REF (in master)

View file

@ -2004,7 +2004,15 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr)
if (ptr.p->slaveData.dropTrig.tableId == RNIL) {
jam();
ptr.p->tables.first(tabPtr);
if(ptr.p->tables.count())
ptr.p->tables.first(tabPtr);
else
{
// Early abort, go to close files
jam();
closeFiles(signal, ptr);
return;
}
} else {
jam();
ndbrequire(findTable(ptr, tabPtr, ptr.p->slaveData.dropTrig.tableId));
@ -2105,8 +2113,11 @@ Backup::execDROP_TRIG_REF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
if(ref->getConf()->getTriggerId() != -1)
{
ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
}
dropTrigReply(signal, ptr);
}
@ -2538,8 +2549,9 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
files[i].p->filePointer = RNIL;
files[i].p->m_flags = 0;
files[i].p->errorCode = 0;
if(files[i].p->pages.seize(noOfPages[i]) == false) {
if(ERROR_INSERTED(10035) || files[i].p->pages.seize(noOfPages[i]) == false)
{
jam();
DEBUG_OUT("Failed to seize " << noOfPages[i] << " pages");
defineBackupRef(signal, ptr, DefineBackupRef::FailedToAllocateBuffers);
@ -4451,14 +4463,24 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
}
jam();
BackupFilePtr filePtr;
ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
BackupFilePtr filePtr;
if(ptr.p->logFilePtr != RNIL)
{
ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
conf->noOfLogBytes= filePtr.p->operation.noOfBytes;
conf->noOfLogRecords= filePtr.p->operation.noOfRecords;
}
else
{
conf->noOfLogBytes= 0;
conf->noOfLogRecords= 0;
}
sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
StopBackupConf::SignalLength, JBB);

View file

@ -427,6 +427,7 @@ public:
: slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
, tables(tp), triggers(trp), files(bp)
, masterData(b), backup(b)
, ctlFilePtr(RNIL), logFilePtr(RNIL), dataFilePtr(RNIL)
{
}

View file

@ -350,7 +350,8 @@ int
FailS_codes[] = {
10025,
10027,
10033
10033,
10035
};
int
@ -362,7 +363,8 @@ FailM_codes[] = {
10027,
10028,
10031,
10033
10033,
10035
};
int