mirror of
https://github.com/MariaDB/server.git
synced 2025-01-19 05:22:25 +01:00
Merge clam.ndb.mysql.com:/space/pekka/ndb/version/my50-bug18781
into clam.ndb.mysql.com:/space/pekka/ndb/version/my51-bug18781
This commit is contained in:
commit
47a2004431
19 changed files with 1140 additions and 16 deletions
78
ndb/include/kernel/signaldata/DictLock.hpp
Normal file
78
ndb/include/kernel/signaldata/DictLock.hpp
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* Copyright (C) 2003 MySQL AB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
||||
|
||||
#ifndef DICT_LOCK_HPP
|
||||
#define DICT_LOCK_HPP
|
||||
|
||||
#include "SignalData.hpp"
|
||||
|
||||
// see comments in Dbdict.hpp
|
||||
|
||||
class DictLockReq {
|
||||
friend class Dbdict;
|
||||
friend class Dbdih;
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
enum LockType {
|
||||
NoLock = 0,
|
||||
NodeRestartLock = 1
|
||||
};
|
||||
private:
|
||||
Uint32 userPtr;
|
||||
Uint32 lockType;
|
||||
Uint32 userRef;
|
||||
};
|
||||
|
||||
class DictLockConf {
|
||||
friend class Dbdict;
|
||||
friend class Dbdih;
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
private:
|
||||
Uint32 userPtr;
|
||||
Uint32 lockType;
|
||||
Uint32 lockPtr;
|
||||
};
|
||||
|
||||
class DictLockRef {
|
||||
friend class Dbdict;
|
||||
friend class Dbdih;
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
enum ErrorCode {
|
||||
NotMaster = 1,
|
||||
InvalidLockType = 2,
|
||||
BadUserRef = 3,
|
||||
TooLate = 4,
|
||||
TooManyRequests = 5
|
||||
};
|
||||
private:
|
||||
Uint32 userPtr;
|
||||
Uint32 lockType;
|
||||
Uint32 errorCode;
|
||||
};
|
||||
|
||||
class DictUnlockOrd {
|
||||
friend class Dbdict;
|
||||
friend class Dbdih;
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 2 );
|
||||
private:
|
||||
Uint32 lockPtr;
|
||||
Uint32 lockType;
|
||||
};
|
||||
|
||||
#endif
|
94
ndb/src/kernel/blocks/dbdict/DictLock.txt
Normal file
94
ndb/src/kernel/blocks/dbdict/DictLock.txt
Normal file
|
@ -0,0 +1,94 @@
|
|||
Lock master DICT against schema operations
|
||||
|
||||
Implementation
|
||||
--------------
|
||||
|
||||
[ see comments in Dbdict.hpp ]
|
||||
|
||||
Use case: Node startup INR / NR
|
||||
-------------------------------
|
||||
|
||||
Master DICT (like any block) keeps a list of alive nodes (c_aliveNodes).
|
||||
These are participants in schema ops.
|
||||
|
||||
(1) c_aliveNodes is initialized when DICT starts
|
||||
in sp3 in READ_NODESCONF from CNTR
|
||||
|
||||
(2) when slave node fails (in any sp of the slave node)
|
||||
it is removed from c_aliveNodes in NODE_FAILREP
|
||||
|
||||
(3) when slave starts, it is added to c_aliveNodes
|
||||
in sp4 of the starting node in INCL_NODEREQ
|
||||
|
||||
Slave DIH locks master DICT in sp2 and releases the lock when started.
|
||||
Based on the constraints:
|
||||
|
||||
- the lock is taken when master DICT is known
|
||||
DIH reads this in sp2 in READ_NODESCONF
|
||||
|
||||
- the lock is taken before (3)
|
||||
|
||||
- the lock is taken before copying starts and held until it is done
|
||||
in sp4 DIH meta, DICT meta, tuple data
|
||||
|
||||
- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased
|
||||
in all DIH in invalidateNodeLCP() - not safe under schema ops
|
||||
|
||||
Signals:
|
||||
|
||||
All signals except the DICT lock signals (DICT_LOCK_REQ/CONF/REF, DICT_UNLOCK_ORD) are standard v5.0 signals.
|
||||
s=starting node, m=master, a=all participants, l=local block.
|
||||
|
||||
* sp2 - DICT_LOCK and START_PERM
|
||||
|
||||
DIH/s
|
||||
DICT_LOCK_REQ
|
||||
DICT/m
|
||||
DICT_LOCK_CONF
|
||||
DIH/s
|
||||
START_PERMREQ
|
||||
DIH/m
|
||||
START_INFOREQ
|
||||
DIH/a
|
||||
invalidateNodeLCP() if INR
|
||||
DIH/a
|
||||
START_INFOCONF
|
||||
DIH/m
|
||||
START_PERMCONF
|
||||
DIH/s
|
||||
|
||||
* sp4 - START_ME (copy metadata, no changes)
|
||||
|
||||
DIH/s
|
||||
START_MEREQ
|
||||
DIH/m
|
||||
COPY_TABREQ
|
||||
DIH/s
|
||||
COPY_TABCONF
|
||||
DIH/m
|
||||
DICTSTARTREQ
|
||||
DICT/s
|
||||
GET_SCHEMA_INFOREQ
|
||||
DICT/m
|
||||
SCHEMA_INFO
|
||||
DICT/s
|
||||
DICTSTARTCONF
|
||||
DIH/m
|
||||
INCL_NODEREQ
|
||||
DIH/a
|
||||
INCL_NODEREQ
|
||||
ANY/l
|
||||
INCL_NODECONF
|
||||
DIH/a
|
||||
INCL_NODECONF
|
||||
DIH/m
|
||||
START_MECONF
|
||||
DIH/s
|
||||
|
||||
* sp7 - release DICT lock
|
||||
|
||||
DIH/s
|
||||
DICT_UNLOCK_ORD
|
||||
DICT/m
|
||||
|
||||
# vim: set et sw=4:
|
|
@ -517,16 +517,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
|
|||
#define GSN_TEST_ORD 407
|
||||
#define GSN_TESTSIG 408
|
||||
#define GSN_TIME_SIGNAL 409
|
||||
/* 410 unused */
|
||||
/* 411 unused */
|
||||
/* 412 unused */
|
||||
#define GSN_TUP_ABORTREQ 414
|
||||
#define GSN_TUP_ADD_ATTCONF 415
|
||||
#define GSN_TUP_ADD_ATTRREF 416
|
||||
#define GSN_TUP_ADD_ATTRREQ 417
|
||||
#define GSN_TUP_ATTRINFO 418
|
||||
#define GSN_TUP_COMMITREQ 419
|
||||
/* 420 unused */
|
||||
|
||||
/* 421 unused */
|
||||
/* 422 unused */
|
||||
|
@ -981,4 +977,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
|
|||
#define GSN_DICT_ABORT_REF 668
|
||||
#define GSN_DICT_ABORT_CONF 669
|
||||
|
||||
/* DICT LOCK signals */
|
||||
#define GSN_DICT_LOCK_REQ 410
|
||||
#define GSN_DICT_LOCK_CONF 411
|
||||
#define GSN_DICT_LOCK_REF 412
|
||||
#define GSN_DICT_UNLOCK_ORD 420
|
||||
|
||||
#endif
|
||||
|
|
|
@ -196,6 +196,7 @@ public:
|
|||
InvalidTableVersion = 241,
|
||||
DropInProgress = 283,
|
||||
Busy = 701,
|
||||
BusyWithNR = 711,
|
||||
NotMaster = 702,
|
||||
InvalidFormat = 703,
|
||||
AttributeNameTooLong = 704,
|
||||
|
|
|
@ -77,6 +77,7 @@ public:
|
|||
enum ErrorCode {
|
||||
NoError = 0,
|
||||
Busy = 701,
|
||||
BusyWithNR = 711,
|
||||
NotMaster = 702,
|
||||
InvalidFormat = 703,
|
||||
AttributeNameTooLong = 704,
|
||||
|
|
|
@ -53,6 +53,7 @@ public:
|
|||
|
||||
enum ErrorCode {
|
||||
Busy = 701,
|
||||
BusyWithNR = 711,
|
||||
NotMaster = 702,
|
||||
NoSuchTable = 709,
|
||||
InvalidTableVersion = 241,
|
||||
|
|
|
@ -64,5 +64,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
|
|||
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
|
||||
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
|
||||
#define NDBD_FRAGID_VERSION (MAKE_VERSION(5,1,6))
|
||||
#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23)
|
||||
#define NDBD_DICT_LOCK_VERSION_5_1 MAKE_VERSION(5,1,12)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params)
|
|||
} else {
|
||||
for (int i = 0; i < count; ++i){
|
||||
BlockNumber number = getBlockNo(blocks[i]);
|
||||
cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode);
|
||||
cnt += log(SLM_ON, number, logMode);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i<count; i++){
|
||||
|
|
|
@ -624,5 +624,12 @@ const GsnName SignalNames [] = {
|
|||
,{ GSN_LCP_PREPARE_REQ, "LCP_PREPARE_REQ" }
|
||||
,{ GSN_LCP_PREPARE_REF, "LCP_PREPARE_REF" }
|
||||
,{ GSN_LCP_PREPARE_CONF, "LCP_PREPARE_CONF" }
|
||||
|
||||
/* DICT LOCK */
|
||||
,{ GSN_DICT_LOCK_REQ, "DICT_LOCK_REQ" }
|
||||
,{ GSN_DICT_LOCK_CONF, "DICT_LOCK_CONF" }
|
||||
,{ GSN_DICT_LOCK_REF, "DICT_LOCK_REF" }
|
||||
,{ GSN_DICT_UNLOCK_ORD, "DICT_UNLOCK_ORD" }
|
||||
|
||||
};
|
||||
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
|
||||
|
|
|
@ -5,7 +5,7 @@ Next DBACC 3002
|
|||
Next DBTUP 4013
|
||||
Next DBLQH 5043
|
||||
Next DBDICT 6007
|
||||
Next DBDIH 7174
|
||||
Next DBDIH 7177
|
||||
Next DBTC 8037
|
||||
Next CMVMI 9000
|
||||
Next BACKUP 10022
|
||||
|
@ -312,6 +312,10 @@ Test Crashes in handling node restarts
|
|||
|
||||
7170: Crash when receiving START_PERMREF (InitialStartRequired)
|
||||
|
||||
7174: Crash starting node before sending DICT_LOCK_REQ
|
||||
7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
|
||||
7176: Slave NR pretends master does not support DICT lock (rolling upgrade)
|
||||
|
||||
DICT:
|
||||
6000 Crash during NR when receiving DICTSTARTREQ
|
||||
6001 Crash during NR when receiving SCHEMA_INFO
|
||||
|
|
|
@ -322,6 +322,11 @@ void Dbdict::execCONTINUEB(Signal* signal)
|
|||
sendGetTabResponse(signal);
|
||||
break;
|
||||
|
||||
case ZDICT_LOCK_POLL:
|
||||
jam();
|
||||
checkDictLockQueue(signal, true);
|
||||
break;
|
||||
|
||||
default :
|
||||
ndbrequire(false);
|
||||
break;
|
||||
|
@ -1500,8 +1505,9 @@ Dbdict::Dbdict(Block_context& ctx):
|
|||
c_Trans(c_opRecordPool),
|
||||
c_opCreateObj(c_schemaOp),
|
||||
c_opDropObj(c_schemaOp),
|
||||
c_opRecordSequence(0)
|
||||
|
||||
c_opRecordSequence(0),
|
||||
c_dictLockQueue(c_dictLockPool),
|
||||
c_dictLockPoll(false)
|
||||
{
|
||||
BLOCK_CONSTRUCTOR(Dbdict);
|
||||
|
||||
|
@ -1670,6 +1676,8 @@ Dbdict::Dbdict(Block_context& ctx):
|
|||
addRecSignal(GSN_DICT_ABORT_REF, &Dbdict::execDICT_ABORT_REF);
|
||||
addRecSignal(GSN_DICT_ABORT_CONF, &Dbdict::execDICT_ABORT_CONF);
|
||||
|
||||
addRecSignal(GSN_DICT_LOCK_REQ, &Dbdict::execDICT_LOCK_REQ);
|
||||
addRecSignal(GSN_DICT_UNLOCK_ORD, &Dbdict::execDICT_UNLOCK_ORD);
|
||||
}//Dbdict::Dbdict()
|
||||
|
||||
Dbdict::~Dbdict()
|
||||
|
@ -2061,6 +2069,8 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal)
|
|||
c_opCreateTrigger.setSize(8);
|
||||
c_opDropTrigger.setSize(8);
|
||||
c_opAlterTrigger.setSize(8);
|
||||
|
||||
c_dictLockPool.setSize(32);
|
||||
|
||||
// Initialize schema file copies
|
||||
c_schemaFile[0].schemaPage =
|
||||
|
@ -3535,6 +3545,10 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
|
|||
c_blockState = BS_NODE_FAILURE;
|
||||
ok = true;
|
||||
break;
|
||||
case BS_NODE_RESTART:
|
||||
jam();
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
ndbrequire(ok);
|
||||
|
||||
|
@ -3557,6 +3571,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
|
|||
}//if
|
||||
}//for
|
||||
|
||||
/*
|
||||
* NODE_FAILREP guarantees that no "in flight" signal from
|
||||
* a dead node is accepted, and also that the job buffer contains
|
||||
* no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD
|
||||
* from a dead node (leading to master crash) is possible after
|
||||
* this clean-up removes the lock record.
|
||||
*/
|
||||
removeStaleDictLocks(signal, theFailedNodes);
|
||||
|
||||
}//execNODE_FAILREP()
|
||||
|
||||
|
||||
|
@ -3625,6 +3648,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* signal){
|
|||
break;
|
||||
}
|
||||
|
||||
if (c_blockState == BS_NODE_RESTART){
|
||||
jam();
|
||||
parseRecord.errorCode = CreateTableRef::BusyWithNR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c_blockState != BS_IDLE){
|
||||
jam();
|
||||
parseRecord.errorCode = CreateTableRef::Busy;
|
||||
|
@ -3804,6 +3833,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* signal)
|
|||
return;
|
||||
}
|
||||
|
||||
if(c_blockState == BS_NODE_RESTART){
|
||||
jam();
|
||||
alterTableRef(signal, req, AlterTableRef::BusyWithNR);
|
||||
return;
|
||||
}
|
||||
|
||||
if(c_blockState != BS_IDLE){
|
||||
jam();
|
||||
alterTableRef(signal, req, AlterTableRef::Busy);
|
||||
|
@ -6357,6 +6392,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signal){
|
|||
return;
|
||||
}
|
||||
|
||||
if(c_blockState == BS_NODE_RESTART){
|
||||
jam();
|
||||
dropTableRef(signal, req, DropTableRef::BusyWithNR);
|
||||
return;
|
||||
}
|
||||
|
||||
if(c_blockState != BS_IDLE){
|
||||
jam();
|
||||
dropTableRef(signal, req, DropTableRef::Busy);
|
||||
|
@ -13279,6 +13320,275 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask)
|
|||
}
|
||||
}
|
||||
|
||||
// DICT lock master
|
||||
|
||||
/*
 * Look up the static descriptor of a DICT lock type.
 *
 * lockType is the raw value received in a signal.  Returns a pointer
 * into a function-local static table, or NULL if the value does not
 * name a known lock type.
 */
const Dbdict::DictLockType*
Dbdict::getDictLockType(Uint32 lockType)
{
  static const DictLockType lt[] = {
    { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" }
  };
  // unsigned index: sizeof yields an unsigned value
  for (unsigned i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) {
    if ((Uint32)lt[i].lockType == lockType)
      return &lt[i];
  }
  return NULL;
}
|
||||
|
||||
void
|
||||
Dbdict::sendDictLockInfoEvent(Uint32 pollCount)
|
||||
{
|
||||
DictLockPtr loopPtr;
|
||||
c_dictLockQueue.first(loopPtr);
|
||||
unsigned count = 0;
|
||||
|
||||
char queue_buf[100];
|
||||
char *p = &queue_buf[0];
|
||||
const char *const q = &queue_buf[sizeof(queue_buf)];
|
||||
*p = 0;
|
||||
|
||||
while (loopPtr.i != RNIL) {
|
||||
jam();
|
||||
my_snprintf(p, q-p, "%s%u%s",
|
||||
++count == 1 ? "" : " ",
|
||||
(unsigned)refToNode(loopPtr.p->req.userRef),
|
||||
loopPtr.p->locked ? "L" : "");
|
||||
p += strlen(p);
|
||||
c_dictLockQueue.next(loopPtr);
|
||||
}
|
||||
|
||||
infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s",
|
||||
(int)c_blockState,
|
||||
c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(),
|
||||
c_dictLockPoll, (int)pollCount, queue_buf);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text)
|
||||
{
|
||||
infoEvent("DICT: %s %u for %s",
|
||||
text,
|
||||
(unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::execDICT_LOCK_REQ(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
const DictLockReq* req = (const DictLockReq*)&signal->theData[0];
|
||||
|
||||
// make sure bad request crashes slave, not master (us)
|
||||
|
||||
if (getOwnNodeId() != c_masterNodeId) {
|
||||
jam();
|
||||
sendDictLockRef(signal, *req, DictLockRef::NotMaster);
|
||||
return;
|
||||
}
|
||||
|
||||
const DictLockType* lt = getDictLockType(req->lockType);
|
||||
if (lt == NULL) {
|
||||
jam();
|
||||
sendDictLockRef(signal, *req, DictLockRef::InvalidLockType);
|
||||
return;
|
||||
}
|
||||
|
||||
if (req->userRef != signal->getSendersBlockRef() ||
|
||||
getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) {
|
||||
jam();
|
||||
sendDictLockRef(signal, *req, DictLockRef::BadUserRef);
|
||||
return;
|
||||
}
|
||||
|
||||
if (c_aliveNodes.get(refToNode(req->userRef))) {
|
||||
jam();
|
||||
sendDictLockRef(signal, *req, DictLockRef::TooLate);
|
||||
return;
|
||||
}
|
||||
|
||||
DictLockPtr lockPtr;
|
||||
if (! c_dictLockQueue.seize(lockPtr)) {
|
||||
jam();
|
||||
sendDictLockRef(signal, *req, DictLockRef::TooManyRequests);
|
||||
return;
|
||||
}
|
||||
|
||||
lockPtr.p->req = *req;
|
||||
lockPtr.p->locked = false;
|
||||
lockPtr.p->lt = lt;
|
||||
|
||||
checkDictLockQueue(signal, false);
|
||||
|
||||
if (! lockPtr.p->locked)
|
||||
sendDictLockInfoEvent(lockPtr, "lock request by node");
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::checkDictLockQueue(Signal* signal, bool poll)
|
||||
{
|
||||
Uint32 pollCount = ! poll ? 0 : signal->theData[1];
|
||||
|
||||
DictLockPtr lockPtr;
|
||||
|
||||
do {
|
||||
if (! c_dictLockQueue.first(lockPtr)) {
|
||||
jam();
|
||||
setDictLockPoll(signal, false, pollCount);
|
||||
return;
|
||||
}
|
||||
|
||||
if (lockPtr.p->locked) {
|
||||
jam();
|
||||
ndbrequire(c_blockState == lockPtr.p->lt->blockState);
|
||||
break;
|
||||
}
|
||||
|
||||
if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) {
|
||||
jam();
|
||||
break;
|
||||
}
|
||||
|
||||
ndbrequire(c_blockState == BS_IDLE);
|
||||
lockPtr.p->locked = true;
|
||||
c_blockState = lockPtr.p->lt->blockState;
|
||||
sendDictLockConf(signal, lockPtr);
|
||||
|
||||
sendDictLockInfoEvent(lockPtr, "locked by node");
|
||||
} while (0);
|
||||
|
||||
// poll while first request is open
|
||||
// this routine is called again when it is removed for any reason
|
||||
|
||||
bool on = ! lockPtr.p->locked;
|
||||
setDictLockPoll(signal, on, pollCount);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::execDICT_UNLOCK_ORD(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0];
|
||||
|
||||
DictLockPtr lockPtr;
|
||||
c_dictLockQueue.getPtr(lockPtr, ord->lockPtr);
|
||||
ndbrequire(lockPtr.p->lt->lockType == ord->lockType);
|
||||
|
||||
if (lockPtr.p->locked) {
|
||||
jam();
|
||||
ndbrequire(c_blockState == lockPtr.p->lt->blockState);
|
||||
ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
|
||||
ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
|
||||
|
||||
c_blockState = BS_IDLE;
|
||||
sendDictLockInfoEvent(lockPtr, "unlocked by node");
|
||||
} else {
|
||||
sendDictLockInfoEvent(lockPtr, "lock request removed by node");
|
||||
}
|
||||
|
||||
c_dictLockQueue.release(lockPtr);
|
||||
|
||||
checkDictLockQueue(signal, false);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr)
|
||||
{
|
||||
DictLockConf* conf = (DictLockConf*)&signal->theData[0];
|
||||
const DictLockReq& req = lockPtr.p->req;
|
||||
|
||||
conf->userPtr = req.userPtr;
|
||||
conf->lockType = req.lockType;
|
||||
conf->lockPtr = lockPtr.i;
|
||||
|
||||
sendSignal(req.userRef, GSN_DICT_LOCK_CONF, signal,
|
||||
DictLockConf::SignalLength, JBB);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode)
|
||||
{
|
||||
DictLockRef* ref = (DictLockRef*)&signal->theData[0];
|
||||
|
||||
ref->userPtr = req.userPtr;
|
||||
ref->lockType = req.lockType;
|
||||
ref->errorCode = errorCode;
|
||||
|
||||
sendSignal(req.userRef, GSN_DICT_LOCK_REF, signal,
|
||||
DictLockRef::SignalLength, JBB);
|
||||
}
|
||||
|
||||
// control polling
|
||||
|
||||
void
|
||||
Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount)
|
||||
{
|
||||
if (on) {
|
||||
jam();
|
||||
signal->theData[0] = ZDICT_LOCK_POLL;
|
||||
signal->theData[1] = pollCount + 1;
|
||||
sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
|
||||
}
|
||||
|
||||
bool change = (c_dictLockPoll != on);
|
||||
|
||||
if (change) {
|
||||
jam();
|
||||
c_dictLockPoll = on;
|
||||
}
|
||||
|
||||
// avoid too many messages if master is stuck busy (BS_NODE_FAILURE)
|
||||
bool periodic =
|
||||
pollCount < 8 ||
|
||||
pollCount < 64 && pollCount % 8 == 0 ||
|
||||
pollCount < 512 && pollCount % 64 == 0 ||
|
||||
pollCount < 4096 && pollCount % 512 == 0 ||
|
||||
pollCount % 4096 == 0; // about every 6 minutes
|
||||
|
||||
if (change || periodic)
|
||||
sendDictLockInfoEvent(pollCount);
|
||||
}
|
||||
|
||||
// NF handling
|
||||
|
||||
void
|
||||
Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
|
||||
{
|
||||
DictLockPtr loopPtr;
|
||||
c_dictLockQueue.first(loopPtr);
|
||||
|
||||
if (getOwnNodeId() != c_masterNodeId) {
|
||||
ndbrequire(loopPtr.i == RNIL);
|
||||
return;
|
||||
}
|
||||
|
||||
while (loopPtr.i != RNIL) {
|
||||
jam();
|
||||
DictLockPtr lockPtr = loopPtr;
|
||||
c_dictLockQueue.next(loopPtr);
|
||||
|
||||
Uint32 nodeId = refToNode(lockPtr.p->req.userRef);
|
||||
|
||||
if (NodeBitmask::get(theFailedNodes, nodeId)) {
|
||||
if (lockPtr.p->locked) {
|
||||
jam();
|
||||
ndbrequire(c_blockState == lockPtr.p->lt->blockState);
|
||||
ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
|
||||
ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
|
||||
|
||||
c_blockState = BS_IDLE;
|
||||
|
||||
sendDictLockInfoEvent(lockPtr, "remove lock by failed node");
|
||||
} else {
|
||||
sendDictLockInfoEvent(lockPtr, "remove lock request by failed node");
|
||||
}
|
||||
|
||||
c_dictLockQueue.release(lockPtr);
|
||||
}
|
||||
}
|
||||
|
||||
checkDictLockQueue(signal, false);
|
||||
}
|
||||
|
||||
|
||||
/* **************************************************************** */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
#include <signaldata/CreateTrig.hpp>
|
||||
#include <signaldata/DropTrig.hpp>
|
||||
#include <signaldata/AlterTrig.hpp>
|
||||
#include <signaldata/DictLock.hpp>
|
||||
#include "SchemaFile.hpp"
|
||||
#include <blocks/mutexes.hpp>
|
||||
#include <SafeCounter.hpp>
|
||||
|
@ -68,6 +69,7 @@
|
|||
/*--------------------------------------------------------------*/
|
||||
#define ZPACK_TABLE_INTO_PAGES 0
|
||||
#define ZSEND_GET_TAB_RESPONSE 3
|
||||
#define ZDICT_LOCK_POLL 4
|
||||
|
||||
|
||||
/*--------------------------------------------------------------*/
|
||||
|
@ -812,6 +814,9 @@ private:
|
|||
void execDROP_FILEGROUP_REF(Signal* signal);
|
||||
void execDROP_FILEGROUP_CONF(Signal* signal);
|
||||
|
||||
void execDICT_LOCK_REQ(Signal* signal);
|
||||
void execDICT_UNLOCK_ORD(Signal* signal);
|
||||
|
||||
/*
|
||||
* 2.4 COMMON STORED VARIABLES
|
||||
*/
|
||||
|
@ -1046,12 +1051,43 @@ private:
|
|||
// State variables
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
#ifndef ndb_dbdict_log_block_state
|
||||
enum BlockState {
|
||||
BS_IDLE = 0,
|
||||
BS_CREATE_TAB = 1,
|
||||
BS_BUSY = 2,
|
||||
BS_NODE_FAILURE = 3
|
||||
BS_NODE_FAILURE = 3,
|
||||
BS_NODE_RESTART = 4
|
||||
};
|
||||
#else // quick hack to log changes
|
||||
enum {
|
||||
BS_IDLE = 0,
|
||||
BS_CREATE_TAB = 1,
|
||||
BS_BUSY = 2,
|
||||
BS_NODE_FAILURE = 3,
|
||||
BS_NODE_RESTART = 4
|
||||
};
|
||||
struct BlockState;
|
||||
friend struct BlockState;
|
||||
struct BlockState {
|
||||
BlockState() :
|
||||
m_value(BS_IDLE) {
|
||||
}
|
||||
BlockState(int value) :
|
||||
m_value(value) {
|
||||
}
|
||||
operator int() const {
|
||||
return m_value;
|
||||
}
|
||||
BlockState& operator=(const BlockState& bs) {
|
||||
Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT);
|
||||
dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value);
|
||||
m_value = bs.m_value;
|
||||
return *this;
|
||||
}
|
||||
int m_value;
|
||||
};
|
||||
#endif
|
||||
BlockState c_blockState;
|
||||
|
||||
struct PackTable {
|
||||
|
@ -2015,6 +2051,65 @@ private:
|
|||
// Unique key for operation XXX move to some system table
|
||||
Uint32 c_opRecordSequence;
|
||||
|
||||
/*
|
||||
* Master DICT can be locked in 2 mutually exclusive ways:
|
||||
*
|
||||
* 1) for schema ops, via operation records
|
||||
* 2) against schema ops, via a lock queue
|
||||
*
|
||||
* Current use of 2) is by a starting node, to prevent schema ops
|
||||
* until started. The ops are refused (BlockState != BS_IDLE),
|
||||
* not queued.
|
||||
*
|
||||
* Master failure is not handled, in node start case the starting
|
||||
* node will crash too anyway. A lock table could be used in the future.
|
||||
*
|
||||
* The lock queue is "serial" but other behaviour is possible
|
||||
* by checking lock types e.g. to allow parallel node starts.
|
||||
*
|
||||
* Checking release of last op record is not convenient with
|
||||
* current structure (5.0). Instead we poll via continueB.
|
||||
*
|
||||
* XXX only table ops check BlockState
|
||||
*/
|
||||
|
||||
struct DictLockType {
|
||||
DictLockReq::LockType lockType;
|
||||
BlockState blockState;
|
||||
const char* text;
|
||||
};
|
||||
|
||||
struct DictLockRecord {
|
||||
DictLockReq req;
|
||||
const DictLockType* lt;
|
||||
bool locked;
|
||||
union {
|
||||
Uint32 nextPool;
|
||||
Uint32 nextList;
|
||||
};
|
||||
Uint32 prevList;
|
||||
};
|
||||
|
||||
typedef Ptr<DictLockRecord> DictLockPtr;
|
||||
ArrayPool<DictLockRecord> c_dictLockPool;
|
||||
DLFifoList<DictLockRecord> c_dictLockQueue;
|
||||
bool c_dictLockPoll;
|
||||
|
||||
static const DictLockType* getDictLockType(Uint32 lockType);
|
||||
void sendDictLockInfoEvent(Uint32 pollCount);
|
||||
void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text);
|
||||
|
||||
void checkDictLockQueue(Signal* signal, bool poll);
|
||||
void sendDictLockConf(Signal* signal, DictLockPtr lockPtr);
|
||||
void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode);
|
||||
|
||||
// control polling i.e. continueB loop
|
||||
void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount);
|
||||
|
||||
// NF handling
|
||||
void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes);
|
||||
|
||||
|
||||
// Statement blocks
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
|
|
|
@ -718,6 +718,9 @@ private:
|
|||
void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr);
|
||||
void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId);
|
||||
|
||||
void execDICT_LOCK_CONF(Signal* signal);
|
||||
void execDICT_LOCK_REF(Signal* signal);
|
||||
|
||||
// Statement blocks
|
||||
//------------------------------------
|
||||
// Methods that send signals
|
||||
|
@ -935,6 +938,7 @@ private:
|
|||
void initialStartCompletedLab(Signal *);
|
||||
void allNodesLcpCompletedLab(Signal *);
|
||||
void nodeRestartPh2Lab(Signal *);
|
||||
void nodeRestartPh2Lab2(Signal *);
|
||||
void initGciFilesLab(Signal *);
|
||||
void dictStartConfLab(Signal *);
|
||||
void nodeDictStartConfLab(Signal *);
|
||||
|
@ -1603,6 +1607,30 @@ private:
|
|||
void startInfoReply(Signal *, Uint32 nodeId);
|
||||
|
||||
void dump_replica_info();
|
||||
|
||||
/*
|
||||
* Lock master DICT. Only current use is by starting node
|
||||
* during NR. A pool of slave records is convenient anyway.
|
||||
*/
|
||||
struct DictLockSlaveRecord {
|
||||
Uint32 lockPtr;
|
||||
Uint32 lockType;
|
||||
bool locked;
|
||||
Callback callback;
|
||||
Uint32 nextPool;
|
||||
};
|
||||
|
||||
typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr;
|
||||
ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool;
|
||||
|
||||
// slave
|
||||
void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c);
|
||||
void recvDictLockConf(Signal* signal);
|
||||
void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI);
|
||||
|
||||
// NR
|
||||
Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR
|
||||
void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret);
|
||||
};
|
||||
|
||||
#if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)
|
||||
|
|
|
@ -53,6 +53,9 @@ void Dbdih::initData()
|
|||
waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE);
|
||||
waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE);
|
||||
|
||||
c_dictLockSlavePool.setSize(1); // assert single usage
|
||||
c_dictLockSlavePtrI_nodeRestart = RNIL;
|
||||
|
||||
cgcpOrderBlocked = 0;
|
||||
c_lcpState.ctcCounter = 0;
|
||||
cwaitLcpSr = false;
|
||||
|
@ -251,6 +254,9 @@ Dbdih::Dbdih(Block_context& ctx):
|
|||
addRecSignal(GSN_CREATE_FRAGMENTATION_REQ,
|
||||
&Dbdih::execCREATE_FRAGMENTATION_REQ);
|
||||
|
||||
addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF);
|
||||
addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF);
|
||||
|
||||
apiConnectRecord = 0;
|
||||
connectRecord = 0;
|
||||
fileRecord = 0;
|
||||
|
|
|
@ -68,6 +68,7 @@
|
|||
#include <signaldata/LqhFrag.hpp>
|
||||
#include <signaldata/FsOpenReq.hpp>
|
||||
#include <signaldata/DihFragCount.hpp>
|
||||
#include <signaldata/DictLock.hpp>
|
||||
#include <DebuggerNames.hpp>
|
||||
|
||||
#include <EventLogger.hpp>
|
||||
|
@ -545,7 +546,7 @@ void Dbdih::execCONTINUEB(Signal* signal)
|
|||
break;
|
||||
case DihContinueB::ZSTART_PERMREQ_AGAIN:
|
||||
jam();
|
||||
nodeRestartPh2Lab(signal);
|
||||
nodeRestartPh2Lab2(signal);
|
||||
return;
|
||||
break;
|
||||
case DihContinueB::SwitchReplica:
|
||||
|
@ -1295,6 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal)
|
|||
case NodeState::ST_INITIAL_NODE_RESTART:
|
||||
case NodeState::ST_NODE_RESTART:
|
||||
jam();
|
||||
|
||||
/***********************************************************************
|
||||
* When starting nodes while system is operational we must be controlled
|
||||
* by the master since only one node restart is allowed at a time.
|
||||
|
@ -1305,7 +1307,7 @@ void Dbdih::execNDB_STTOR(Signal* signal)
|
|||
req->startingRef = reference();
|
||||
req->startingVersion = 0; // Obsolete
|
||||
sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
|
||||
StartMeReq::SignalLength, JBB);
|
||||
StartMeReq::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
ndbrequire(false);
|
||||
|
@ -1365,6 +1367,24 @@ void Dbdih::execNDB_STTOR(Signal* signal)
|
|||
}
|
||||
ndbrequire(false);
|
||||
break;
|
||||
case ZNDB_SPH7:
|
||||
jam();
|
||||
switch (typestart) {
|
||||
case NodeState::ST_INITIAL_START:
|
||||
case NodeState::ST_SYSTEM_RESTART:
|
||||
jam();
|
||||
ndbsttorry10Lab(signal, __LINE__);
|
||||
return;
|
||||
case NodeState::ST_NODE_RESTART:
|
||||
case NodeState::ST_INITIAL_NODE_RESTART:
|
||||
jam();
|
||||
sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
|
||||
c_dictLockSlavePtrI_nodeRestart = RNIL;
|
||||
ndbsttorry10Lab(signal, __LINE__);
|
||||
return;
|
||||
}
|
||||
ndbrequire(false);
|
||||
break;
|
||||
default:
|
||||
jam();
|
||||
ndbsttorry10Lab(signal, __LINE__);
|
||||
|
@ -1574,6 +1594,34 @@ void Dbdih::execREAD_NODESCONF(Signal* signal)
|
|||
/* START NODE LOGIC FOR NODE RESTART */
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
 * First step of node restart phase 2: lock the master DICT.
 *
 * The lock keeps metadata operations away during INR/NR and is taken
 * just before START_PERMREQ.  Taking it just before START_MEREQ would
 * be more elegant, but on INR we end up in massive invalidateNodeLCP
 * which is not fully protected against metadata ops.
 *
 * Continues in recvDictLockConf_nodeRestart().
 */
void Dbdih::nodeRestartPh2Lab(Signal* signal)
{
  // no node restart lock may be outstanding yet
  ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);

  // check that we are not yet taking part in schema ops
  CRASH_INSERTION(7174);

  Callback onLockConf =
    { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
  sendDictLockReq(signal, DictLockReq::NodeRestartLock, onLockConf);
}
|
||||
|
||||
/*
 * Callback given to sendDictLockReq() by nodeRestartPh2Lab().
 * Stores data in c_dictLockSlavePtrI_nodeRestart (the value later
 * passed to sendDictUnlockOrd in start phase 7) and continues with
 * nodeRestartPh2Lab2().  ret is not used.
 */
void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
{
  // the node restart lock is taken only once
  ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);

  c_dictLockSlavePtrI_nodeRestart = data;
  nodeRestartPh2Lab2(signal);
}
|
||||
|
||||
void Dbdih::nodeRestartPh2Lab2(Signal* signal)
|
||||
{
|
||||
/*------------------------------------------------------------------------*/
|
||||
// REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
|
||||
|
@ -1585,7 +1633,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal)
|
|||
req->nodeId = cownNodeId;
|
||||
req->startType = cstarttype;
|
||||
sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
|
||||
}//Dbdih::nodeRestartPh2Lab()
|
||||
}
|
||||
|
||||
void Dbdih::execSTART_PERMCONF(Signal* signal)
|
||||
{
|
||||
|
@ -1710,12 +1758,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
|
|||
const BlockReference retRef = req->blockRef;
|
||||
const Uint32 nodeId = req->nodeId;
|
||||
const Uint32 typeStart = req->startType;
|
||||
|
||||
CRASH_INSERTION(7122);
|
||||
ndbrequire(isMaster());
|
||||
ndbrequire(refToNode(retRef) == nodeId);
|
||||
if ((c_nodeStartMaster.activeState) ||
|
||||
(c_nodeStartMaster.wait != ZFALSE)) {
|
||||
(c_nodeStartMaster.wait != ZFALSE) ||
|
||||
ERROR_INSERTED_CLEAR(7175)) {
|
||||
jam();
|
||||
signal->theData[0] = nodeId;
|
||||
signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
|
||||
|
@ -10780,6 +10828,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
|
|||
c_copyGCIMaster.m_copyReason,
|
||||
c_copyGCIMaster.m_waiting);
|
||||
break;
|
||||
case GCP_READY: // shut up lint
|
||||
case GCP_PREPARE_SENT:
|
||||
case GCP_COMMIT_SENT:
|
||||
break;
|
||||
}
|
||||
|
||||
ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
|
||||
|
@ -14978,3 +15030,118 @@ Dbdih::NodeRecord::NodeRecord(){
|
|||
copyCompleted = false;
|
||||
allowNodeStart = true;
|
||||
}
|
||||
|
||||
// DICT lock slave
|
||||
|
||||
void
|
||||
Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
|
||||
{
|
||||
DictLockReq* req = (DictLockReq*)&signal->theData[0];
|
||||
DictLockSlavePtr lockPtr;
|
||||
|
||||
c_dictLockSlavePool.seize(lockPtr);
|
||||
ndbrequire(lockPtr.i != RNIL);
|
||||
|
||||
req->userPtr = lockPtr.i;
|
||||
req->lockType = lockType;
|
||||
req->userRef = reference();
|
||||
|
||||
lockPtr.p->lockPtr = RNIL;
|
||||
lockPtr.p->lockType = lockType;
|
||||
lockPtr.p->locked = false;
|
||||
lockPtr.p->callback = c;
|
||||
|
||||
// handle rolling upgrade
|
||||
{
|
||||
Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
|
||||
|
||||
unsigned int get_major = getMajor(masterVersion);
|
||||
unsigned int get_minor = getMinor(masterVersion);
|
||||
unsigned int get_build = getBuild(masterVersion);
|
||||
|
||||
ndbrequire(get_major == 4 || get_major == 5);
|
||||
|
||||
if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
|
||||
ERROR_INSERTED(7176)) {
|
||||
jam();
|
||||
|
||||
infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u",
|
||||
(unsigned int)cmasterNodeId, get_major, get_minor, get_build);
|
||||
|
||||
DictLockConf* conf = (DictLockConf*)&signal->theData[0];
|
||||
conf->userPtr = lockPtr.i;
|
||||
conf->lockType = lockType;
|
||||
conf->lockPtr = ZNIL;
|
||||
|
||||
sendSignal(reference(), GSN_DICT_LOCK_CONF, signal,
|
||||
DictLockConf::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
|
||||
sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
|
||||
DictLockReq::SignalLength, JBB);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdih::execDICT_LOCK_CONF(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
recvDictLockConf(signal);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdih::execDICT_LOCK_REF(Signal* signal)
|
||||
{
|
||||
jamEntry();
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdih::recvDictLockConf(Signal* signal)
|
||||
{
|
||||
const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];
|
||||
|
||||
DictLockSlavePtr lockPtr;
|
||||
c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
|
||||
|
||||
lockPtr.p->lockPtr = conf->lockPtr;
|
||||
ndbrequire(lockPtr.p->lockType == conf->lockType);
|
||||
ndbrequire(lockPtr.p->locked == false);
|
||||
lockPtr.p->locked = true;
|
||||
|
||||
lockPtr.p->callback.m_callbackData = lockPtr.i;
|
||||
execute(signal, lockPtr.p->callback, 0);
|
||||
}
|
||||
|
||||
void
|
||||
Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
|
||||
{
|
||||
DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0];
|
||||
|
||||
DictLockSlavePtr lockPtr;
|
||||
c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI);
|
||||
|
||||
ord->lockPtr = lockPtr.p->lockPtr;
|
||||
ord->lockType = lockPtr.p->lockType;
|
||||
|
||||
c_dictLockSlavePool.release(lockPtr);
|
||||
|
||||
// handle rolling upgrade
|
||||
{
|
||||
Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
|
||||
|
||||
unsigned int get_major = getMajor(masterVersion);
|
||||
ndbrequire(get_major == 4 || get_major == 5);
|
||||
|
||||
if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
|
||||
ERROR_INSERTED(7176)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
|
||||
sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal,
|
||||
DictUnlockOrd::SignalLength, JBB);
|
||||
}
|
||||
|
|
|
@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
|
|||
{
|
||||
jam();
|
||||
CRASH_INSERTION(932);
|
||||
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);
|
||||
BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
|
||||
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
|
|||
ndbrequire(false);
|
||||
case ZAPI_INACTIVE:
|
||||
{
|
||||
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId);
|
||||
BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
|
||||
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
|
||||
ndbrequire(false);
|
||||
}
|
||||
|
|
|
@ -420,6 +420,10 @@ int main(int argc, char** argv)
|
|||
FILE * signalLog = fopen(buf, "a");
|
||||
globalSignalLoggers.setOwnNodeId(globalData.ownId);
|
||||
globalSignalLoggers.setOutputStream(signalLog);
|
||||
#if 0 // to log startup
|
||||
globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH");
|
||||
globalData.testOn = 1;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
catchsigs(false);
|
||||
|
|
|
@ -125,11 +125,13 @@
|
|||
// Error-insert helpers.
//
// Without ERROR_INSERT the tests are constant false and the set/clear
// macros expand to nothing.  With ERROR_INSERT they operate on a variable
// named cerrorInsert, which ERROR_INSERT_VARIABLE declares:
//   ERROR_INSERTED(x)         - true when cerrorInsert equals x
//   ERROR_INSERTED_CLEAR(x)   - same test, but resets cerrorInsert to 0
//                               when it matches
//   SET_ERROR_INSERT_VALUE(x) - assign x to cerrorInsert
//   CLEAR_ERROR_INSERT_VALUE  - reset cerrorInsert to 0
#ifndef ERROR_INSERT
#define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used
#define ERROR_INSERTED(x) false
#define ERROR_INSERTED_CLEAR(x) false
#define SET_ERROR_INSERT_VALUE(x)
#define CLEAR_ERROR_INSERT_VALUE
#else
#define ERROR_INSERT_VARIABLE UintR cerrorInsert
#define ERROR_INSERTED(x) (cerrorInsert == (x))
#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false)
#define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x
#define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0
#endif
|
||||
|
|
|
@ -1644,6 +1644,299 @@ end:
|
|||
return result;
|
||||
}
|
||||
|
||||
// NFNR
|
||||
|
||||
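// The restarter step controls the dict-ops step with commands:
// 1 = run, 2 = pause, 3 = stop.
// The two steps synchronize by polling the "DictOps_CMD" and "DictOps_ACK"
// test properties.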
|
||||
|
||||
static bool
|
||||
send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd)
|
||||
{
|
||||
ctx->setProperty("DictOps_CMD", cmd);
|
||||
while (1) {
|
||||
if (ctx->isTestStopped())
|
||||
return false;
|
||||
if (ctx->getProperty("DictOps_ACK") == cmd)
|
||||
break;
|
||||
NdbSleep_MilliSleep(100);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
recv_dict_ops_run(NDBT_Context* ctx)
|
||||
{
|
||||
while (1) {
|
||||
if (ctx->isTestStopped())
|
||||
return false;
|
||||
Uint32 cmd = ctx->getProperty("DictOps_CMD");
|
||||
ctx->setProperty("DictOps_ACK", cmd);
|
||||
if (cmd == 1)
|
||||
break;
|
||||
if (cmd == 3)
|
||||
return false;
|
||||
NdbSleep_MilliSleep(100);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
runRestarts(NDBT_Context* ctx, NDBT_Step* step)
|
||||
{
|
||||
static int errlst_master[] = { // non-crashing
|
||||
7175, // send one fake START_PERMREF
|
||||
0
|
||||
};
|
||||
static int errlst_node[] = {
|
||||
7174, // crash before sending DICT_LOCK_REQ
|
||||
7176, // pretend master does not support DICT lock
|
||||
7121, // crash at receive START_PERMCONF
|
||||
0
|
||||
};
|
||||
const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]);
|
||||
const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]);
|
||||
|
||||
myRandom48Init(NdbTick_CurrentMillisecond());
|
||||
NdbRestarter restarter;
|
||||
int result = NDBT_OK;
|
||||
const int loops = ctx->getNumLoops();
|
||||
|
||||
for (int l = 0; l < loops && result == NDBT_OK; l++) {
|
||||
g_info << "1: === loop " << l << " ===" << endl;
|
||||
|
||||
// assuming 2-way replicated
|
||||
|
||||
int numnodes = restarter.getNumDbNodes();
|
||||
CHECK(numnodes >= 1);
|
||||
if (numnodes == 1)
|
||||
break;
|
||||
|
||||
int masterNodeId = restarter.getMasterNodeId();
|
||||
CHECK(masterNodeId != -1);
|
||||
|
||||
// for more complex cases need more restarter support methods
|
||||
|
||||
int nodeIdList[2] = { 0, 0 };
|
||||
int nodeIdCnt = 0;
|
||||
|
||||
if (numnodes >= 2) {
|
||||
int rand = myRandom48(numnodes);
|
||||
int nodeId = restarter.getRandomNotMasterNodeId(rand);
|
||||
CHECK(nodeId != -1);
|
||||
nodeIdList[nodeIdCnt++] = nodeId;
|
||||
}
|
||||
|
||||
if (numnodes >= 4 && myRandom48(2) == 0) {
|
||||
int rand = myRandom48(numnodes);
|
||||
int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand);
|
||||
CHECK(nodeId != -1);
|
||||
if (nodeId != masterNodeId)
|
||||
nodeIdList[nodeIdCnt++] = nodeId;
|
||||
}
|
||||
|
||||
g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl;
|
||||
|
||||
const uint timeout = 60; //secs for node wait
|
||||
const unsigned maxsleep = 2000; //ms
|
||||
|
||||
bool NF_ops = ctx->getProperty("Restart_NF_ops");
|
||||
uint NF_type = ctx->getProperty("Restart_NF_type");
|
||||
bool NR_ops = ctx->getProperty("Restart_NR_ops");
|
||||
bool NR_error = ctx->getProperty("Restart_NR_error");
|
||||
|
||||
g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl;
|
||||
if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2))
|
||||
break;
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
{
|
||||
for (int i = 0; i < nodeIdCnt; i++) {
|
||||
int nodeId = nodeIdList[i];
|
||||
|
||||
bool nostart = true;
|
||||
bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2);
|
||||
bool initial = myRandom48(2);
|
||||
|
||||
char flags[40];
|
||||
strcpy(flags, "flags: nostart");
|
||||
if (abort)
|
||||
strcat(flags, ",abort");
|
||||
if (initial)
|
||||
strcat(flags, ",initial");
|
||||
|
||||
g_info << "1: restart " << nodeId << " " << flags << endl;
|
||||
CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
g_info << "1: wait for nostart" << endl;
|
||||
CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0);
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
int err_master = 0;
|
||||
int err_node[2] = { 0, 0 };
|
||||
|
||||
if (NR_error) {
|
||||
err_master = errlst_master[l % errcnt_master];
|
||||
|
||||
// limitation: cannot have 2 node restarts and crash_insert
|
||||
// one node may die for real (NF during startup)
|
||||
|
||||
for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) {
|
||||
err_node[i] = errlst_node[l % errcnt_node];
|
||||
|
||||
// 7176 - no DICT lock protection
|
||||
|
||||
if (err_node[i] == 7176) {
|
||||
g_info << "1: no dict ops due to error insert "
|
||||
<< err_node[i] << endl;
|
||||
NR_ops = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl;
|
||||
if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2))
|
||||
break;
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "1: start nodes" << endl;
|
||||
CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0);
|
||||
|
||||
if (NR_error) {
|
||||
{
|
||||
int err = err_master;
|
||||
if (err != 0) {
|
||||
g_info << "1: insert master error " << err << endl;
|
||||
CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < nodeIdCnt; i++) {
|
||||
int nodeId = nodeIdList[i];
|
||||
|
||||
int err = err_node[i];
|
||||
if (err != 0) {
|
||||
g_info << "1: insert node " << nodeId << " error " << err << endl;
|
||||
CHECK(restarter.insertErrorInNode(nodeId, err) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "1: wait cluster started" << endl;
|
||||
CHECK(restarter.waitClusterStarted(timeout) == 0);
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "1: restart done" << endl;
|
||||
}
|
||||
|
||||
g_info << "1: stop dict ops" << endl;
|
||||
send_dict_ops_cmd(ctx, 3);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
runDictOps(NDBT_Context* ctx, NDBT_Step* step)
|
||||
{
|
||||
myRandom48Init(NdbTick_CurrentMillisecond());
|
||||
int result = NDBT_OK;
|
||||
|
||||
for (int l = 0; result == NDBT_OK; l++) {
|
||||
if (! recv_dict_ops_run(ctx))
|
||||
break;
|
||||
|
||||
g_info << "2: === loop " << l << " ===" << endl;
|
||||
|
||||
Ndb* pNdb = GETNDB(step);
|
||||
NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
|
||||
const NdbDictionary::Table* pTab = ctx->getTab();
|
||||
const char* tabName = pTab->getName();
|
||||
|
||||
const unsigned long maxsleep = 100; //ms
|
||||
|
||||
g_info << "2: create table" << endl;
|
||||
{
|
||||
uint count = 0;
|
||||
try_create:
|
||||
count++;
|
||||
if (pDic->createTable(*pTab) != 0) {
|
||||
const NdbError err = pDic->getNdbError();
|
||||
if (count == 1)
|
||||
g_err << "2: " << tabName << ": create failed: " << err << endl;
|
||||
if (err.code != 711) {
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
goto try_create;
|
||||
}
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "2: verify create" << endl;
|
||||
const NdbDictionary::Table* pTab2 = pDic->getTable(tabName);
|
||||
if (pTab2 == NULL) {
|
||||
const NdbError err = pDic->getNdbError();
|
||||
g_err << "2: " << tabName << ": verify create: " << err << endl;
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
// replace by the Retrieved table
|
||||
pTab = pTab2;
|
||||
|
||||
int records = myRandom48(ctx->getNumRecords());
|
||||
g_info << "2: load " << records << " records" << endl;
|
||||
HugoTransactions hugoTrans(*pTab);
|
||||
if (hugoTrans.loadTable(pNdb, records) != 0) {
|
||||
// XXX get error code from hugo
|
||||
g_err << "2: " << tabName << ": load failed" << endl;
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "2: drop" << endl;
|
||||
{
|
||||
uint count = 0;
|
||||
try_drop:
|
||||
count++;
|
||||
if (pDic->dropTable(tabName) != 0) {
|
||||
const NdbError err = pDic->getNdbError();
|
||||
if (count == 1)
|
||||
g_err << "2: " << tabName << ": drop failed: " << err << endl;
|
||||
if (err.code != 711) {
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
goto try_drop;
|
||||
}
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
|
||||
g_info << "2: verify drop" << endl;
|
||||
const NdbDictionary::Table* pTab3 = pDic->getTable(tabName);
|
||||
if (pTab3 != NULL) {
|
||||
g_err << "2: " << tabName << ": verify drop: table exists" << endl;
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
if (pDic->getNdbError().code != 709) {
|
||||
const NdbError err = pDic->getNdbError();
|
||||
g_err << "2: " << tabName << ": verify drop: " << err << endl;
|
||||
result = NDBT_FAILED;
|
||||
break;
|
||||
}
|
||||
NdbSleep_MilliSleep(myRandom48(maxsleep));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
NDBT_TESTSUITE(testDict);
|
||||
TESTCASE("CreateAndDrop",
|
||||
"Try to create and drop the table loop number of times\n"){
|
||||
|
@ -1757,6 +2050,34 @@ TESTCASE("FailAddFragment",
|
|||
"Fail add fragment or attribute in ACC or TUP or TUX\n"){
|
||||
INITIALIZER(runFailAddFragment);
|
||||
}
|
||||
TESTCASE("Restart_NF1",
|
||||
"DICT ops during node graceful shutdown (not master)"){
|
||||
TC_PROPERTY("Restart_NF_ops", 1);
|
||||
TC_PROPERTY("Restart_NF_type", 1);
|
||||
STEP(runRestarts);
|
||||
STEP(runDictOps);
|
||||
}
|
||||
TESTCASE("Restart_NF2",
|
||||
"DICT ops during node shutdown abort (not master)"){
|
||||
TC_PROPERTY("Restart_NF_ops", 1);
|
||||
TC_PROPERTY("Restart_NF_type", 2);
|
||||
STEP(runRestarts);
|
||||
STEP(runDictOps);
|
||||
}
|
||||
TESTCASE("Restart_NR1",
|
||||
"DICT ops during node startup (not master)"){
|
||||
TC_PROPERTY("Restart_NR_ops", 1);
|
||||
STEP(runRestarts);
|
||||
STEP(runDictOps);
|
||||
}
|
||||
TESTCASE("Restart_NR2",
|
||||
"DICT ops during node startup with crash inserts (not master)"){
|
||||
TC_PROPERTY("Restart_NR_ops", 1);
|
||||
TC_PROPERTY("Restart_NR_error", 1);
|
||||
STEP(runRestarts);
|
||||
STEP(runDictOps);
|
||||
}
|
||||
|
||||
NDBT_TESTSUITE_END(testDict);
|
||||
|
||||
int main(int argc, const char** argv){
|
||||
|
|
Loading…
Reference in a new issue