mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 11:01:52 +01:00
MySQL Bugs: #16772: Starting node joins cluster too early, workaround avoiding the issue for dynamically allocated nodeid's
This commit is contained in:
parent
8188f77c17
commit
fd6a7d35b9
9 changed files with 386 additions and 4 deletions
|
@ -111,9 +111,9 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
|
|||
/* 57 unused */
|
||||
/* 58 unused */
|
||||
/* 59 unused */
|
||||
/* 60 unused */
|
||||
/* 61 unused */
|
||||
/* 62 unused */
|
||||
#define GSN_ALLOC_NODEID_REQ 60
|
||||
#define GSN_ALLOC_NODEID_CONF 61
|
||||
#define GSN_ALLOC_NODEID_REF 62
|
||||
/* 63 unused */
|
||||
/* 64 unused */
|
||||
/* 65 unused */
|
||||
|
|
65
storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
Normal file
65
storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
/* Copyright (C) 2003 MySQL AB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
||||
|
||||
#ifndef ALLOC_NODE_ID_HPP
|
||||
#define ALLOC_NODE_ID_HPP
|
||||
|
||||
#include "SignalData.hpp"
|
||||
#include <NodeBitmask.hpp>
|
||||
|
||||
/**
|
||||
* Request to allocate node id
|
||||
*/
|
||||
class AllocNodeIdReq {
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
|
||||
Uint32 senderRef;
|
||||
Uint32 senderData;
|
||||
Uint32 nodeId;
|
||||
};
|
||||
|
||||
class AllocNodeIdConf {
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 3 );
|
||||
|
||||
Uint32 senderRef;
|
||||
Uint32 senderData;
|
||||
Uint32 nodeId;
|
||||
};
|
||||
|
||||
class AllocNodeIdRef {
|
||||
public:
|
||||
STATIC_CONST( SignalLength = 5 );
|
||||
|
||||
enum ErrorCodes {
|
||||
NoError = 0,
|
||||
Undefined = 1,
|
||||
NF_FakeErrorREF = 11,
|
||||
Busy = 701,
|
||||
NotMaster = 702,
|
||||
NodeReserved = 1701,
|
||||
NodeConnected = 1702,
|
||||
NodeFailureHandlingNotCompleted = 1703
|
||||
};
|
||||
|
||||
Uint32 senderRef;
|
||||
Uint32 senderData;
|
||||
Uint32 nodeId;
|
||||
Uint32 errorCode;
|
||||
Uint32 masterRef;
|
||||
};
|
||||
#endif
|
|
@ -1509,6 +1509,9 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
|
|||
sendSignal(SUMA_REF, GSN_NODE_FAILREP, signal,
|
||||
NodeFailRep::SignalLength, JBB);
|
||||
|
||||
sendSignal(QMGR_REF, GSN_NODE_FAILREP, signal,
|
||||
NodeFailRep::SignalLength, JBB);
|
||||
|
||||
Uint32 nodeId = 0;
|
||||
while(!allFailed.isclear()){
|
||||
nodeId = allFailed.find(nodeId + 1);
|
||||
|
|
|
@ -29,6 +29,10 @@
|
|||
#include <signaldata/CmRegSignalData.hpp>
|
||||
#include <signaldata/ApiRegSignalData.hpp>
|
||||
#include <signaldata/FailRep.hpp>
|
||||
#include <signaldata/AllocNodeId.hpp>
|
||||
|
||||
#include <SafeCounter.hpp>
|
||||
#include <RequestTracker.hpp>
|
||||
|
||||
#include "timer.hpp"
|
||||
|
||||
|
@ -222,6 +226,12 @@ private:
|
|||
void execAPI_VERSION_REQ(Signal* signal);
|
||||
void execAPI_BROADCAST_REP(Signal* signal);
|
||||
|
||||
void execNODE_FAILREP(Signal *);
|
||||
void execALLOC_NODEID_REQ(Signal *);
|
||||
void execALLOC_NODEID_CONF(Signal *);
|
||||
void execALLOC_NODEID_REF(Signal *);
|
||||
void completeAllocNodeIdReq(Signal *);
|
||||
|
||||
// Arbitration signals
|
||||
void execARBIT_CFG(Signal* signal);
|
||||
void execARBIT_PREPREQ(Signal* signal);
|
||||
|
@ -388,6 +398,14 @@ private:
|
|||
Uint16 cprepFailedNodes[MAX_NDB_NODES];
|
||||
Uint16 ccommitFailedNodes[MAX_NDB_NODES];
|
||||
|
||||
struct OpAllocNodeIdReq {
|
||||
RequestTracker m_tracker;
|
||||
AllocNodeIdReq m_req;
|
||||
Uint32 m_connectCount;
|
||||
Uint32 m_error;
|
||||
};
|
||||
|
||||
struct OpAllocNodeIdReq opAllocNodeIdReq;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -85,6 +85,11 @@ Qmgr::Qmgr(const class Configuration & conf)
|
|||
addRecSignal(GSN_READ_NODESREQ, &Qmgr::execREAD_NODESREQ);
|
||||
addRecSignal(GSN_SET_VAR_REQ, &Qmgr::execSET_VAR_REQ);
|
||||
addRecSignal(GSN_API_BROADCAST_REP, &Qmgr::execAPI_BROADCAST_REP);
|
||||
|
||||
addRecSignal(GSN_NODE_FAILREP, &Qmgr::execNODE_FAILREP);
|
||||
addRecSignal(GSN_ALLOC_NODEID_REQ, &Qmgr::execALLOC_NODEID_REQ);
|
||||
addRecSignal(GSN_ALLOC_NODEID_CONF, &Qmgr::execALLOC_NODEID_CONF);
|
||||
addRecSignal(GSN_ALLOC_NODEID_REF, &Qmgr::execALLOC_NODEID_REF);
|
||||
|
||||
// Arbitration signals
|
||||
addRecSignal(GSN_ARBIT_PREPREQ, &Qmgr::execARBIT_PREPREQ);
|
||||
|
|
|
@ -3984,3 +3984,167 @@ Qmgr::execAPI_BROADCAST_REP(Signal* signal)
|
|||
NodeReceiverGroup rg(API_CLUSTERMGR, mask);
|
||||
sendSignal(rg, api.gsn, signal, len, JBB); // forward sections
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::execNODE_FAILREP(Signal * signal)
|
||||
{
|
||||
jamEntry();
|
||||
// make sure any distributed signals get acknowledged
|
||||
// destructive of the signal
|
||||
c_counterMgr.execNODE_FAILREP(signal);
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::execALLOC_NODEID_REQ(Signal * signal)
|
||||
{
|
||||
jamEntry();
|
||||
const AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtr();
|
||||
Uint32 senderRef = req->senderRef;
|
||||
Uint32 nodeId = req->nodeId;
|
||||
Uint32 error = 0;
|
||||
|
||||
if (refToBlock(senderRef) != QMGR) // request from management server
|
||||
{
|
||||
/* master */
|
||||
|
||||
if (getOwnNodeId() != cpresident)
|
||||
error = AllocNodeIdRef::NotMaster;
|
||||
else if (!opAllocNodeIdReq.m_tracker.done())
|
||||
error = AllocNodeIdRef::Busy;
|
||||
else if (c_connectedNodes.get(nodeId))
|
||||
error = AllocNodeIdRef::NodeConnected;
|
||||
|
||||
if (error)
|
||||
{
|
||||
jam();
|
||||
AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
|
||||
ref->senderRef = reference();
|
||||
ref->errorCode = error;
|
||||
ref->masterRef = numberToRef(QMGR, cpresident);
|
||||
sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
|
||||
AllocNodeIdRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
opAllocNodeIdReq.m_req = *req;
|
||||
opAllocNodeIdReq.m_error = 0;
|
||||
opAllocNodeIdReq.m_connectCount = getNodeInfo(refToNode(senderRef)).m_connectCount;
|
||||
|
||||
jam();
|
||||
AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtrSend();
|
||||
req->senderRef = reference();
|
||||
NodeReceiverGroup rg(QMGR, c_clusterNodes);
|
||||
RequestTracker & p = opAllocNodeIdReq.m_tracker;
|
||||
p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
|
||||
|
||||
sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
|
||||
AllocNodeIdReq::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
/* participant */
|
||||
|
||||
if (c_connectedNodes.get(nodeId))
|
||||
error = AllocNodeIdRef::NodeConnected;
|
||||
else
|
||||
{
|
||||
NodeRecPtr nodePtr;
|
||||
nodePtr.i = nodeId;
|
||||
ptrAss(nodePtr, nodeRec);
|
||||
if (nodePtr.p->failState != NORMAL)
|
||||
error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
|
||||
}
|
||||
|
||||
if (error)
|
||||
{
|
||||
AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
|
||||
ref->senderRef = reference();
|
||||
ref->errorCode = error;
|
||||
sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
|
||||
AllocNodeIdRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
|
||||
AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
|
||||
conf->senderRef = reference();
|
||||
sendSignal(senderRef, GSN_ALLOC_NODEID_CONF, signal,
|
||||
AllocNodeIdConf::SignalLength, JBB);
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::execALLOC_NODEID_CONF(Signal * signal)
|
||||
{
|
||||
/* master */
|
||||
|
||||
jamEntry();
|
||||
const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
|
||||
opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
|
||||
refToNode(conf->senderRef));
|
||||
completeAllocNodeIdReq(signal);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Qmgr::execALLOC_NODEID_REF(Signal * signal)
|
||||
{
|
||||
/* master */
|
||||
|
||||
jamEntry();
|
||||
const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
|
||||
if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
|
||||
{
|
||||
opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
|
||||
refToNode(ref->senderRef));
|
||||
}
|
||||
else
|
||||
{
|
||||
opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
|
||||
refToNode(ref->senderRef));
|
||||
if (opAllocNodeIdReq.m_error == 0)
|
||||
opAllocNodeIdReq.m_error = ref->errorCode;
|
||||
}
|
||||
completeAllocNodeIdReq(signal);
|
||||
}
|
||||
|
||||
void
|
||||
Qmgr::completeAllocNodeIdReq(Signal *signal)
|
||||
{
|
||||
/* master */
|
||||
|
||||
if (!opAllocNodeIdReq.m_tracker.done())
|
||||
{
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
|
||||
if (opAllocNodeIdReq.m_connectCount !=
|
||||
getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
|
||||
{
|
||||
// management server not same version as the original requester
|
||||
jam();
|
||||
return;
|
||||
}
|
||||
|
||||
if (opAllocNodeIdReq.m_tracker.hasRef())
|
||||
{
|
||||
jam();
|
||||
AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
|
||||
ref->senderRef = reference();
|
||||
ref->senderData = opAllocNodeIdReq.m_req.senderData;
|
||||
ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
|
||||
ref->errorCode = opAllocNodeIdReq.m_error;
|
||||
ref->masterRef = numberToRef(QMGR, cpresident);
|
||||
ndbassert(AllocNodeIdRef::SignalLength == 5);
|
||||
sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
|
||||
AllocNodeIdRef::SignalLength, JBB);
|
||||
return;
|
||||
}
|
||||
jam();
|
||||
AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
|
||||
conf->senderRef = reference();
|
||||
conf->senderData = opAllocNodeIdReq.m_req.senderData;
|
||||
conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
|
||||
ndbassert(AllocNodeIdConf::SignalLength == 3);
|
||||
sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
|
||||
AllocNodeIdConf::SignalLength, JBB);
|
||||
}
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include <signaldata/ManagementServer.hpp>
|
||||
#include <signaldata/NFCompleteRep.hpp>
|
||||
#include <signaldata/NodeFailRep.hpp>
|
||||
#include <signaldata/AllocNodeId.hpp>
|
||||
#include <NdbSleep.h>
|
||||
#include <EventLogger.hpp>
|
||||
#include <DebuggerNames.hpp>
|
||||
|
@ -1712,6 +1713,88 @@ MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
|
|||
}
|
||||
}
|
||||
|
||||
int
|
||||
MgmtSrvr::alloc_node_id_req(Uint32 free_node_id)
|
||||
{
|
||||
SignalSender ss(theFacade);
|
||||
ss.lock(); // lock will be released on exit
|
||||
|
||||
SimpleSignal ssig;
|
||||
AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
|
||||
ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
|
||||
AllocNodeIdReq::SignalLength);
|
||||
|
||||
req->senderRef = ss.getOwnRef();
|
||||
req->senderData = 19;
|
||||
req->nodeId = free_node_id;
|
||||
|
||||
int do_send = 1;
|
||||
NodeId nodeId = 0;
|
||||
while (1)
|
||||
{
|
||||
if (nodeId == 0)
|
||||
{
|
||||
bool next;
|
||||
while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
|
||||
theFacade->get_node_alive(nodeId) == false);
|
||||
if (!next)
|
||||
return NO_CONTACT_WITH_DB_NODES;
|
||||
do_send = 1;
|
||||
}
|
||||
if (do_send)
|
||||
{
|
||||
if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
|
||||
return SEND_OR_RECEIVE_FAILED;
|
||||
}
|
||||
do_send = 0;
|
||||
}
|
||||
|
||||
SimpleSignal *signal = ss.waitFor();
|
||||
|
||||
int gsn = signal->readSignalNumber();
|
||||
switch (gsn) {
|
||||
case GSN_ALLOC_NODEID_CONF:
|
||||
{
|
||||
const AllocNodeIdConf * const conf =
|
||||
CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
|
||||
return 0;
|
||||
}
|
||||
case GSN_ALLOC_NODEID_REF:
|
||||
{
|
||||
const AllocNodeIdRef * const ref =
|
||||
CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
|
||||
if (ref->errorCode == AllocNodeIdRef::NotMaster ||
|
||||
ref->errorCode == AllocNodeIdRef::Busy)
|
||||
{
|
||||
do_send = 1;
|
||||
nodeId = refToNode(ref->masterRef);
|
||||
continue;
|
||||
}
|
||||
return ref->errorCode;
|
||||
}
|
||||
case GSN_NF_COMPLETEREP:
|
||||
{
|
||||
const NFCompleteRep * const rep =
|
||||
CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
|
||||
#ifdef VM_TRACE
|
||||
ndbout_c("Node %d fail completed", rep->failedNodeId);
|
||||
#endif
|
||||
if (rep->failedNodeId == nodeId)
|
||||
nodeId = 0;
|
||||
continue;
|
||||
}
|
||||
case GSN_NODE_FAILREP:{
|
||||
// ignore NF_COMPLETEREP will come
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
report_unknown_signal(signal);
|
||||
return SEND_OR_RECEIVE_FAILED;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
MgmtSrvr::alloc_node_id(NodeId * nodeId,
|
||||
enum ndb_mgm_node_type type,
|
||||
|
@ -1836,6 +1919,39 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
|
|||
}
|
||||
NdbMutex_Unlock(m_configMutex);
|
||||
|
||||
if (id_found && client_addr != 0)
|
||||
{
|
||||
int res = alloc_node_id_req(id_found);
|
||||
unsigned save_id_found = id_found;
|
||||
switch (res)
|
||||
{
|
||||
case 0:
|
||||
// ok continue
|
||||
break;
|
||||
case NO_CONTACT_WITH_DB_NODES:
|
||||
// ok continue
|
||||
break;
|
||||
default:
|
||||
// something wrong
|
||||
id_found = 0;
|
||||
break;
|
||||
|
||||
}
|
||||
if (id_found == 0)
|
||||
{
|
||||
char buf[128];
|
||||
ndb_error_string(res, buf, sizeof(buf));
|
||||
error_string.appfmt("Cluster refused allocation of id %d. Error: %d (%s).",
|
||||
save_id_found, res, buf);
|
||||
g_eventLogger.warning("Cluster refused allocation of id %d. "
|
||||
"Connection from ip %s. "
|
||||
"Returned error string \"%s\"", save_id_found,
|
||||
inet_ntoa(((struct sockaddr_in *)(client_addr))->sin_addr),
|
||||
error_string.c_str());
|
||||
DBUG_RETURN(false);
|
||||
}
|
||||
}
|
||||
|
||||
if (id_found)
|
||||
{
|
||||
*nodeId= id_found;
|
||||
|
|
|
@ -506,7 +506,8 @@ private:
|
|||
* @return -1 if block not found, otherwise block number
|
||||
*/
|
||||
int getBlockNumber(const BaseString &blockName);
|
||||
|
||||
|
||||
int alloc_node_id_req(Uint32 free_node_id);
|
||||
//**************************************************************************
|
||||
|
||||
int _blockNumber;
|
||||
|
|
|
@ -81,6 +81,7 @@ static const char* empty_string = "";
|
|||
* 1400 - SUMA
|
||||
* 1500 - LGMAN
|
||||
* 1600 - TSMAN
|
||||
* 1700 - QMGR
|
||||
* 4000 - API
|
||||
* 4100 - ""
|
||||
* 4200 - ""
|
||||
|
@ -450,6 +451,15 @@ ErrorBundle ErrorCodes[] = {
|
|||
{ 1348, DMEC, AE, "Backup failed to allocate file record (check configuration)" },
|
||||
{ 1349, DMEC, AE, "Backup failed to allocate attribute record (check configuration)" },
|
||||
{ 1329, DMEC, AE, "Backup during software upgrade not supported" },
|
||||
|
||||
/**
|
||||
* Node id allocation error codes
|
||||
*/
|
||||
|
||||
{ 1700, DMEC, IE, "Undefined error" },
|
||||
{ 1701, DMEC, AE, "Node already reserved" },
|
||||
{ 1702, DMEC, AE, "Node already connected" },
|
||||
{ 1703, DMEC, AE, "Node failure handling not completed" },
|
||||
|
||||
/**
|
||||
* Still uncategorized
|
||||
|
|
Loading…
Add table
Reference in a new issue