mirror of
https://github.com/MariaDB/server.git
synced 2025-01-23 23:34:34 +01:00
500 lines
12 KiB
C
500 lines
12 KiB
C
|
/*-
|
||
|
* See the file LICENSE for redistribution information.
|
||
|
*
|
||
|
* Copyright (c) 2004-2005
|
||
|
* Sleepycat Software. All rights reserved.
|
||
|
*
|
||
|
* $Id: rep_verify.c,v 12.21 2005/10/19 19:06:37 sue Exp $
|
||
|
*/
|
||
|
|
||
|
#include "db_config.h"
|
||
|
|
||
|
#ifndef NO_SYSTEM_INCLUDES
|
||
|
#if TIME_WITH_SYS_TIME
|
||
|
#include <sys/time.h>
|
||
|
#include <time.h>
|
||
|
#else
|
||
|
#if HAVE_SYS_TIME_H
|
||
|
#include <sys/time.h>
|
||
|
#else
|
||
|
#include <time.h>
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#endif
|
||
|
|
||
|
#include "db_int.h"
|
||
|
#include "dbinc/db_page.h"
|
||
|
#include "dbinc/db_am.h"
|
||
|
#include "dbinc/log.h"
|
||
|
#include "dbinc/txn.h"
|
||
|
|
||
|
static int __rep_dorecovery __P((DB_ENV *, DB_LSN *, DB_LSN *));
|
||
|
|
||
|
/*
|
||
|
* __rep_verify --
|
||
|
* Handle a REP_VERIFY message.
|
||
|
*
|
||
|
* PUBLIC: int __rep_verify __P((DB_ENV *, REP_CONTROL *, DBT *, int, time_t));
|
||
|
*/
|
||
|
int
|
||
|
__rep_verify(dbenv, rp, rec, eid, savetime)
|
||
|
DB_ENV *dbenv;
|
||
|
REP_CONTROL *rp;
|
||
|
DBT *rec;
|
||
|
int eid;
|
||
|
time_t savetime;
|
||
|
{
|
||
|
DB_LOG *dblp;
|
||
|
DB_LOGC *logc;
|
||
|
DB_LSN lsn;
|
||
|
DB_REP *db_rep;
|
||
|
DBT mylog;
|
||
|
LOG *lp;
|
||
|
REP *rep;
|
||
|
u_int32_t rectype;
|
||
|
int match, ret, t_ret;
|
||
|
|
||
|
ret = 0;
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
dblp = dbenv->lg_handle;
|
||
|
lp = dblp->reginfo.primary;
|
||
|
|
||
|
if (IS_ZERO_LSN(lp->verify_lsn))
|
||
|
return (ret);
|
||
|
|
||
|
if ((ret = __log_cursor(dbenv, &logc)) != 0)
|
||
|
return (ret);
|
||
|
memset(&mylog, 0, sizeof(mylog));
|
||
|
if ((ret = __log_c_get(logc, &rp->lsn, &mylog, DB_SET)) != 0)
|
||
|
goto err;;
|
||
|
match = 0;
|
||
|
memcpy(&rectype, mylog.data, sizeof(rectype));
|
||
|
if (mylog.size == rec->size &&
|
||
|
memcmp(mylog.data, rec->data, rec->size) == 0)
|
||
|
match = 1;
|
||
|
/*
|
||
|
* If we don't have a match, backup to the previous
|
||
|
* identification record and try again.
|
||
|
*/
|
||
|
if (match == 0) {
|
||
|
ZERO_LSN(lsn);
|
||
|
if ((ret = __rep_log_backup(logc, &lsn)) == 0) {
|
||
|
MUTEX_LOCK(dbenv, rep->mtx_clientdb);
|
||
|
lp->verify_lsn = lsn;
|
||
|
lp->rcvd_recs = 0;
|
||
|
lp->wait_recs = rep->request_gap;
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
(void)__rep_send_message(dbenv, eid, REP_VERIFY_REQ,
|
||
|
&lsn, NULL, 0, DB_REP_ANYWHERE);
|
||
|
} else if (ret == DB_NOTFOUND) {
|
||
|
/*
|
||
|
* We've either run out of records because
|
||
|
* logs have been removed or we've rolled back
|
||
|
* all the way to the beginning. In the latter
|
||
|
* we don't think these sites were ever part of
|
||
|
* the same environment and we'll say so.
|
||
|
* In the former, request internal backup.
|
||
|
*/
|
||
|
if (rp->lsn.file == 1) {
|
||
|
__db_err(dbenv,
|
||
|
"Client was never part of master's environment");
|
||
|
ret = DB_REP_JOIN_FAILURE;
|
||
|
} else {
|
||
|
rep->stat.st_outdated++;
|
||
|
|
||
|
LOG_SYSTEM_LOCK(dbenv);
|
||
|
lsn = lp->lsn;
|
||
|
LOG_SYSTEM_UNLOCK(dbenv);
|
||
|
REP_SYSTEM_LOCK(dbenv);
|
||
|
F_CLR(rep, REP_F_RECOVER_VERIFY);
|
||
|
if (FLD_ISSET(rep->config, REP_C_NOAUTOINIT))
|
||
|
ret = DB_REP_JOIN_FAILURE;
|
||
|
else {
|
||
|
F_SET(rep, REP_F_RECOVER_UPDATE);
|
||
|
ZERO_LSN(rep->first_lsn);
|
||
|
}
|
||
|
REP_SYSTEM_UNLOCK(dbenv);
|
||
|
if (ret == 0)
|
||
|
(void)__rep_send_message(dbenv,
|
||
|
eid, REP_UPDATE_REQ, NULL,
|
||
|
NULL, 0, DB_REP_ANYWHERE);
|
||
|
}
|
||
|
}
|
||
|
} else
|
||
|
ret = __rep_verify_match(dbenv, &rp->lsn, savetime);
|
||
|
|
||
|
err: if ((t_ret = __log_c_close(logc)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_verify_fail --
|
||
|
* Handle a REP_VERIFY_FAIL message.
|
||
|
*
|
||
|
* PUBLIC: int __rep_verify_fail __P((DB_ENV *, REP_CONTROL *, int));
|
||
|
*/
|
||
|
int
|
||
|
__rep_verify_fail(dbenv, rp, eid)
|
||
|
DB_ENV *dbenv;
|
||
|
REP_CONTROL *rp;
|
||
|
int eid;
|
||
|
{
|
||
|
DB_LOG *dblp;
|
||
|
DB_REP *db_rep;
|
||
|
LOG *lp;
|
||
|
REP *rep;
|
||
|
int ret;
|
||
|
|
||
|
ret = 0;
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
dblp = dbenv->lg_handle;
|
||
|
lp = dblp->reginfo.primary;
|
||
|
|
||
|
/*
|
||
|
* If any recovery flags are set, but not VERIFY,
|
||
|
* then we ignore this message. We are already
|
||
|
* in the middle of updating.
|
||
|
*/
|
||
|
if (F_ISSET(rep, REP_F_RECOVER_MASK) &&
|
||
|
!F_ISSET(rep, REP_F_RECOVER_VERIFY))
|
||
|
return (0);
|
||
|
rep->stat.st_outdated++;
|
||
|
|
||
|
MUTEX_LOCK(dbenv, rep->mtx_clientdb);
|
||
|
REP_SYSTEM_LOCK(dbenv);
|
||
|
/*
|
||
|
* We don't want an old or delayed VERIFY_FAIL
|
||
|
* message to throw us into internal initialization
|
||
|
* when we shouldn't be.
|
||
|
*
|
||
|
* Only go into internal initialization if:
|
||
|
* We are set for AUTOINIT mode.
|
||
|
* We are in RECOVER_VERIFY and this LSN == verify_lsn.
|
||
|
* We are not in any RECOVERY and we are expecting
|
||
|
* an LSN that no longer exists on the master.
|
||
|
* Otherwise, ignore this message.
|
||
|
*/
|
||
|
if (FLD_ISSET(rep->config, REP_C_NOAUTOINIT) &&
|
||
|
((F_ISSET(rep, REP_F_RECOVER_VERIFY) &&
|
||
|
log_compare(&rp->lsn, &lp->verify_lsn) == 0) ||
|
||
|
(F_ISSET(rep, REP_F_RECOVER_MASK) == 0 &&
|
||
|
log_compare(&rp->lsn, &lp->ready_lsn) >= 0))) {
|
||
|
ret = DB_REP_JOIN_FAILURE;
|
||
|
goto unlock;
|
||
|
}
|
||
|
if (((F_ISSET(rep, REP_F_RECOVER_VERIFY)) &&
|
||
|
log_compare(&rp->lsn, &lp->verify_lsn) == 0) ||
|
||
|
(F_ISSET(rep, REP_F_RECOVER_MASK) == 0 &&
|
||
|
log_compare(&rp->lsn, &lp->ready_lsn) >= 0)) {
|
||
|
F_CLR(rep, REP_F_RECOVER_VERIFY);
|
||
|
F_SET(rep, REP_F_RECOVER_UPDATE);
|
||
|
ZERO_LSN(rep->first_lsn);
|
||
|
lp->wait_recs = rep->request_gap;
|
||
|
REP_SYSTEM_UNLOCK(dbenv);
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
(void)__rep_send_message(dbenv,
|
||
|
eid, REP_UPDATE_REQ, NULL, NULL, 0, 0);
|
||
|
} else {
|
||
|
unlock: REP_SYSTEM_UNLOCK(dbenv);
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
}
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_verify_req --
|
||
|
* Handle a REP_VERIFY_REQ message.
|
||
|
*
|
||
|
* PUBLIC: int __rep_verify_req __P((DB_ENV *, REP_CONTROL *, int));
|
||
|
*/
|
||
|
int
|
||
|
__rep_verify_req(dbenv, rp, eid)
|
||
|
DB_ENV *dbenv;
|
||
|
REP_CONTROL *rp;
|
||
|
int eid;
|
||
|
{
|
||
|
DB_LOGC *logc;
|
||
|
DB_REP *db_rep;
|
||
|
DBT *d, data_dbt;
|
||
|
REP *rep;
|
||
|
u_int32_t type;
|
||
|
int old, ret;
|
||
|
|
||
|
ret = 0;
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
|
||
|
type = REP_VERIFY;
|
||
|
if ((ret = __log_cursor(dbenv, &logc)) != 0)
|
||
|
return (ret);
|
||
|
d = &data_dbt;
|
||
|
memset(d, 0, sizeof(data_dbt));
|
||
|
F_SET(logc, DB_LOG_SILENT_ERR);
|
||
|
ret = __log_c_get(logc, &rp->lsn, d, DB_SET);
|
||
|
/*
|
||
|
* If the LSN was invalid, then we might get a not
|
||
|
* found, we might get an EIO, we could get anything.
|
||
|
* If we get a DB_NOTFOUND, then there is a chance that
|
||
|
* the LSN comes before the first file present in which
|
||
|
* case we need to return a fail so that the client can return
|
||
|
* a DB_OUTDATED.
|
||
|
*
|
||
|
* If we're a client servicing this request and we get a
|
||
|
* NOTFOUND, return it so the caller can rerequest from
|
||
|
* a better source.
|
||
|
*/
|
||
|
if (ret == DB_NOTFOUND) {
|
||
|
if (F_ISSET(rep, REP_F_CLIENT))
|
||
|
goto notfound;
|
||
|
else if (__log_is_outdated(dbenv, rp->lsn.file, &old) == 0 &&
|
||
|
old != 0)
|
||
|
type = REP_VERIFY_FAIL;
|
||
|
}
|
||
|
|
||
|
if (ret != 0)
|
||
|
d = NULL;
|
||
|
|
||
|
(void)__rep_send_message(dbenv, eid, type, &rp->lsn, d, 0, 0);
|
||
|
notfound:
|
||
|
ret = __log_c_close(logc);
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
__rep_dorecovery(dbenv, lsnp, trunclsnp)
|
||
|
DB_ENV *dbenv;
|
||
|
DB_LSN *lsnp, *trunclsnp;
|
||
|
{
|
||
|
DB_LSN lsn;
|
||
|
DB_REP *db_rep;
|
||
|
DBT mylog;
|
||
|
DB_LOGC *logc;
|
||
|
int ret, t_ret, update;
|
||
|
u_int32_t rectype;
|
||
|
__txn_regop_args *txnrec;
|
||
|
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
|
||
|
/* Figure out if we are backing out any committed transactions. */
|
||
|
if ((ret = __log_cursor(dbenv, &logc)) != 0)
|
||
|
return (ret);
|
||
|
|
||
|
memset(&mylog, 0, sizeof(mylog));
|
||
|
update = 0;
|
||
|
while (update == 0 &&
|
||
|
(ret = __log_c_get(logc, &lsn, &mylog, DB_PREV)) == 0 &&
|
||
|
log_compare(&lsn, lsnp) > 0) {
|
||
|
memcpy(&rectype, mylog.data, sizeof(rectype));
|
||
|
if (rectype == DB___txn_regop) {
|
||
|
if ((ret =
|
||
|
__txn_regop_read(dbenv, mylog.data, &txnrec)) != 0)
|
||
|
goto err;
|
||
|
if (txnrec->opcode != TXN_ABORT)
|
||
|
update = 1;
|
||
|
__os_free(dbenv, txnrec);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If we successfully run recovery, we've opened all the necessary
|
||
|
* files. We are guaranteed to be single-threaded here, so no mutex
|
||
|
* is necessary.
|
||
|
*/
|
||
|
if ((ret = __db_apprec(dbenv, lsnp, trunclsnp, update, 0)) == 0)
|
||
|
F_SET(db_rep, DBREP_OPENFILES);
|
||
|
|
||
|
err: if ((t_ret = __log_c_close(logc)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_verify_match --
|
||
|
* We have just received a matching log record during verification.
|
||
|
* Figure out if we're going to need to run recovery. If so, wait until
|
||
|
* everything else has exited the library. If not, set up the world
|
||
|
* correctly and move forward.
|
||
|
*
|
||
|
* PUBLIC: int __rep_verify_match __P((DB_ENV *, DB_LSN *, time_t));
|
||
|
*/
|
||
|
int
|
||
|
__rep_verify_match(dbenv, reclsnp, savetime)
|
||
|
DB_ENV *dbenv;
|
||
|
DB_LSN *reclsnp;
|
||
|
time_t savetime;
|
||
|
{
|
||
|
DB_LOG *dblp;
|
||
|
DB_LSN trunclsn;
|
||
|
DB_REP *db_rep;
|
||
|
LOG *lp;
|
||
|
REGENV *renv;
|
||
|
REGINFO *infop;
|
||
|
REP *rep;
|
||
|
int done, master, ret;
|
||
|
u_int32_t unused;
|
||
|
|
||
|
dblp = dbenv->lg_handle;
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
lp = dblp->reginfo.primary;
|
||
|
ret = 0;
|
||
|
infop = dbenv->reginfo;
|
||
|
renv = infop->primary;
|
||
|
|
||
|
/*
|
||
|
* Check if the savetime is different than our current time stamp.
|
||
|
* If it is, then we're racing with another thread trying to recover
|
||
|
* and we lost. We must give up.
|
||
|
*/
|
||
|
MUTEX_LOCK(dbenv, rep->mtx_clientdb);
|
||
|
done = savetime != renv->rep_timestamp;
|
||
|
if (done) {
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
return (0);
|
||
|
}
|
||
|
ZERO_LSN(lp->verify_lsn);
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
|
||
|
/*
|
||
|
* Make sure the world hasn't changed while we tried to get
|
||
|
* the lock. If it hasn't then it's time for us to kick all
|
||
|
* operations out of DB and run recovery.
|
||
|
*/
|
||
|
REP_SYSTEM_LOCK(dbenv);
|
||
|
if (!F_ISSET(rep, REP_F_RECOVER_LOG) &&
|
||
|
(F_ISSET(rep, REP_F_READY) || rep->in_recovery != 0)) {
|
||
|
rep->stat.st_msgs_recover++;
|
||
|
goto errunlock;
|
||
|
}
|
||
|
|
||
|
if ((ret = __rep_lockout(dbenv, rep, 1)) != 0)
|
||
|
goto errunlock;
|
||
|
|
||
|
/* OK, everyone is out, we can now run recovery. */
|
||
|
REP_SYSTEM_UNLOCK(dbenv);
|
||
|
|
||
|
if ((ret = __rep_dorecovery(dbenv, reclsnp, &trunclsn)) != 0) {
|
||
|
REP_SYSTEM_LOCK(dbenv);
|
||
|
rep->in_recovery = 0;
|
||
|
F_CLR(rep, REP_F_READY);
|
||
|
goto errunlock;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The log has been truncated (either directly by us or by __db_apprec)
|
||
|
* We want to make sure we're waiting for the LSN at the new end-of-log,
|
||
|
* not some later point.
|
||
|
*/
|
||
|
MUTEX_LOCK(dbenv, rep->mtx_clientdb);
|
||
|
lp->ready_lsn = trunclsn;
|
||
|
ZERO_LSN(lp->waiting_lsn);
|
||
|
ZERO_LSN(lp->max_wait_lsn);
|
||
|
lp->max_perm_lsn = *reclsnp;
|
||
|
lp->wait_recs = 0;
|
||
|
lp->rcvd_recs = 0;
|
||
|
ZERO_LSN(lp->verify_lsn);
|
||
|
|
||
|
/*
|
||
|
* Discard any log records we have queued; we're about to re-request
|
||
|
* them, and can't trust the ones in the queue. We need to set the
|
||
|
* DB_AM_RECOVER bit in this handle, so that the operation doesn't
|
||
|
* deadlock.
|
||
|
*/
|
||
|
F_SET(db_rep->rep_db, DB_AM_RECOVER);
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
ret = __db_truncate(db_rep->rep_db, NULL, &unused);
|
||
|
MUTEX_LOCK(dbenv, rep->mtx_clientdb);
|
||
|
F_CLR(db_rep->rep_db, DB_AM_RECOVER);
|
||
|
|
||
|
REP_SYSTEM_LOCK(dbenv);
|
||
|
rep->stat.st_log_queued = 0;
|
||
|
rep->in_recovery = 0;
|
||
|
F_CLR(rep, REP_F_NOARCHIVE | REP_F_RECOVER_MASK);
|
||
|
|
||
|
if (ret != 0)
|
||
|
goto errunlock2;
|
||
|
|
||
|
/*
|
||
|
* If the master_id is invalid, this means that since
|
||
|
* the last record was sent, somebody declared an
|
||
|
* election and we may not have a master to request
|
||
|
* things of.
|
||
|
*
|
||
|
* This is not an error; when we find a new master,
|
||
|
* we'll re-negotiate where the end of the log is and
|
||
|
* try to bring ourselves up to date again anyway.
|
||
|
*
|
||
|
* !!!
|
||
|
* We cannot assert the election flags though because
|
||
|
* somebody may have declared an election and then
|
||
|
* got an error, thus clearing the election flags
|
||
|
* but we still have an invalid master_id.
|
||
|
*/
|
||
|
master = rep->master_id;
|
||
|
REP_SYSTEM_UNLOCK(dbenv);
|
||
|
if (master == DB_EID_INVALID) {
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
ret = 0;
|
||
|
} else {
|
||
|
/*
|
||
|
* We're making an ALL_REQ. But now that we've
|
||
|
* cleared the flags, we're likely receiving new
|
||
|
* log records from the master, resulting in a gap
|
||
|
* immediately. So to avoid multiple data streams,
|
||
|
* set the wait_recs value high now to give the master
|
||
|
* a chance to start sending us these records before
|
||
|
* the gap code re-requests the same gap. Wait_recs
|
||
|
* will get reset once we start receiving these
|
||
|
* records.
|
||
|
*/
|
||
|
lp->wait_recs = rep->max_gap;
|
||
|
MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
(void)__rep_send_message(dbenv,
|
||
|
master, REP_ALL_REQ, reclsnp, NULL, 0, DB_REP_ANYWHERE);
|
||
|
}
|
||
|
if (0) {
|
||
|
errunlock2: MUTEX_UNLOCK(dbenv, rep->mtx_clientdb);
|
||
|
errunlock: REP_SYSTEM_UNLOCK(dbenv);
|
||
|
}
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_log_backup --
|
||
|
*
|
||
|
* In the verify handshake, we walk backward looking for
|
||
|
* identification records. Those are the only record types
|
||
|
* we verify and match on.
|
||
|
*
|
||
|
* PUBLIC: int __rep_log_backup __P((DB_LOGC *, DB_LSN *));
|
||
|
*/
|
||
|
int
|
||
|
__rep_log_backup(logc, lsn)
|
||
|
DB_LOGC *logc;
|
||
|
DB_LSN *lsn;
|
||
|
{
|
||
|
DBT mylog;
|
||
|
u_int32_t rectype;
|
||
|
int ret;
|
||
|
|
||
|
ret = 0;
|
||
|
memset(&mylog, 0, sizeof(mylog));
|
||
|
while ((ret = __log_c_get(logc, lsn, &mylog, DB_PREV)) == 0) {
|
||
|
/*
|
||
|
* Look at the record type. Only txn_regop and txn_ckp
|
||
|
* are interesting to us.
|
||
|
*/
|
||
|
memcpy(&rectype, mylog.data, sizeof(rectype));
|
||
|
if (rectype == DB___txn_ckp || rectype == DB___txn_regop)
|
||
|
break;
|
||
|
}
|
||
|
return (ret);
|
||
|
}
|