mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 21:12:26 +01:00
868 lines
20 KiB
C
868 lines
20 KiB
C
|
/*-
|
||
|
* See the file LICENSE for redistribution information.
|
||
|
*
|
||
|
* Copyright (c) 2001-2002
|
||
|
* Sleepycat Software. All rights reserved.
|
||
|
*/
|
||
|
|
||
|
#include "db_config.h"
|
||
|
|
||
|
#ifndef lint
|
||
|
static const char revid[] = "$Id: rep_util.c,v 1.51 2002/09/05 02:30:00 margo Exp $";
|
||
|
#endif /* not lint */
|
||
|
|
||
|
#ifndef NO_SYSTEM_INCLUDES
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#endif
|
||
|
|
||
|
#include "db_int.h"
|
||
|
#include "dbinc/db_page.h"
|
||
|
#include "dbinc/btree.h"
|
||
|
#include "dbinc/fop.h"
|
||
|
#include "dbinc/hash.h"
|
||
|
#include "dbinc/log.h"
|
||
|
#include "dbinc/qam.h"
|
||
|
#include "dbinc/rep.h"
|
||
|
#include "dbinc/txn.h"
|
||
|
|
||
|
/*
|
||
|
* rep_util.c:
|
||
|
* Miscellaneous replication-related utility functions, including
|
||
|
* those called by other subsystems.
|
||
|
*/
|
||
|
static int __rep_cmp_bylsn __P((const void *, const void *));
|
||
|
static int __rep_cmp_bypage __P((const void *, const void *));
|
||
|
|
||
|
#ifdef REP_DIAGNOSTIC
|
||
|
static void __rep_print_logmsg __P((DB_ENV *, const DBT *, DB_LSN *));
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* __rep_check_alloc --
|
||
|
* Make sure the array of TXN_REC entries is of at least size n.
|
||
|
* (This function is called by the __*_getpgnos() functions in
|
||
|
* *.src.)
|
||
|
*
|
||
|
* PUBLIC: int __rep_check_alloc __P((DB_ENV *, TXN_RECS *, int));
|
||
|
*/
|
||
|
int
|
||
|
__rep_check_alloc(dbenv, r, n)
|
||
|
DB_ENV *dbenv;
|
||
|
TXN_RECS *r;
|
||
|
int n;
|
||
|
{
|
||
|
int nalloc, ret;
|
||
|
|
||
|
while (r->nalloc < r->npages + n) {
|
||
|
nalloc = r->nalloc == 0 ? 20 : r->nalloc * 2;
|
||
|
|
||
|
if ((ret = __os_realloc(dbenv, nalloc * sizeof(LSN_PAGE),
|
||
|
&r->array)) != 0)
|
||
|
return (ret);
|
||
|
|
||
|
r->nalloc = nalloc;
|
||
|
}
|
||
|
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_send_message --
|
||
|
* This is a wrapper for sending a message. It takes care of constructing
|
||
|
* the REP_CONTROL structure and calling the user's specified send function.
|
||
|
*
|
||
|
* PUBLIC: int __rep_send_message __P((DB_ENV *, int,
|
||
|
* PUBLIC: u_int32_t, DB_LSN *, const DBT *, u_int32_t));
|
||
|
*/
|
||
|
int
|
||
|
__rep_send_message(dbenv, eid, rtype, lsnp, dbtp, flags)
|
||
|
DB_ENV *dbenv;
|
||
|
int eid;
|
||
|
u_int32_t rtype;
|
||
|
DB_LSN *lsnp;
|
||
|
const DBT *dbtp;
|
||
|
u_int32_t flags;
|
||
|
{
|
||
|
DB_REP *db_rep;
|
||
|
REP *rep;
|
||
|
DBT cdbt, scrap_dbt;
|
||
|
REP_CONTROL cntrl;
|
||
|
u_int32_t send_flags;
|
||
|
int ret;
|
||
|
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
|
||
|
/* Set up control structure. */
|
||
|
memset(&cntrl, 0, sizeof(cntrl));
|
||
|
if (lsnp == NULL)
|
||
|
ZERO_LSN(cntrl.lsn);
|
||
|
else
|
||
|
cntrl.lsn = *lsnp;
|
||
|
cntrl.rectype = rtype;
|
||
|
cntrl.flags = flags;
|
||
|
cntrl.rep_version = DB_REPVERSION;
|
||
|
cntrl.log_version = DB_LOGVERSION;
|
||
|
MUTEX_LOCK(dbenv, db_rep->mutexp);
|
||
|
cntrl.gen = rep->gen;
|
||
|
MUTEX_UNLOCK(dbenv, db_rep->mutexp);
|
||
|
|
||
|
memset(&cdbt, 0, sizeof(cdbt));
|
||
|
cdbt.data = &cntrl;
|
||
|
cdbt.size = sizeof(cntrl);
|
||
|
|
||
|
/* Don't assume the send function will be tolerant of NULL records. */
|
||
|
if (dbtp == NULL) {
|
||
|
memset(&scrap_dbt, 0, sizeof(DBT));
|
||
|
dbtp = &scrap_dbt;
|
||
|
}
|
||
|
|
||
|
send_flags = (LF_ISSET(DB_PERMANENT) ? DB_REP_PERMANENT : 0);
|
||
|
|
||
|
#if 0
|
||
|
__rep_print_message(dbenv, eid, &cntrl, "rep_send_message");
|
||
|
#endif
|
||
|
#ifdef REP_DIAGNOSTIC
|
||
|
if (rtype == REP_LOG)
|
||
|
__rep_print_logmsg(dbenv, dbtp, lsnp);
|
||
|
#endif
|
||
|
ret = db_rep->rep_send(dbenv, &cdbt, dbtp, eid, send_flags);
|
||
|
|
||
|
/*
|
||
|
* We don't hold the rep lock, so this could miscount if we race.
|
||
|
* I don't think it's worth grabbing the mutex for that bit of
|
||
|
* extra accuracy.
|
||
|
*/
|
||
|
if (ret == 0)
|
||
|
rep->stat.st_msgs_sent++;
|
||
|
else
|
||
|
rep->stat.st_msgs_send_failures++;
|
||
|
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
#ifdef REP_DIAGNOSTIC
|
||
|
|
||
|
/*
|
||
|
* __rep_print_logmsg --
|
||
|
* This is a debugging routine for printing out log records that
|
||
|
* we are about to transmit to a client.
|
||
|
*/
|
||
|
|
||
|
static void
|
||
|
__rep_print_logmsg(dbenv, logdbt, lsnp)
|
||
|
DB_ENV *dbenv;
|
||
|
const DBT *logdbt;
|
||
|
DB_LSN *lsnp;
|
||
|
{
|
||
|
/* Static structures to hold the printing functions. */
|
||
|
static int (**ptab)__P((DB_ENV *,
|
||
|
DBT *, DB_LSN *, db_recops, void *)) = NULL;
|
||
|
size_t ptabsize = 0;
|
||
|
|
||
|
if (ptabsize == 0) {
|
||
|
/* Initialize the table. */
|
||
|
(void)__bam_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__crdel_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__db_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__dbreg_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__fop_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__qam_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__ham_init_print(dbenv, &ptab, &ptabsize);
|
||
|
(void)__txn_init_print(dbenv, &ptab, &ptabsize);
|
||
|
}
|
||
|
|
||
|
(void)__db_dispatch(dbenv,
|
||
|
ptab, ptabsize, (DBT *)logdbt, lsnp, DB_TXN_PRINT, NULL);
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
/*
|
||
|
* __rep_new_master --
|
||
|
* Called after a master election to sync back up with a new master.
|
||
|
* It's possible that we already know of this new master in which case
|
||
|
* we don't need to do anything.
|
||
|
*
|
||
|
* This is written assuming that this message came from the master; we
|
||
|
* need to enforce that in __rep_process_record, but right now, we have
|
||
|
* no way to identify the master.
|
||
|
*
|
||
|
* PUBLIC: int __rep_new_master __P((DB_ENV *, REP_CONTROL *, int));
|
||
|
*/
|
||
|
int
|
||
|
__rep_new_master(dbenv, cntrl, eid)
|
||
|
DB_ENV *dbenv;
|
||
|
REP_CONTROL *cntrl;
|
||
|
int eid;
|
||
|
{
|
||
|
DB_LOG *dblp;
|
||
|
DB_LOGC *logc;
|
||
|
DB_LSN last_lsn, lsn;
|
||
|
DB_REP *db_rep;
|
||
|
DBT dbt;
|
||
|
LOG *lp;
|
||
|
REP *rep;
|
||
|
int change, ret, t_ret;
|
||
|
|
||
|
db_rep = dbenv->rep_handle;
|
||
|
rep = db_rep->region;
|
||
|
MUTEX_LOCK(dbenv, db_rep->mutexp);
|
||
|
ELECTION_DONE(rep);
|
||
|
change = rep->gen != cntrl->gen || rep->master_id != eid;
|
||
|
if (change) {
|
||
|
rep->gen = cntrl->gen;
|
||
|
rep->master_id = eid;
|
||
|
F_SET(rep, REP_F_RECOVER);
|
||
|
rep->stat.st_master_changes++;
|
||
|
}
|
||
|
MUTEX_UNLOCK(dbenv, db_rep->mutexp);
|
||
|
|
||
|
if (!change)
|
||
|
return (0);
|
||
|
|
||
|
/*
|
||
|
* If the master changed, we need to start the process of
|
||
|
* figuring out what our last valid log record is. However,
|
||
|
* if both the master and we agree that the max LSN is 0,0,
|
||
|
* then there is no recovery to be done. If we are at 0 and
|
||
|
* the master is not, then we just need to request all the log
|
||
|
* records from the master.
|
||
|
*/
|
||
|
dblp = dbenv->lg_handle;
|
||
|
lp = dblp->reginfo.primary;
|
||
|
R_LOCK(dbenv, &dblp->reginfo);
|
||
|
last_lsn = lsn = lp->lsn;
|
||
|
if (last_lsn.offset > sizeof(LOGP))
|
||
|
last_lsn.offset -= lp->len;
|
||
|
R_UNLOCK(dbenv, &dblp->reginfo);
|
||
|
if (IS_INIT_LSN(lsn) || IS_ZERO_LSN(lsn)) {
|
||
|
empty: MUTEX_LOCK(dbenv, db_rep->mutexp);
|
||
|
F_CLR(rep, REP_F_RECOVER);
|
||
|
MUTEX_UNLOCK(dbenv, db_rep->mutexp);
|
||
|
|
||
|
if (IS_INIT_LSN(cntrl->lsn))
|
||
|
ret = 0;
|
||
|
else
|
||
|
ret = __rep_send_message(dbenv, rep->master_id,
|
||
|
REP_ALL_REQ, &lsn, NULL, 0);
|
||
|
|
||
|
if (ret == 0)
|
||
|
ret = DB_REP_NEWMASTER;
|
||
|
return (ret);
|
||
|
} else if (last_lsn.offset <= sizeof(LOGP)) {
|
||
|
/*
|
||
|
* We have just changed log files and need to set lastlsn
|
||
|
* to the last record in the previous log files.
|
||
|
*/
|
||
|
if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
|
||
|
return (ret);
|
||
|
memset(&dbt, 0, sizeof(dbt));
|
||
|
ret = logc->get(logc, &last_lsn, &dbt, DB_LAST);
|
||
|
if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
if (ret == DB_NOTFOUND)
|
||
|
goto empty;
|
||
|
if (ret != 0)
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
R_LOCK(dbenv, &dblp->reginfo);
|
||
|
lp->verify_lsn = last_lsn;
|
||
|
R_UNLOCK(dbenv, &dblp->reginfo);
|
||
|
if ((ret = __rep_send_message(dbenv,
|
||
|
eid, REP_VERIFY_REQ, &last_lsn, NULL, 0)) != 0)
|
||
|
return (ret);
|
||
|
|
||
|
return (DB_REP_NEWMASTER);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_lockpgno_init
|
||
|
* Create a dispatch table for acquiring locks on each log record.
|
||
|
*
|
||
|
* PUBLIC: int __rep_lockpgno_init __P((DB_ENV *,
|
||
|
* PUBLIC: int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
|
||
|
* PUBLIC: size_t *));
|
||
|
*/
|
||
|
int
|
||
|
__rep_lockpgno_init(dbenv, dtabp, dtabsizep)
|
||
|
DB_ENV *dbenv;
|
||
|
int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
|
||
|
size_t *dtabsizep;
|
||
|
{
|
||
|
int ret;
|
||
|
|
||
|
/* Initialize dispatch table. */
|
||
|
*dtabsizep = 0;
|
||
|
*dtabp = NULL;
|
||
|
if ((ret = __bam_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __crdel_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __db_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __dbreg_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __fop_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __qam_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __ham_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0 ||
|
||
|
(ret = __txn_init_getpgnos(dbenv, dtabp, dtabsizep)) != 0)
|
||
|
return (ret);
|
||
|
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_unlockpages --
|
||
|
* Unlock the pages locked in __rep_lockpages.
|
||
|
*
|
||
|
* PUBLIC: int __rep_unlockpages __P((DB_ENV *, u_int32_t));
|
||
|
*/
|
||
|
int
|
||
|
__rep_unlockpages(dbenv, lid)
|
||
|
DB_ENV *dbenv;
|
||
|
u_int32_t lid;
|
||
|
{
|
||
|
DB_LOCKREQ req, *lvp;
|
||
|
|
||
|
req.op = DB_LOCK_PUT_ALL;
|
||
|
return (dbenv->lock_vec(dbenv, lid, 0, &req, 1, &lvp));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_lockpages --
|
||
|
* Called to gather and lock pages in preparation for both
|
||
|
* single transaction apply as well as client synchronization
|
||
|
* with a new master. A non-NULL key_lsn means that we're locking
|
||
|
* in order to apply a single log record during client recovery
|
||
|
* to the joint LSN. A non-NULL max_lsn means that we are applying
|
||
|
* a transaction whose commit is at max_lsn.
|
||
|
*
|
||
|
* PUBLIC: int __rep_lockpages __P((DB_ENV *,
|
||
|
* PUBLIC: int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *),
|
||
|
* PUBLIC: size_t, DB_LSN *, DB_LSN *, TXN_RECS *, u_int32_t));
|
||
|
*/
|
||
|
int
|
||
|
__rep_lockpages(dbenv, dtab, dtabsize, key_lsn, max_lsn, recs, lid)
|
||
|
DB_ENV *dbenv;
|
||
|
int (**dtab)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
|
||
|
size_t dtabsize;
|
||
|
DB_LSN *key_lsn, *max_lsn;
|
||
|
TXN_RECS *recs;
|
||
|
u_int32_t lid;
|
||
|
{
|
||
|
DBT data_dbt, lo;
|
||
|
DB_LOCK l;
|
||
|
DB_LOCKREQ *lvp;
|
||
|
DB_LOGC *logc;
|
||
|
DB_LSN tmp_lsn;
|
||
|
TXN_RECS tmp, *t;
|
||
|
db_pgno_t cur_pgno;
|
||
|
linfo_t locks;
|
||
|
int i, ret, t_ret, unique;
|
||
|
u_int32_t cur_fid;
|
||
|
|
||
|
/*
|
||
|
* There are two phases: First, we have to traverse backwards through
|
||
|
* the log records gathering the list of all the pages accessed. Once
|
||
|
* we have this information we can acquire all the locks we need.
|
||
|
*/
|
||
|
|
||
|
/* Initialization */
|
||
|
memset(&locks, 0, sizeof(locks));
|
||
|
ret = 0;
|
||
|
|
||
|
t = recs != NULL ? recs : &tmp;
|
||
|
t->npages = t->nalloc = 0;
|
||
|
t->array = NULL;
|
||
|
|
||
|
/*
|
||
|
* We've got to be in one mode or the other; else life will either
|
||
|
* be excessively boring or overly exciting.
|
||
|
*/
|
||
|
DB_ASSERT(key_lsn != NULL || max_lsn != NULL);
|
||
|
DB_ASSERT(key_lsn == NULL || max_lsn == NULL);
|
||
|
|
||
|
/*
|
||
|
* Phase 1: Fill in the pgno array.
|
||
|
*/
|
||
|
memset(&data_dbt, 0, sizeof(data_dbt));
|
||
|
if (F_ISSET(dbenv, DB_ENV_THREAD))
|
||
|
F_SET(&data_dbt, DB_DBT_REALLOC);
|
||
|
|
||
|
/* Single transaction apply. */
|
||
|
if (max_lsn != NULL) {
|
||
|
DB_ASSERT(0); /* XXX */
|
||
|
/*
|
||
|
tmp_lsn = *max_lsn;
|
||
|
if ((ret = __rep_apply_thread(dbenv, dtab, dtabsize,
|
||
|
&data_dbt, &tmp_lsn, t)) != 0)
|
||
|
goto err;
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
/* In recovery. */
|
||
|
if (key_lsn != NULL) {
|
||
|
if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
|
||
|
goto err;
|
||
|
ret = logc->get(logc, key_lsn, &data_dbt, DB_SET);
|
||
|
|
||
|
/* Save lsn values, since dispatch functions can change them. */
|
||
|
tmp_lsn = *key_lsn;
|
||
|
ret = __db_dispatch(dbenv,
|
||
|
dtab, dtabsize, &data_dbt, &tmp_lsn, DB_TXN_GETPGNOS, t);
|
||
|
|
||
|
if ((t_ret = logc->close(logc, 0)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
|
||
|
/*
|
||
|
* If ret == DB_DELETED, this record refers to a temporary
|
||
|
* file and there's nothing to apply.
|
||
|
*/
|
||
|
if (ret == DB_DELETED) {
|
||
|
ret = 0;
|
||
|
goto out;
|
||
|
} else if (ret != 0)
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
if (t->npages == 0)
|
||
|
goto out;
|
||
|
|
||
|
/* Phase 2: Write lock all the pages. */
|
||
|
|
||
|
/* Sort the entries in the array by page number. */
|
||
|
qsort(t->array, t->npages, sizeof(LSN_PAGE), __rep_cmp_bypage);
|
||
|
|
||
|
/* Count the number of unique pages. */
|
||
|
cur_fid = DB_LOGFILEID_INVALID;
|
||
|
cur_pgno = PGNO_INVALID;
|
||
|
unique = 0;
|
||
|
for (i = 0; i < t->npages; i++) {
|
||
|
if (F_ISSET(&t->array[i], LSN_PAGE_NOLOCK))
|
||
|
continue;
|
||
|
if (t->array[i].pgdesc.pgno != cur_pgno ||
|
||
|
t->array[i].fid != cur_fid) {
|
||
|
cur_pgno = t->array[i].pgdesc.pgno;
|
||
|
cur_fid = t->array[i].fid;
|
||
|
unique++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (unique == 0)
|
||
|
goto out;
|
||
|
|
||
|
/* Handle single lock case specially, else allocate space for locks. */
|
||
|
if (unique == 1) {
|
||
|
memset(&lo, 0, sizeof(lo));
|
||
|
lo.data = &t->array[0].pgdesc;
|
||
|
lo.size = sizeof(t->array[0].pgdesc);
|
||
|
ret = dbenv->lock_get(dbenv, lid, 0, &lo, DB_LOCK_WRITE, &l);
|
||
|
goto out2;
|
||
|
}
|
||
|
|
||
|
/* Multi-lock case. */
|
||
|
locks.n = unique;
|
||
|
if ((ret = __os_calloc(dbenv,
|
||
|
unique, sizeof(DB_LOCKREQ), &locks.reqs)) != 0)
|
||
|
goto err;
|
||
|
if ((ret = __os_calloc(dbenv, unique, sizeof(DBT), &locks.objs)) != 0)
|
||
|
goto err;
|
||
|
|
||
|
unique = 0;
|
||
|
cur_fid = DB_LOGFILEID_INVALID;
|
||
|
cur_pgno = PGNO_INVALID;
|
||
|
for (i = 0; i < t->npages; i++) {
|
||
|
if (F_ISSET(&t->array[i], LSN_PAGE_NOLOCK))
|
||
|
continue;
|
||
|
if (t->array[i].pgdesc.pgno != cur_pgno ||
|
||
|
t->array[i].fid != cur_fid) {
|
||
|
cur_pgno = t->array[i].pgdesc.pgno;
|
||
|
cur_fid = t->array[i].fid;
|
||
|
locks.reqs[unique].op = DB_LOCK_GET;
|
||
|
locks.reqs[unique].mode = DB_LOCK_WRITE;
|
||
|
locks.reqs[unique].obj = &locks.objs[unique];
|
||
|
locks.objs[unique].data = &t->array[i].pgdesc;
|
||
|
locks.objs[unique].size = sizeof(t->array[i].pgdesc);
|
||
|
unique++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Finally, get the locks. */
|
||
|
if ((ret =
|
||
|
dbenv->lock_vec(dbenv, lid, 0, locks.reqs, unique, &lvp)) != 0) {
|
||
|
/*
|
||
|
* If we were unsuccessful, unlock any locks we acquired before
|
||
|
* the error and return the original error value.
|
||
|
*/
|
||
|
(void)__rep_unlockpages(dbenv, lid);
|
||
|
}
|
||
|
|
||
|
err:
|
||
|
out: if (locks.objs != NULL)
|
||
|
__os_free(dbenv, locks.objs);
|
||
|
if (locks.reqs != NULL)
|
||
|
__os_free(dbenv, locks.reqs);
|
||
|
|
||
|
/*
|
||
|
* Before we return, sort by LSN so that we apply records in the
|
||
|
* right order.
|
||
|
*/
|
||
|
qsort(t->array, t->npages, sizeof(LSN_PAGE), __rep_cmp_bylsn);
|
||
|
|
||
|
out2: if ((ret != 0 || recs == NULL) && t->nalloc != 0) {
|
||
|
__os_free(dbenv, t->array);
|
||
|
t->array = NULL;
|
||
|
t->npages = t->nalloc = 0;
|
||
|
}
|
||
|
|
||
|
if (F_ISSET(&data_dbt, DB_DBT_REALLOC) && data_dbt.data != NULL)
|
||
|
__os_ufree(dbenv, data_dbt.data);
|
||
|
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_cmp_bypage and __rep_cmp_bylsn --
|
||
|
* Sort functions for qsort. "bypage" sorts first by page numbers and
|
||
|
* then by the LSN. "bylsn" sorts first by the LSN, then by page numbers.
|
||
|
*/
|
||
|
static int
|
||
|
__rep_cmp_bypage(a, b)
|
||
|
const void *a, *b;
|
||
|
{
|
||
|
LSN_PAGE *ap, *bp;
|
||
|
|
||
|
ap = (LSN_PAGE *)a;
|
||
|
bp = (LSN_PAGE *)b;
|
||
|
|
||
|
if (ap->fid < bp->fid)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->fid > bp->fid)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->pgdesc.pgno < bp->pgdesc.pgno)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->pgdesc.pgno > bp->pgdesc.pgno)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->lsn.file < bp->lsn.file)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->lsn.file > bp->lsn.file)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->lsn.offset < bp->lsn.offset)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->lsn.offset > bp->lsn.offset)
|
||
|
return (1);
|
||
|
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
__rep_cmp_bylsn(a, b)
|
||
|
const void *a, *b;
|
||
|
{
|
||
|
LSN_PAGE *ap, *bp;
|
||
|
|
||
|
ap = (LSN_PAGE *)a;
|
||
|
bp = (LSN_PAGE *)b;
|
||
|
|
||
|
if (ap->lsn.file < bp->lsn.file)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->lsn.file > bp->lsn.file)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->lsn.offset < bp->lsn.offset)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->lsn.offset > bp->lsn.offset)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->fid < bp->fid)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->fid > bp->fid)
|
||
|
return (1);
|
||
|
|
||
|
if (ap->pgdesc.pgno < bp->pgdesc.pgno)
|
||
|
return (-1);
|
||
|
|
||
|
if (ap->pgdesc.pgno > bp->pgdesc.pgno)
|
||
|
return (1);
|
||
|
|
||
|
return (0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_is_client
|
||
|
* Used by other subsystems to figure out if this is a replication
|
||
|
* client sites.
|
||
|
*
|
||
|
* PUBLIC: int __rep_is_client __P((DB_ENV *));
|
||
|
*/
|
||
|
int
|
||
|
__rep_is_client(dbenv)
|
||
|
DB_ENV *dbenv;
|
||
|
{
|
||
|
DB_REP *db_rep;
|
||
|
REP *rep;
|
||
|
int ret;
|
||
|
|
||
|
if ((db_rep = dbenv->rep_handle) == NULL)
|
||
|
return (0);
|
||
|
rep = db_rep->region;
|
||
|
|
||
|
MUTEX_LOCK(dbenv, db_rep->mutexp);
|
||
|
ret = F_ISSET(rep, REP_F_UPGRADE | REP_F_LOGSONLY);
|
||
|
MUTEX_UNLOCK(dbenv, db_rep->mutexp);
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_send_vote
|
||
|
* Send this site's vote for the election.
|
||
|
*
|
||
|
* PUBLIC: int __rep_send_vote __P((DB_ENV *, DB_LSN *, int, int, int));
|
||
|
*/
|
||
|
int
|
||
|
__rep_send_vote(dbenv, lsnp, nsites, pri, tiebreaker)
|
||
|
DB_ENV *dbenv;
|
||
|
DB_LSN *lsnp;
|
||
|
int nsites, pri, tiebreaker;
|
||
|
{
|
||
|
DBT vote_dbt;
|
||
|
REP_VOTE_INFO vi;
|
||
|
|
||
|
memset(&vi, 0, sizeof(vi));
|
||
|
|
||
|
vi.priority = pri;
|
||
|
vi.nsites = nsites;
|
||
|
vi.tiebreaker = tiebreaker;
|
||
|
|
||
|
memset(&vote_dbt, 0, sizeof(vote_dbt));
|
||
|
vote_dbt.data = &vi;
|
||
|
vote_dbt.size = sizeof(vi);
|
||
|
|
||
|
return (__rep_send_message(dbenv,
|
||
|
DB_EID_BROADCAST, REP_VOTE1, lsnp, &vote_dbt, 0));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* __rep_grow_sites --
|
||
|
* Called to allocate more space in the election tally information.
|
||
|
* Called with the rep mutex held. We need to call the region mutex, so
|
||
|
* we need to make sure that we *never* acquire those mutexes in the
|
||
|
* opposite order.
|
||
|
*
|
||
|
* PUBLIC: int __rep_grow_sites __P((DB_ENV *dbenv, int nsites));
|
||
|
*/
|
||
|
int
|
||
|
__rep_grow_sites(dbenv, nsites)
|
||
|
DB_ENV *dbenv;
|
||
|
int nsites;
|
||
|
{
|
||
|
REGENV *renv;
|
||
|
REGINFO *infop;
|
||
|
REP *rep;
|
||
|
int nalloc, ret, *tally;
|
||
|
|
||
|
rep = ((DB_REP *)dbenv->rep_handle)->region;
|
||
|
|
||
|
/*
|
||
|
* Allocate either twice the current allocation or nsites,
|
||
|
* whichever is more.
|
||
|
*/
|
||
|
|
||
|
nalloc = 2 * rep->asites;
|
||
|
if (nalloc < nsites)
|
||
|
nalloc = nsites;
|
||
|
|
||
|
infop = dbenv->reginfo;
|
||
|
renv = infop->primary;
|
||
|
MUTEX_LOCK(dbenv, &renv->mutex);
|
||
|
if ((ret = __db_shalloc(infop->addr,
|
||
|
sizeof(nalloc * sizeof(int)), sizeof(int), &tally)) == 0) {
|
||
|
if (rep->tally_off != INVALID_ROFF)
|
||
|
__db_shalloc_free(infop->addr,
|
||
|
R_ADDR(infop, rep->tally_off));
|
||
|
rep->asites = nalloc;
|
||
|
rep->nsites = nsites;
|
||
|
rep->tally_off = R_OFFSET(infop, tally);
|
||
|
}
|
||
|
MUTEX_UNLOCK(dbenv, &renv->mutex);
|
||
|
return (ret);
|
||
|
}
|
||
|
|
||
|
#ifdef NOTYET
|
||
|
static int __rep_send_file __P((DB_ENV *, DBT *, u_int32_t));
|
||
|
/*
|
||
|
* __rep_send_file --
|
||
|
* Send an entire file, one block at a time.
|
||
|
*/
|
||
|
static int
|
||
|
__rep_send_file(dbenv, rec, eid)
|
||
|
DB_ENV *dbenv;
|
||
|
DBT *rec;
|
||
|
u_int32_t eid;
|
||
|
{
|
||
|
DB *dbp;
|
||
|
DB_LOCK lk;
|
||
|
DB_MPOOLFILE *mpf;
|
||
|
DBC *dbc;
|
||
|
DBT rec_dbt;
|
||
|
PAGE *pagep;
|
||
|
db_pgno_t last_pgno, pgno;
|
||
|
int ret, t_ret;
|
||
|
|
||
|
dbp = NULL;
|
||
|
dbc = NULL;
|
||
|
pagep = NULL;
|
||
|
mpf = NULL;
|
||
|
LOCK_INIT(lk);
|
||
|
|
||
|
if ((ret = db_create(&dbp, dbenv, 0)) != 0)
|
||
|
goto err;
|
||
|
|
||
|
if ((ret = dbp->open(dbp, rec->data, NULL, DB_UNKNOWN, 0, 0)) != 0)
|
||
|
goto err;
|
||
|
|
||
|
if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
|
||
|
goto err;
|
||
|
/*
|
||
|
* Force last_pgno to some value that will let us read the meta-dat
|
||
|
* page in the following loop.
|
||
|
*/
|
||
|
memset(&rec_dbt, 0, sizeof(rec_dbt));
|
||
|
last_pgno = 1;
|
||
|
for (pgno = 0; pgno <= last_pgno; pgno++) {
|
||
|
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lk)) != 0)
|
||
|
goto err;
|
||
|
|
||
|
if ((ret = mpf->get(mpf, &pgno, 0, &pagep)) != 0)
|
||
|
goto err;
|
||
|
|
||
|
if (pgno == 0)
|
||
|
last_pgno = ((DBMETA *)pagep)->last_pgno;
|
||
|
|
||
|
rec_dbt.data = pagep;
|
||
|
rec_dbt.size = dbp->pgsize;
|
||
|
if ((ret = __rep_send_message(dbenv, eid,
|
||
|
REP_FILE, NULL, &rec_dbt, pgno == last_pgno)) != 0)
|
||
|
goto err;
|
||
|
ret = mpf->put(mpf, pagep, 0);
|
||
|
pagep = NULL;
|
||
|
if (ret != 0)
|
||
|
goto err;
|
||
|
ret = __LPUT(dbc, lk);
|
||
|
LOCK_INIT(lk);
|
||
|
if (ret != 0)
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
err: if (LOCK_ISSET(lk) && (t_ret = __LPUT(dbc, lk)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
if (dbc != NULL && (t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
if (pagep != NULL && (t_ret = mpf->put(mpf, pagep, 0)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
if (dbp != NULL && (t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
|
||
|
ret = t_ret;
|
||
|
return (ret);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#if 0
|
||
|
/*
|
||
|
* PUBLIC: void __rep_print_message __P((DB_ENV *, int, REP_CONTROL *, char *));
|
||
|
*/
|
||
|
void
|
||
|
__rep_print_message(dbenv, eid, rp, str)
|
||
|
DB_ENV *dbenv;
|
||
|
int eid;
|
||
|
REP_CONTROL *rp;
|
||
|
char *str;
|
||
|
{
|
||
|
char *type;
|
||
|
switch (rp->rectype) {
|
||
|
case REP_ALIVE:
|
||
|
type = "alive";
|
||
|
break;
|
||
|
case REP_ALIVE_REQ:
|
||
|
type = "alive_req";
|
||
|
break;
|
||
|
case REP_ALL_REQ:
|
||
|
type = "all_req";
|
||
|
break;
|
||
|
case REP_ELECT:
|
||
|
type = "elect";
|
||
|
break;
|
||
|
case REP_FILE:
|
||
|
type = "file";
|
||
|
break;
|
||
|
case REP_FILE_REQ:
|
||
|
type = "file_req";
|
||
|
break;
|
||
|
case REP_LOG:
|
||
|
type = "log";
|
||
|
break;
|
||
|
case REP_LOG_MORE:
|
||
|
type = "log_more";
|
||
|
break;
|
||
|
case REP_LOG_REQ:
|
||
|
type = "log_req";
|
||
|
break;
|
||
|
case REP_MASTER_REQ:
|
||
|
type = "master_req";
|
||
|
break;
|
||
|
case REP_NEWCLIENT:
|
||
|
type = "newclient";
|
||
|
break;
|
||
|
case REP_NEWFILE:
|
||
|
type = "newfile";
|
||
|
break;
|
||
|
case REP_NEWMASTER:
|
||
|
type = "newmaster";
|
||
|
break;
|
||
|
case REP_NEWSITE:
|
||
|
type = "newsite";
|
||
|
break;
|
||
|
case REP_PAGE:
|
||
|
type = "page";
|
||
|
break;
|
||
|
case REP_PAGE_REQ:
|
||
|
type = "page_req";
|
||
|
break;
|
||
|
case REP_PLIST:
|
||
|
type = "plist";
|
||
|
break;
|
||
|
case REP_PLIST_REQ:
|
||
|
type = "plist_req";
|
||
|
break;
|
||
|
case REP_VERIFY:
|
||
|
type = "verify";
|
||
|
break;
|
||
|
case REP_VERIFY_FAIL:
|
||
|
type = "verify_fail";
|
||
|
break;
|
||
|
case REP_VERIFY_REQ:
|
||
|
type = "verify_req";
|
||
|
break;
|
||
|
case REP_VOTE1:
|
||
|
type = "vote1";
|
||
|
break;
|
||
|
case REP_VOTE2:
|
||
|
type = "vote2";
|
||
|
break;
|
||
|
default:
|
||
|
type = "NOTYPE";
|
||
|
break;
|
||
|
}
|
||
|
printf("%s %s: gen = %d eid %d, type %s, LSN [%u][%u]\n",
|
||
|
dbenv->db_home, str, rp->gen, eid, type, rp->lsn.file,
|
||
|
rp->lsn.offset);
|
||
|
}
|
||
|
#endif
|