mariadb/storage/bdb/db/db_rec.c
2005-12-05 10:27:46 -08:00

1266 lines
31 KiB
C

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996-2005
* Sleepycat Software. All rights reserved.
*
* $Id: db_rec.c,v 12.12 2005/10/27 01:03:01 bostic Exp $
*/
#include "db_config.h"
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
#include "dbinc/hash.h"
static int __db_pg_free_recover_int __P((DB_ENV *,
__db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
/*
 * __db_addrem_recover --
 *	Recovery function for addrem.
 *
 *	An addrem record is written whenever an item is added to, or removed
 *	from, a duplicate page.  Redo repeats the logged operation, undo
 *	performs its opposite.
 *
 * PUBLIC: int __db_addrem_recover
 * PUBLIC:    __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_addrem_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_addrem_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	DBT *data_dbt, *hdr_dbt;
	PAGE *pagep;
	u_int32_t dirty;
	int adding, cur_cmp, prev_cmp, removing, ret;

	pagep = NULL;
	COMPQUIET(info, NULL);
	REC_PRINT(__db_addrem_print);
	REC_INTRO(__db_addrem_read, 1, 1);

	REC_FGET(mpf, argp->pgno, &pagep, done);

	/*
	 * cur_cmp == 0: the page carries this record's LSN (change applied).
	 * prev_cmp == 0: the page carries the LSN logged as the "before"
	 * value (change not yet applied).
	 */
	cur_cmp = log_compare(lsnp, &LSN(pagep));
	prev_cmp = log_compare(&LSN(pagep), &argp->pagelsn);
	CHECK_LSN(dbenv, op, prev_cmp, &LSN(pagep), &argp->pagelsn);

	adding = argp->opcode == DB_ADD_DUP;
	removing = argp->opcode == DB_REM_DUP;

	dirty = 0;
	if ((prev_cmp == 0 && DB_REDO(op) && adding) ||
	    (cur_cmp == 0 && DB_UNDO(op) && removing)) {
		/* Redo of an add, or undo of a delete: put the item back. */
		hdr_dbt = argp->hdr.size == 0 ? NULL : &argp->hdr;
		data_dbt = argp->dbt.size == 0 ? NULL : &argp->dbt;
		ret = __db_pitem(dbc,
		    pagep, argp->indx, argp->nbytes, hdr_dbt, data_dbt);
		if (ret != 0)
			goto out;
		dirty = DB_MPOOL_DIRTY;
	} else if ((cur_cmp == 0 && DB_UNDO(op) && adding) ||
	    (prev_cmp == 0 && DB_REDO(op) && removing)) {
		/* Undo of an add, or redo of a delete: take the item off. */
		ret = __db_ditem(dbc, pagep, argp->indx, argp->nbytes);
		if (ret != 0)
			goto out;
		dirty = DB_MPOOL_DIRTY;
	}

	/* A changed page moves to the new LSN on redo, the old one on undo. */
	if (dirty != 0)
		LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;

	if ((ret = __memp_fput(mpf, pagep, dirty)) != 0)
		goto out;
	pagep = NULL;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	REC_CLOSE;
}
/*
 * __db_big_recover --
 *	Recovery function for big (overflow page) records.
 *
 *	Up to three pages are examined: the overflow page named in the
 *	record, the previous page in the chain (whose next pointer may have
 *	changed) and the next page in the chain (whose prev pointer may have
 *	changed).
 *
 * PUBLIC: int __db_big_recover
 * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_big_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_big_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	u_int32_t dirty;
	int adding, cur_cmp, prev_cmp, removing, ret;

	pagep = NULL;
	COMPQUIET(info, NULL);
	REC_PRINT(__db_big_print);
	REC_INTRO(__db_big_read, 1, 0);

	/* Evaluated up front: the ppage code may be entered via REC_FGET. */
	adding = argp->opcode == DB_ADD_BIG;
	removing = argp->opcode == DB_REM_BIG;

	/* First, the overflow page itself. */
	REC_FGET(mpf, argp->pgno, &pagep, ppage);

	cur_cmp = log_compare(lsnp, &LSN(pagep));
	prev_cmp = log_compare(&LSN(pagep), &argp->pagelsn);
	CHECK_LSN(dbenv, op, prev_cmp, &LSN(pagep), &argp->pagelsn);

	dirty = 0;
	if ((prev_cmp == 0 && DB_REDO(op) && adding) ||
	    (cur_cmp == 0 && DB_UNDO(op) && removing)) {
		/* Redo of an add or undo of a delete: rebuild the page. */
		P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
		    argp->next_pgno, 0, P_OVERFLOW);
		OV_LEN(pagep) = argp->dbt.size;
		OV_REF(pagep) = 1;
		memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp),
		    argp->dbt.data, argp->dbt.size);
		PREV_PGNO(pagep) = argp->prev_pgno;
		dirty = DB_MPOOL_DIRTY;
	} else if ((cur_cmp == 0 && DB_UNDO(op) && adding) ||
	    (prev_cmp == 0 && DB_REDO(op) && removing)) {
		/*
		 * Undo of an add or redo of a delete.  The page is about to
		 * be reclaimed either way, so only its LSN needs to move.
		 */
		dirty = DB_MPOOL_DIRTY;
	}
	if (dirty != 0) {
		if (DB_REDO(op))
			LSN(pagep) = *lsnp;
		else
			LSN(pagep) = argp->pagelsn;
	}
	if ((ret = __memp_fput(mpf, pagep, dirty)) != 0)
		goto out;
	pagep = NULL;

	/*
	 * Overflow chains are only ever deleted whole and each page gets
	 * its own record, so a delete has no neighbors to fix up.
	 */
	if (removing)
		goto done;

	/* Second, the previous page in the chain. */
ppage:	if (argp->prev_pgno != PGNO_INVALID) {
		dirty = 0;
		REC_FGET(mpf, argp->prev_pgno, &pagep, npage);

		cur_cmp = log_compare(lsnp, &LSN(pagep));
		prev_cmp = log_compare(&LSN(pagep), &argp->prevlsn);
		CHECK_LSN(dbenv, op, prev_cmp, &LSN(pagep), &argp->prevlsn);

		/* Only an add changes the previous page's next pointer. */
		if (adding) {
			if (prev_cmp == 0 && DB_REDO(op)) {
				/* Redo the add: link to the new page. */
				NEXT_PGNO(pagep) = argp->pgno;
				dirty = DB_MPOOL_DIRTY;
			} else if (cur_cmp == 0 && DB_UNDO(op)) {
				/* Undo the add: restore the old link. */
				NEXT_PGNO(pagep) = argp->next_pgno;
				dirty = DB_MPOOL_DIRTY;
			}
		}
		if (dirty != 0) {
			if (DB_REDO(op))
				LSN(pagep) = *lsnp;
			else
				LSN(pagep) = argp->prevlsn;
		}
		if ((ret = __memp_fput(mpf, pagep, dirty)) != 0)
			goto out;
	}
	pagep = NULL;

	/* Third, the next page in the chain. */
npage:	if (argp->next_pgno != PGNO_INVALID) {
		dirty = 0;
		REC_FGET(mpf, argp->next_pgno, &pagep, done);

		cur_cmp = log_compare(lsnp, &LSN(pagep));
		prev_cmp = log_compare(&LSN(pagep), &argp->nextlsn);
		CHECK_LSN(dbenv, op, prev_cmp, &LSN(pagep), &argp->nextlsn);

		if (prev_cmp == 0 && DB_REDO(op)) {
			PREV_PGNO(pagep) = PGNO_INVALID;
			dirty = DB_MPOOL_DIRTY;
		} else if (cur_cmp == 0 && DB_UNDO(op)) {
			PREV_PGNO(pagep) = argp->pgno;
			dirty = DB_MPOOL_DIRTY;
		}
		if (dirty != 0) {
			if (DB_REDO(op))
				LSN(pagep) = *lsnp;
			else
				LSN(pagep) = argp->nextlsn;
		}
		if ((ret = __memp_fput(mpf, pagep, dirty)) != 0)
			goto out;
	}
	pagep = NULL;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	REC_CLOSE;
}
/*
* __db_ovref_recover --
* Recovery function for __db_ovref().
*
* PUBLIC: int __db_ovref_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__db_ovref_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__db_ovref_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
int cmp, modified, ret;
pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__db_ovref_print);
REC_INTRO(__db_ovref_read, 1, 0);
REC_FGET(mpf, argp->pgno, &pagep, done);
modified = 0;
cmp = log_compare(&LSN(pagep), &argp->lsn);
CHECK_LSN(dbenv, op, cmp, &LSN(pagep), &argp->lsn);
if (cmp == 0 && DB_REDO(op)) {
/* Need to redo update described. */
OV_REF(pagep) += argp->adjust;
pagep->lsn = *lsnp;
modified = 1;
} else if (log_compare(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
OV_REF(pagep) -= argp->adjust;
pagep->lsn = argp->lsn;
modified = 1;
}
if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
 * __db_debug_recover --
 *	Recovery function for debug.
 *
 *	A debug record changes no pages: the function only steps *lsnp back
 *	to the record's prev_lsn and reports success.  dbenv, op and info are
 *	marked unused via COMPQUIET.
 *
 * PUBLIC: int __db_debug_recover __P((DB_ENV *,
 * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_debug_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__db_debug_args *argp;
int ret;
COMPQUIET(dbenv, NULL);
COMPQUIET(op, DB_TXN_ABORT);
COMPQUIET(info, NULL);
REC_PRINT(__db_debug_print);
/*
 * NOTE(review): REC_NOOP_INTRO presumably decodes dbtp into argp and
 * REC_NOOP_CLOSE presumably frees argp and returns ret -- confirm against
 * the macro definitions, which are not in this file.
 */
REC_NOOP_INTRO(__db_debug_read);
/* Nothing to redo or undo; just move to the previous record. */
*lsnp = argp->prev_lsn;
ret = 0;
REC_NOOP_CLOSE;
}
/*
 * __db_noop_recover --
 *	Recovery function for noop.
 *
 *	A noop record changes nothing on the page except its LSN: redo
 *	advances the page LSN to this record, undo restores the LSN that was
 *	logged as the previous value.
 *
 *	Returns 0 on success (including when there is no page to fix up),
 *	otherwise the error from reading the record or from the buffer pool.
 *	As in the other recovery functions in this file, *lsnp is stepped
 *	back to the record's prev_lsn only on success.
 *
 * PUBLIC: int __db_noop_recover __P((DB_ENV *,
 * PUBLIC:     DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_noop_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_noop_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	u_int32_t change;
	int cmp_n, cmp_p, ret;

	pagep = NULL;
	COMPQUIET(info, NULL);
	REC_PRINT(__db_noop_print);
	REC_INTRO(__db_noop_read, 0, 0);

	REC_FGET(mpf, argp->pgno, &pagep, done);

	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
	CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->prevlsn);

	change = 0;
	if (cmp_p == 0 && DB_REDO(op)) {
		LSN(pagep) = *lsnp;
		change = DB_MPOOL_DIRTY;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		LSN(pagep) = argp->prevlsn;
		change = DB_MPOOL_DIRTY;
	}

	/* Drop our reference before testing ret so "out" never re-puts it. */
	ret = __memp_fput(mpf, pagep, change);
	pagep = NULL;
	if (ret != 0)
		goto out;

	/*
	 * REC_FGET can branch here with its failure status still in ret
	 * (__db_pg_free_recover_int tests ret at its REC_FGET label for that
	 * reason), so success must be set explicitly.
	 */
done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	REC_CLOSE;
}
/*
 * __db_pg_alloc_recover --
 *	Recovery function for pg_alloc.
 *
 *	Two pages are involved: the metadata page, whose free-list head and
 *	last_pgno were changed by the allocation, and the allocated page
 *	itself.  On abort with HAVE_FTRUNCATE, the page is also pushed back
 *	onto the front of the in-memory sorted free list, if one is kept.
 *
 *	Returns 0 on success.  ENOENT is mapped to 0 when op is
 *	DB_TXN_BACKWARD_ALLOC.
 *
 * PUBLIC: int __db_pg_alloc_recover
 * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_alloc_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_alloc_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	db_pgno_t pgno;
	int cmp_n, cmp_p, created, level, meta_modified, modified, ret;

	meta = NULL;
	pagep = NULL;
	created = meta_modified = modified = 0;
	REC_PRINT(__db_pg_alloc_print);
	REC_INTRO(__db_pg_alloc_read, 0, 0);

	/*
	 * Fix up the metadata page.  If we're redoing the operation, we have
	 * to get the metadata page and update its LSN and its free pointer.
	 * If we're undoing the operation and the page was ever created, we put
	 * it on the freelist.
	 */
	pgno = PGNO_BASE_MD;
	if ((ret = __memp_fget(mpf, &pgno, 0, &meta)) != 0) {
		/* The metadata page must always exist on redo. */
		if (DB_REDO(op)) {
			ret = __db_pgerr(file_dbp, pgno, ret);
			goto out;
		} else
			goto done;
	}
	cmp_n = log_compare(lsnp, &LSN(meta));
	cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
	CHECK_LSN(dbenv, op, cmp_p, &LSN(meta), &argp->meta_lsn);
	if (cmp_p == 0 && DB_REDO(op)) {
		/* Need to redo update described. */
		LSN(meta) = *lsnp;
		meta->free = argp->next;
		meta_modified = 1;
		if (argp->pgno > meta->last_pgno)
			meta->last_pgno = argp->pgno;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to undo update described. */
		LSN(meta) = argp->meta_lsn;
		/*
		 * If the page has a zero LSN then it's newly created
		 * and will be truncated or go into limbo rather than
		 * directly on the free list.
		 */
		if (!IS_ZERO_LSN(argp->page_lsn))
			meta->free = argp->pgno;
#ifdef HAVE_FTRUNCATE
		/*
		 * With truncate we will restore the file to
		 * its original length.  Without truncate
		 * the last_pgno never goes backward.
		 */
		meta->last_pgno = argp->last_pgno;
#endif
		meta_modified = 1;
	}

#ifdef HAVE_FTRUNCATE
	/*
	 * Check to see if we are keeping a sorted
	 * freelist, if so put this back in the in
	 * memory list.  It must be the first element.
	 */
	if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
		db_pgno_t *list;
		u_int32_t nelem;

		if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
			goto out;
		if (list != NULL) {
			if ((ret =
			    __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
				goto out;
			/*
			 * Shift the existing entries up by one slot.  The
			 * byte count is based on the element size, not the
			 * size of the list pointer, so the move stays
			 * inside the nelem + 1 entries just reserved.
			 */
			if (nelem != 0)
				memmove(list + 1,
				    list, (size_t)nelem * sizeof(*list));
			*list = argp->pgno;
		}
	}
#endif

	/*
	 * Fix up the allocated page.  If the page does not exist
	 * and we can truncate it then don't create it.
	 * Otherwise if we're redoing the operation, we have
	 * to get the page (creating it if it doesn't exist), and update its
	 * LSN.  If we're undoing the operation, we have to reset the page's
	 * LSN and put it on the free list, or into limbo.
	 */
	if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
		/*
		 * We have to be able to identify if a page was newly
		 * created so we can recover it properly.  We cannot simply
		 * look for an empty header, because hash uses a pgin
		 * function that will set the header.  Instead, we explicitly
		 * try for the page without CREATE and if that fails, then
		 * create it.
		 */
#ifdef HAVE_FTRUNCATE
		if (DB_UNDO(op))
			goto do_truncate;
#endif
		if ((ret = __memp_fget(
		    mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
			if (DB_UNDO(op) && ret == ENOSPC)
				goto do_truncate;
			ret = __db_pgerr(file_dbp, argp->pgno, ret);
			goto out;
		}
		created = modified = 1;
	}

	/* Fix up the allocated page. */
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);

	/*
	 * If an initial allocation is aborted and then reallocated during
	 * an archival restore the log record will have an LSN for the page
	 * but the page will be empty.
	 * If we rolled back this allocation previously during an
	 * archive restore, the page may have INIT_LSN from the limbo list.
	 */
	if (IS_ZERO_LSN(LSN(pagep)) ||
	    (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep))))
		cmp_p = 0;

	CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->page_lsn);
	/*
	 * Another special case we have to handle is if we ended up with a
	 * page of all 0's which can happen if we abort between allocating a
	 * page in mpool and initializing it.  In that case, even if we're
	 * undoing, we need to re-initialize the page.
	 */
	if (DB_REDO(op) && cmp_p == 0) {
		/* Need to redo update described. */
		switch (argp->ptype) {
		case P_LBTREE:
		case P_LRECNO:
		case P_LDUP:
			level = LEAFLEVEL;
			break;
		default:
			level = 0;
			break;
		}
		P_INIT(pagep, file_dbp->pgsize,
		    argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);

		pagep->lsn = *lsnp;
		modified = 1;
	} else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
		/*
		 * This is where we handle the case of a 0'd page (pagep->pgno
		 * is equal to PGNO_INVALID).
		 * Undo the allocation, reinitialize the page and
		 * link its next pointer to the free list.
		 */
		P_INIT(pagep, file_dbp->pgsize,
		    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);

		pagep->lsn = argp->page_lsn;
		modified = 1;
	}

do_truncate:
	/*
	 * If the page was newly created, give it back, if
	 * possible.  Otherwise put it into limbo.
	 */
	if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
	    IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
#ifdef HAVE_FTRUNCATE
		COMPQUIET(info, NULL);
		/* Discard the page. */
		if (pagep != NULL) {
			if ((ret =
			    __memp_fput(mpf, pagep, DB_MPOOL_DISCARD)) != 0)
				goto out;
			pagep = NULL;
			/* Give the page back to the OS. */
			if (meta->last_pgno <= argp->pgno &&
			    (ret = __memp_ftruncate(mpf, argp->pgno, 0)) != 0)
				goto out;
		}
#else
		/* Put the page in limbo. */
		if ((ret = __db_add_limbo(dbenv,
		    info, argp->fileid, argp->pgno, 1)) != 0)
			goto out;
		/* The last_pgno grows if this was a new page. */
		if (argp->pgno > meta->last_pgno) {
			meta->last_pgno = argp->pgno;
			meta_modified = 1;
		}
#endif
	}

	if (pagep != NULL &&
	    (ret = __memp_fput(mpf,
	    pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
		goto out;
	pagep = NULL;

	if ((ret = __memp_fput(mpf,
	    meta, meta_modified ? DB_MPOOL_DIRTY : 0)) != 0)
		goto out;
	meta = NULL;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	if (meta != NULL)
		(void)__memp_fput(mpf, meta, 0);
	/* A missing file is not an error for a backward-alloc pass. */
	if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC)
		ret = 0;
	REC_CLOSE;
}
/*
 * __db_pg_free_recover_int --
 *	Shared worker for the pg_free and pg_freedata recovery functions.
 *
 *	argp is the decoded log record, file_dbp/mpf identify the file,
 *	lsnp is the LSN of the record and op the recovery operation.  When
 *	data is non-zero, undo also copies argp->data back onto the page
 *	after the header; when it is zero only the header is restored.
 *
 *	Returns 0 on success or an error from the buffer pool.
 */
static int
__db_pg_free_recover_int(dbenv, argp, file_dbp, lsnp, mpf, op, data)
	DB_ENV *dbenv;
	__db_pg_freedata_args *argp;
	DB *file_dbp;
	DB_LSN *lsnp;
	DB_MPOOLFILE *mpf;
	db_recops op;
	int data;
{
	DBMETA *meta;
	DB_LSN copy_lsn;
	PAGE *pagep, *prevp;
	int cmp_n, cmp_p, is_meta, meta_modified, modified, ret;

	meta = NULL;
	pagep = NULL;
	prevp = NULL;
	meta_modified = modified = 0;

	/*
	 * Get the "metapage".  This will either be the metapage
	 * or the previous page in the free list if we are doing
	 * sorted allocations.  If it's a previous page then
	 * we will not be truncating.
	 */
	is_meta = argp->meta_pgno == PGNO_BASE_MD;

	REC_FGET(mpf, argp->meta_pgno, &meta, check_meta);

	if (argp->meta_pgno != PGNO_BASE_MD)
		prevp = (PAGE *)meta;

	cmp_n = log_compare(lsnp, &LSN(meta));
	cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
	CHECK_LSN(dbenv, op, cmp_p, &LSN(meta), &argp->meta_lsn);

	/*
	 * Fix up the metadata page.  If we're redoing or undoing the operation
	 * we get the page and update its LSN, last and free pointer.
	 */
	if (cmp_p == 0 && DB_REDO(op)) {
#ifdef HAVE_FTRUNCATE
		/*
		 * If we are at the end of the file truncate, otherwise
		 * put on the free list.
		 */
		if (argp->pgno == argp->last_pgno)
			meta->last_pgno = argp->pgno - 1;
		else if (prevp == NULL)
			meta->free = argp->pgno;
		else
			NEXT_PGNO(prevp) = argp->pgno;
#else
		/* Need to redo the deallocation. */
		if (prevp == NULL)
			meta->free = argp->pgno;
		else
			NEXT_PGNO(prevp) = argp->pgno;
		/*
		 * If this was a compensating transaction and
		 * we are a replica, then we never executed the
		 * original allocation which incremented meta->free.
		 */
		if (prevp == NULL && meta->last_pgno < meta->free)
			meta->last_pgno = meta->free;
#endif
		LSN(meta) = *lsnp;
		meta_modified = 1;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to undo the deallocation. */
		if (prevp == NULL)
			meta->free = argp->next;
		else
			NEXT_PGNO(prevp) = argp->next;
		LSN(meta) = argp->meta_lsn;
		if (prevp == NULL && meta->last_pgno < argp->pgno)
			meta->last_pgno = argp->pgno;
		meta_modified = 1;
	}

check_meta:
	/*
	 * Reached either by falling through (ret is 0) or from REC_FGET
	 * when the page could not be fetched (ret is non-zero).
	 */
	if (ret != 0 && is_meta) {
		/* The metadata page must always exist. */
		ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
		goto out;
	}

	/*
	 * Get the freed page.  If we support truncate then don't
	 * create the page if we are going to free it.  If we're
	 * redoing the operation we get the page and explicitly discard
	 * its contents, then update its LSN.  If we're undoing the
	 * operation, we get the page and restore its header.
	 * If we don't support truncate, then we must create the page
	 * and roll it back.
	 */
#ifdef HAVE_FTRUNCATE
	if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
		if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
			if (ret == DB_PAGE_NOTFOUND)
				goto done;
			goto out;
		}
	} else
#endif
	if ((ret =
	    __memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
		goto out;

	/* The logged page header holds the page's LSN before the free. */
	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
	cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &copy_lsn);

#ifdef HAVE_FTRUNCATE
	/*
	 * This page got extended by a later allocation,
	 * but its allocation was not in the scope of this
	 * recovery pass.
	 */
	if (IS_ZERO_LSN(LSN(pagep)))
		cmp_p = 0;
#endif

	CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &copy_lsn);
	if (DB_REDO(op) &&
	    (cmp_p == 0 ||
	    (IS_ZERO_LSN(copy_lsn) &&
	    log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) {
		/* Need to redo the deallocation. */
#ifdef HAVE_FTRUNCATE
		/*
		 * The page can be truncated if it was truncated at runtime
		 * and the current metapage reflects the truncation.
		 */
		if (is_meta && meta->last_pgno <= argp->pgno &&
		    argp->last_pgno <= argp->pgno) {
			if ((ret =
			    __memp_fput(mpf, pagep, DB_MPOOL_DISCARD)) != 0)
				goto out;
			pagep = NULL;
			if ((ret = __memp_ftruncate(mpf, argp->pgno, 0)) != 0)
				goto out;
		} else if (argp->last_pgno == argp->pgno) {
			/* The page was truncated at runtime, zero it out. */
			P_INIT(pagep, 0, PGNO_INVALID,
			    PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
			ZERO_LSN(pagep->lsn);
			modified = 1;
		} else
#endif
		{
			P_INIT(pagep, file_dbp->pgsize,
			    argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
			pagep->lsn = *lsnp;

			modified = 1;
		}
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Need to reallocate the page. */
		memcpy(pagep, argp->header.data, argp->header.size);
		if (data)
			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
			    argp->data.data, argp->data.size);

		modified = 1;
	}
	if (pagep != NULL &&
	    (ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
		goto out;

	pagep = NULL;
#ifdef HAVE_FTRUNCATE
	/*
	 * If we are keeping an in memory free list remove this
	 * element from the list.
	 */
	if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
		db_pgno_t *lp;
		u_int32_t nelem, pos;

		if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
			goto out;
		if (lp != NULL) {
			pos = 0;
			if (!is_meta && nelem != 0) {
				__db_freelist_pos(argp->pgno, lp, nelem, &pos);

				DB_ASSERT(argp->pgno == lp[pos]);
				DB_ASSERT(argp->meta_pgno == lp[pos - 1]);
			}

			/*
			 * Close the gap at pos.  Only the nelem - pos - 1
			 * entries after pos exist; moving one more would
			 * read past the end of the list.
			 */
			if (nelem != 0 && pos != nelem)
				memmove(&lp[pos], &lp[pos + 1],
				    (size_t)(nelem - pos - 1) * sizeof(*lp));

			/*
			 * Shrink the list.
			 * NOTE(review): nelem - 1 wraps if the list is
			 * non-NULL but empty -- confirm that cannot happen.
			 */
			if ((ret =
			    __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
				goto out;
		}
	}
done:
#endif
	if (meta != NULL && (ret = __memp_fput(mpf,
	    meta, meta_modified ? DB_MPOOL_DIRTY : 0)) != 0)
		goto out;
	meta = NULL;
	ret = 0;

out:	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	if (meta != NULL)
		(void)__memp_fput(mpf, meta, 0);

	return (ret);
}
/*
 * __db_pg_free_recover --
 *	Recovery function for pg_free.
 *
 *	Thin wrapper: all of the work is done by __db_pg_free_recover_int,
 *	called with data == 0 so that only the page header is restored on
 *	undo.
 *
 * PUBLIC: int __db_pg_free_recover
 * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_free_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
__db_pg_free_args *argp;
int ret;
COMPQUIET(info, NULL);
REC_PRINT(__db_pg_free_print);
REC_INTRO(__db_pg_free_read, 1, 0);
/*
 * The worker takes a __db_pg_freedata_args; the pg_free record is passed
 * through a cast.  NOTE(review): this relies on the two argument structs
 * sharing a common leading layout -- confirm against their declarations.
 */
ret = __db_pg_free_recover_int(dbenv,
(__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0);
/*
 * *lsnp is stepped back whatever ret holds.  The done and out labels are
 * not referenced in this function body; presumably they are goto targets
 * inside REC_INTRO -- confirm against the macro definition.
 */
done: *lsnp = argp->prev_lsn;
out:
REC_CLOSE;
}
/*
 * __db_pg_new_recover --
 *	Recovery function for pg_new: a new page from the file was put on
 *	the free list.  This record is only generated during a
 *	LIMBO_COMPENSATE.
 *
 *	Without HAVE_FTRUNCATE the page is added to the limbo list; with it
 *	the function does nothing and returns 0.
 *
 * PUBLIC: int __db_pg_new_recover
 * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_new_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifndef HAVE_FTRUNCATE
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	__db_pg_free_args *argp;
	int ret;

	/* The record is decoded with the pg_free reader and printer. */
	REC_PRINT(__db_pg_free_print);
	REC_INTRO(__db_pg_free_read, 1, 0);
	COMPQUIET(op, DB_TXN_ABORT);

	/* Only step to the previous record if the page made it into limbo. */
	ret = __db_add_limbo(dbenv, info, argp->fileid, argp->pgno, 1);
	if (ret == 0)
		*lsnp = argp->prev_lsn;

done:
out:
	REC_CLOSE;
#else
	/* Nothing to recover when the file can be truncated. */
	COMPQUIET(dbenv, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_PRINT);
	COMPQUIET(info, NULL);
	return (0);
#endif
}
/*
 * __db_pg_freedata_recover --
 *	Recovery function for pg_freedata.
 *
 *	Thin wrapper: all of the work is done by __db_pg_free_recover_int,
 *	called with data == 1 so that the logged page data is restored along
 *	with the header on undo.
 *
 * PUBLIC: int __db_pg_freedata_recover
 * PUBLIC:     __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_freedata_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
__db_pg_freedata_args *argp;
int ret;
COMPQUIET(info, NULL);
REC_PRINT(__db_pg_freedata_print);
REC_INTRO(__db_pg_freedata_read, 1, 0);
ret = __db_pg_free_recover_int(dbenv, argp, file_dbp, lsnp, mpf, op, 1);
/*
 * *lsnp is stepped back whatever ret holds.  The done and out labels are
 * not referenced in this function body; presumably they are goto targets
 * inside REC_INTRO -- confirm against the macro definition.
 */
done: *lsnp = argp->prev_lsn;
out:
REC_CLOSE;
}
/*
 * __db_cksum_recover --
 *	Recovery function for checksum failure log record.
 *
 *	Seeing this record means a page failed its checksum.  Unless the
 *	environment is flagged DB_ENV_FATAL, report the failure and panic
 *	the environment.
 *
 * PUBLIC: int __db_cksum_recover __P((DB_ENV *,
 * PUBLIC:      DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_cksum_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_cksum_args *argp;
	int ret;

	COMPQUIET(info, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_ABORT);

	REC_PRINT(__db_cksum_print);

	ret = __db_cksum_read(dbenv, dbtp->data, &argp);
	if (ret != 0)
		return (ret);

	/*
	 * ret is 0 here and stays that way when the environment is flagged
	 * DB_ENV_FATAL.  Otherwise the only option is catastrophic recovery.
	 */
	if (!F_ISSET(dbenv, DB_ENV_FATAL)) {
		__db_err(dbenv,
		    "Checksum failure requires catastrophic recovery");
		ret = __db_panic(dbenv, DB_RUNRECOVERY);
	}

	__os_free(dbenv, argp);
	return (ret);
}
/*
 * __db_pg_prepare_recover --
 *	Recovery function for pg_prepare.
 *
 *	Without HAVE_FTRUNCATE, aborting re-initializes the page as an
 *	invalid page with a zero LSN and adds it to the limbo list.  With
 *	HAVE_FTRUNCATE the function does nothing and returns 0.
 *
 * PUBLIC: int __db_pg_prepare_recover
 * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_prepare_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifndef HAVE_FTRUNCATE
	__db_pg_prepare_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int ret, t_ret;

	REC_PRINT(__db_pg_prepare_print);
	REC_INTRO(__db_pg_prepare_read, 1, 0);

	mpf = file_dbp->mpf;

	/* Only an abort has anything to do. */
	if (op != DB_TXN_ABORT)
		goto done;

	/*
	 * If this page made it into the limbo list at prepare time, it was
	 * a new free page allocated by an aborted subtransaction, and only
	 * that subtransaction could have touched it.  Every other page on
	 * the free list at this point is either of the same nature or was
	 * put there by subtransactions that followed this one; in the
	 * latter case the log records of those allocations reflect that.
	 * Only one transaction could have had the metapage locked at the
	 * point of the crash.
	 *
	 * So we can P_INIT this page without losing other pages on the
	 * free list: they will be linked in by records earlier in the log
	 * for this transaction, which we will roll back.
	 */
	ret = __memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep);
	if (ret != 0)
		goto out;

	P_INIT(pagep, file_dbp->pgsize,
	    argp->pgno, PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
	ZERO_LSN(pagep->lsn);

	/* Always release the page; keep the first error seen. */
	ret = __db_add_limbo(dbenv, info, argp->fileid, argp->pgno, 1);
	t_ret = __memp_fput(mpf, pagep, DB_MPOOL_DIRTY);
	if (t_ret != 0 && ret == 0)
		ret = t_ret;

done:	if (ret == 0)
		*lsnp = argp->prev_lsn;
out:	REC_CLOSE;
#else
	/* Nothing to recover when the file can be truncated. */
	COMPQUIET(dbenv, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_PRINT);
	COMPQUIET(info, NULL);
	return (0);
#endif
}
/*
 * __db_pg_init_recover --
 *	Recovery function to reinit pages for truncate.
 *
 *	Redo re-initializes the page as an empty page (hash pages stay hash
 *	pages; otherwise the type follows the database type).  Undo copies
 *	the logged header, and any logged data, back onto the page.
 *
 *	Returns 0 on success, including when there is no page to fix up.
 *
 * PUBLIC: int __db_pg_init_recover
 * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_init_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
	__db_pg_init_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DB_LSN copy_lsn;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	int cmp_n, cmp_p, modified, ret, type;

	COMPQUIET(info, NULL);
	REC_PRINT(__db_pg_init_print);
	REC_INTRO(__db_pg_init_read, 1, 0);

	mpf = file_dbp->mpf;
	REC_FGET(mpf, argp->pgno, &pagep, done);

	modified = 0;
	/* The logged page header holds the page's LSN before the reinit. */
	(void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
	cmp_n = log_compare(lsnp, &LSN(pagep));
	cmp_p = log_compare(&LSN(pagep), &copy_lsn);
	CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &copy_lsn);

	if (cmp_p == 0 && DB_REDO(op)) {
		if (TYPE(pagep) == P_HASH)
			type = P_HASH;
		else
			type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
		P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
		    PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
		pagep->lsn = *lsnp;
		modified = 1;
	} else if (cmp_n == 0 && DB_UNDO(op)) {
		/* Put the data back on the page. */
		memcpy(pagep, argp->header.data, argp->header.size);
		if (argp->data.size > 0)
			memcpy((u_int8_t*)pagep + HOFFSET(pagep),
			    argp->data.data, argp->data.size);
		modified = 1;
	}
	if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
		goto out;

	/*
	 * REC_FGET can branch here with its failure status still in ret
	 * (__db_pg_free_recover_int tests ret at its REC_FGET label for that
	 * reason), so success must be set explicitly, as the other recovery
	 * functions in this file do.
	 */
done:	*lsnp = argp->prev_lsn;
	ret = 0;
out:
	REC_CLOSE;
}
/*
 * __db_pg_sort_recover --
 *	Recovery function for pg_sort.
 *
 *	The record carries a list of free pages (argp->list) together with
 *	the page preceding them on the free list (argp->last_free, or
 *	PGNO_INVALID when the list starts at the metadata page's free
 *	pointer).
 *
 *	Redo truncates what it can via __db_pg_truncate, terminates the list
 *	at last_free and updates the metadata page.  Undo rebuilds the listed
 *	pages as a linked free list, relinks last_free (or meta->free) to the
 *	first of them and restores the metadata page.  On abort the pages are
 *	also appended to the in-memory free list, if one is kept.
 *
 *	Every page pinned here is released on all exits, including errors.
 *	Without HAVE_FTRUNCATE, pg_sort records are never written and the
 *	function returns EINVAL.
 *
 * PUBLIC: int __db_pg_sort_recover
 * PUBLIC:   __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
 */
int
__db_pg_sort_recover(dbenv, dbtp, lsnp, op, info)
	DB_ENV *dbenv;
	DBT *dbtp;
	DB_LSN *lsnp;
	db_recops op;
	void *info;
{
#ifdef HAVE_FTRUNCATE
	__db_pg_sort_args *argp;
	DB *file_dbp;
	DBC *dbc;
	DBMETA *meta;
	DB_MPOOLFILE *mpf;
	PAGE *pagep;
	db_pgno_t pgno, *list;
	u_int32_t felem, nelem;
	struct pglist *pglist, *lp;
	int modified, ret;

	/* Set before REC_INTRO so the cleanup at "out" is always safe. */
	meta = NULL;
	pagep = NULL;

	COMPQUIET(info, NULL);
	REC_PRINT(__db_pg_sort_print);
	REC_INTRO(__db_pg_sort_read, 1, 1);

	modified = 0;

	pglist = (struct pglist *) argp->list.data;
	nelem = argp->list.size / sizeof(struct pglist);
	if (DB_REDO(op)) {
		pgno = argp->last_pgno;
		if ((ret = __db_pg_truncate(mpf,
		    pglist, NULL, &nelem, &pgno, lsnp, 1)) != 0)
			goto out;

		/* Terminate the free list at the page before the run. */
		if (argp->last_free != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, 0, &meta)) == 0) {
				if (log_compare(&LSN(meta),
				    &argp->last_lsn) == 0) {
					NEXT_PGNO(meta) = PGNO_INVALID;
					LSN(meta) = *lsnp;
					modified = 1;
				}
				ret = __memp_fput(mpf,
				    meta, modified ? DB_MPOOL_DIRTY : 0);
				meta = NULL;
				modified = 0;
				if (ret != 0)
					goto out;
			} else {
				/* Nothing is pinned after a failed get. */
				meta = NULL;
				if (ret != DB_PAGE_NOTFOUND)
					goto out;
			}
		}

		if ((ret = __memp_fget(mpf, &argp->meta, 0, &meta)) != 0) {
			meta = NULL;
			goto out;
		}
		if (log_compare(&LSN(meta), &argp->meta_lsn) == 0) {
			if (argp->last_free == PGNO_INVALID) {
				if (nelem == 0)
					meta->free = PGNO_INVALID;
				else
					meta->free = pglist->pgno;
			}
			meta->last_pgno = pgno;
			LSN(meta) = *lsnp;
			modified = 1;
		}
	} else {
		/* Put the free list back in its original order. */
		for (lp = pglist; lp < &pglist[nelem]; lp++) {
			if ((ret = __memp_fget(mpf,
			    &lp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
				pagep = NULL;
				goto out;
			}
			if (IS_ZERO_LSN(LSN(pagep)) ||
			     log_compare(&LSN(pagep), lsnp) == 0) {
				if (lp == &pglist[nelem - 1])
					pgno = PGNO_INVALID;
				else
					pgno = lp[1].pgno;

				P_INIT(pagep, file_dbp->pgsize,
				    lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
				LSN(pagep) = lp->lsn;
				modified = 1;
			}
			/*
			 * Reset the flag for each page so that only pages
			 * that were actually rewritten are marked dirty.
			 */
			ret = __memp_fput(mpf,
			    pagep, modified ? DB_MPOOL_DIRTY : 0);
			pagep = NULL;
			modified = 0;
			if (ret != 0)
				goto out;
		}

		/* Relink the page before the run to the first listed page. */
		if (argp->last_free != PGNO_INVALID) {
			if ((ret = __memp_fget(mpf,
			    &argp->last_free, 0, &meta)) == 0) {
				if (log_compare(&LSN(meta), lsnp) == 0) {
					NEXT_PGNO(meta) = pglist->pgno;
					LSN(meta) = argp->last_lsn;
					modified = 1;
				}
				ret = __memp_fput(mpf,
				    meta, modified ? DB_MPOOL_DIRTY : 0);
				meta = NULL;
				modified = 0;
				if (ret != 0)
					goto out;
			} else {
				/* Nothing is pinned after a failed get. */
				meta = NULL;
				if (ret != DB_PAGE_NOTFOUND)
					goto out;
			}
		}

		if ((ret = __memp_fget(mpf, &argp->meta, 0, &meta)) != 0) {
			meta = NULL;
			goto out;
		}
		if (log_compare(&LSN(meta), lsnp) == 0) {
			meta->last_pgno = argp->last_pgno;
			/*
			 * Mirror of the redo case: the metadata page's free
			 * pointer is the head of the run exactly when no
			 * page precedes it on the list.
			 */
			if (argp->last_free == PGNO_INVALID)
				meta->free = pglist->pgno;
			LSN(meta) = argp->meta_lsn;
			modified = 1;
		}
	}

	/* Append the pages to the in-memory free list, if one is kept. */
	if (op == DB_TXN_ABORT) {
		if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
			goto out;
		if (list != NULL) {
			DB_ASSERT(felem == 0 ||
			    argp->last_free == list[felem - 1]);
			if ((ret = __memp_extend_freelist(
			    mpf, felem + nelem, &list)) != 0)
				goto out;
			for (lp = pglist; lp < &pglist[nelem]; lp++)
				list[felem++] = lp->pgno;
		}
	}

	ret = __memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0);
	meta = NULL;
	if (ret != 0)
		goto out;

done:	*lsnp = argp->prev_lsn;
	ret = 0;

out:	/* Release anything still pinned by an error exit. */
	if (pagep != NULL)
		(void)__memp_fput(mpf, pagep, 0);
	if (meta != NULL)
		(void)__memp_fput(mpf, meta, 0);
	REC_CLOSE;
#else
	/*
	 * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
	 * to recover.
	 */
	COMPQUIET(dbenv, NULL);
	COMPQUIET(dbtp, NULL);
	COMPQUIET(lsnp, NULL);
	COMPQUIET(op, DB_TXN_ABORT);
	COMPQUIET(info, NULL);
	return (EINVAL);
#endif
}