mariadb/storage/bdb/hash/hash_rec.c

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996-2005
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* Margo Seltzer. All rights reserved.
*/
/*
* Copyright (c) 1995, 1996
* The President and Fellows of Harvard University. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Margo Seltzer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: hash_rec.c,v 12.7 2005/09/28 17:44:52 margo Exp $
*/
#include "db_config.h"
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/btree.h"
#include "dbinc/hash.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *, DB_LSN *));
/*
* __ham_insdel_recover --
*
* PUBLIC: int __ham_insdel_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_insdel_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_insdel_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t flags, opcode;
int cmp_n, cmp_p, ret, type;
pagep = NULL;
COMPQUIET(info, NULL);
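/*
 * REC_PRINT dumps the record when recovery diagnostics are compiled in,
 * and REC_INTRO unmarshals the log record into argp and maps the
 * record's file id to the DB handle (file_dbp) and mpool file (mpf)
 * that the record refers to.
 */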
REC_PRINT(__ham_insdel_print);
REC_INTRO(__ham_insdel_read, 1, 0);
if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
if (ret == DB_PAGE_NOTFOUND)
goto done;
else {
ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
}
#ifdef HAVE_FTRUNCATE
/* If the page is not here then it was later truncated. */
if (!IS_ZERO_LSN(argp->pagelsn))
goto done;
#endif
/*
* This page was created by a group allocation and
* the file may not have been extended yet.
* Create the page if necessary.
*/
if ((ret = __memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
}
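/*
 * Compare LSNs to decide what to do: cmp_p == 0 means the page still
 * carries the LSN it had before this operation, so the operation may
 * need to be redone; cmp_n == 0 means the page carries this record's
 * LSN, so the operation took effect and can be rolled back on undo.
 */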
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
/*
* Two possible things going on:
* redo a delete/undo a put: delete the item from the page.
* redo a put/undo a delete: add the item to the page.
* If we are undoing a delete, then the information logged is the
* entire entry off the page, not just the data of a dbt. In
* this case, we want to copy it back onto the page verbatim.
* We do this by calling __ham_putitem with the type H_OFFPAGE instead
* of H_KEYDATA.
*/
opcode = OPCODE_OF(argp->opcode);
flags = 0;
if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) ||
(opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) {
/*
* Need to redo a PUT or undo a delete. If we are undoing a
* delete, we've got to restore the item back to its original
* position. That's a royal pain in the butt (because we do
* not store item lengths on the page), but there's no choice.
*/
if (opcode != DELPAIR ||
argp->ndx == (u_int32_t)NUM_ENT(pagep)) {
__ham_putitem(file_dbp, pagep, &argp->key,
DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ?
H_OFFPAGE : H_KEYDATA);
if (PAIR_ISDATADUP(argp->opcode))
type = H_DUPLICATE;
else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode))
type = H_OFFPAGE;
else
type = H_KEYDATA;
__ham_putitem(file_dbp, pagep, &argp->data, type);
} else
__ham_reputpair(file_dbp, pagep,
argp->ndx, &argp->key, &argp->data);
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
flags = DB_MPOOL_DIRTY;
} else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) ||
(opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) {
/* Need to undo a put or redo a delete. */
__ham_dpair(file_dbp, pagep, argp->ndx);
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
/* Return the previous LSN. */
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
* __ham_newpage_recover --
* This log message is used when we add/remove overflow pages. This
* message takes care of the pointer chains, not the data on the pages.
*
* PUBLIC: int __ham_newpage_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_newpage_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_newpage_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t flags;
int cmp_n, cmp_p, ret;
pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__ham_newpage_print);
REC_INTRO(__ham_newpage_read, 1, 0);
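/*
 * REC_FGET wraps __memp_fget: on failure it either reports a page
 * error or, when the page has simply gone missing, continues at the
 * supplied label (here, the prev-page fixup).
 */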
REC_FGET(mpf, argp->new_pgno, &pagep, ppage);
/*
* There are potentially three pages we need to check: the one
* that we created/deleted, the one before it and the one after
* it.
*/
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
flags = 0;
if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
argp->prev_pgno, argp->next_pgno, 0, P_HASH);
flags = DB_MPOOL_DIRTY;
} else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
/*
* Redo a delete or undo a create new page. All we
* really need to do is change the LSN.
*/
flags = DB_MPOOL_DIRTY;
}
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
/* Now do the prev page. */
ppage: if (argp->prev_pgno != PGNO_INVALID) {
REC_FGET(mpf, argp->prev_pgno, &pagep, npage);
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->prevlsn);
flags = 0;
if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
pagep->next_pgno = argp->new_pgno;
flags = DB_MPOOL_DIRTY;
} else if ((cmp_p == 0 &&
DB_REDO(op) && argp->opcode == DELOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->next_pgno = argp->next_pgno;
flags = DB_MPOOL_DIRTY;
}
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
}
/* Now do the next page. */
npage: if (argp->next_pgno != PGNO_INVALID) {
REC_FGET(mpf, argp->next_pgno, &pagep, done);
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nextlsn);
flags = 0;
if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) {
/* Redo a create new page or undo a delete new page. */
pagep->prev_pgno = argp->new_pgno;
flags = DB_MPOOL_DIRTY;
} else if ((cmp_p == 0 &&
DB_REDO(op) && argp->opcode == DELOVFL) ||
(cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) {
/* Redo a delete or undo a create new page. */
pagep->prev_pgno = argp->prev_pgno;
flags = DB_MPOOL_DIRTY;
}
if (flags)
LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
}
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
* __ham_replace_recover --
* This log message refers to partial puts that are local to a single
* page. You can think of them as special cases of the more general
* insdel log message.
*
* PUBLIC: int __ham_replace_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_replace_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_replace_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
DBT dbt;
PAGE *pagep;
u_int32_t flags;
u_int32_t change;
int cmp_n, cmp_p, is_plus, ret;
u_int8_t *hk;
pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__ham_replace_print);
REC_INTRO(__ham_replace_read, 1, 0);
REC_FGET(mpf, argp->pgno, &pagep, done);
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
memset(&dbt, 0, sizeof(dbt));
flags = 0;
/*
* Before we know the direction of the transformation we will
* determine the size differential; then once we know if we are
* redoing or undoing, we'll adjust the sign (is_plus) appropriately.
*/
if (argp->newitem.size > argp->olditem.size) {
change = argp->newitem.size - argp->olditem.size;
is_plus = 1;
} else {
change = argp->olditem.size - argp->newitem.size;
is_plus = 0;
}
if (cmp_p == 0 && DB_REDO(op)) {
/* Reapply the change as specified. */
dbt.data = argp->newitem.data;
dbt.size = argp->newitem.size;
LSN(pagep) = *lsnp;
/*
* The is_plus flag is set properly to reflect
* newitem.size - olditem.size.
*/
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Undo the already applied change. */
dbt.data = argp->olditem.data;
dbt.size = argp->olditem.size;
/*
* Invert is_plus to reflect sign of
* olditem.size - newitem.size.
*/
is_plus = !is_plus;
LSN(pagep) = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
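/*
 * __ham_onpage_replace rewrites the bytes at offset argp->off within
 * entry argp->ndx, growing or shrinking the item by "change" bytes in
 * the direction given by is_plus and shifting the rest of the page
 * accordingly.
 */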
if (flags) {
__ham_onpage_replace(file_dbp, pagep,
argp->ndx, argp->off, change, is_plus, &dbt);
if (argp->makedup) {
hk = P_ENTRY(file_dbp, pagep, argp->ndx);
if (DB_REDO(op))
HPAGE_PTYPE(hk) = H_DUPLICATE;
else
HPAGE_PTYPE(hk) = H_KEYDATA;
}
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
* __ham_splitdata_recover --
*
* PUBLIC: int __ham_splitdata_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_splitdata_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_splitdata_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t flags;
int cmp_n, cmp_p, ret;
pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__ham_splitdata_print);
REC_INTRO(__ham_splitdata_read, 1, 0);
if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op)) {
if (ret == DB_PAGE_NOTFOUND)
goto done;
else {
ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
}
#ifdef HAVE_FTRUNCATE
/* If the page is not here then it was later truncated. */
if (!IS_ZERO_LSN(argp->pagelsn))
goto done;
#endif
/*
* This page was created by a group allocation and
* the file may not have been extended yet.
* Create the page if necessary.
*/
if ((ret = __memp_fget(mpf,
&argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
}
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
/*
* There are two types of log messages here, one for the old page
* and one for the new pages created. The original image in the
* SPLITOLD record is used for undo. The image in the SPLITNEW
* is used for redo. We should never have a case where there is
* a redo operation and the SPLITOLD record is on disk, but not
* the SPLITNEW record. Therefore, we only have work to do when we
* redo NEW messages and undo OLD messages, but we have to update
* LSNs in both cases.
*/
flags = 0;
if (cmp_p == 0 && DB_REDO(op)) {
if (argp->opcode == SPLITNEW)
/* Need to redo the split described. */
memcpy(pagep, argp->pageimage.data,
argp->pageimage.size);
LSN(pagep) = *lsnp;
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
if (argp->opcode == SPLITOLD) {
/* Put back the old image. */
memcpy(pagep, argp->pageimage.data,
argp->pageimage.size);
} else
P_INIT(pagep, file_dbp->pgsize, argp->pgno,
PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
LSN(pagep) = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
* __ham_copypage_recover --
* Recovery function for copypage.
*
* PUBLIC: int __ham_copypage_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_copypage_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_copypage_args *argp;
DB *file_dbp;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
u_int32_t flags;
int cmp_n, cmp_p, ret;
pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__ham_copypage_print);
REC_INTRO(__ham_copypage_read, 1, 0);
flags = 0;
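/*
 * Copypage copies the contents of the bucket's first overflow page
 * (next_pgno) onto the bucket page (pgno) and removes that overflow
 * page from the chain, so recovery may have to fix three pages: the
 * bucket page, the page that was copied, and the page that followed
 * it (nnext_pgno).
 */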
/* This is the bucket page. */
REC_FGET(mpf, argp->pgno, &pagep, donext);
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
memcpy(pagep, argp->page.data, argp->page.size);
PGNO(pagep) = argp->pgno;
PREV_PGNO(pagep) = PGNO_INVALID;
LSN(pagep) = *lsnp;
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
argp->next_pgno, 0, P_HASH);
LSN(pagep) = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
donext: /* Now fix up the "next" page. */
REC_FGET(mpf, argp->next_pgno, &pagep, do_nn);
/* For REDO just update the LSN. For UNDO copy page back. */
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nextlsn);
flags = 0;
if (cmp_p == 0 && DB_REDO(op)) {
LSN(pagep) = *lsnp;
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
memcpy(pagep, argp->page.data, argp->page.size);
flags = DB_MPOOL_DIRTY;
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
/* Now fix up the next's next page. */
do_nn: if (argp->nnext_pgno == PGNO_INVALID)
goto done;
REC_FGET(mpf, argp->nnext_pgno, &pagep, done);
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->nnextlsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nnextlsn);
flags = 0;
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
PREV_PGNO(pagep) = argp->pgno;
LSN(pagep) = *lsnp;
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
PREV_PGNO(pagep) = argp->next_pgno;
LSN(pagep) = argp->nnextlsn;
flags = DB_MPOOL_DIRTY;
}
if ((ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (pagep != NULL)
(void)__memp_fput(mpf, pagep, 0);
REC_CLOSE;
}
/*
* __ham_metagroup_recover --
* Recovery function for metagroup.
*
* PUBLIC: int __ham_metagroup_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_metagroup_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_metagroup_args *argp;
HASH_CURSOR *hcp;
DB *file_dbp;
DBMETA *mmeta;
DBC *dbc;
DB_MPOOLFILE *mpf;
PAGE *pagep;
db_pgno_t pgno;
u_int32_t flags, mmeta_flags;
int cmp_n, cmp_p, did_alloc, did_recover, groupgrow, ret;
COMPQUIET(info, NULL);
mmeta_flags = 0;
did_alloc = 0;
mmeta = NULL;
REC_PRINT(__ham_metagroup_print);
REC_INTRO(__ham_metagroup_read, 1, 1);
/*
* This logs the virtual create of pages pgno through pgno + bucket.
* If HAVE_FTRUNCATE is not supported, the mpool page allocation is not
* transaction protected and we can never undo it. Even in an abort,
* we have to allocate these pages to the hash table if they
* were actually created. In particular, during disaster
* recovery the metapage may be before this point if we
* are rolling backward. If the file has not been extended,
* then the metapage could not have been updated.
* The log record contains:
* bucket: old maximum bucket
* pgno: page number of the new bucket.
* We round up on log calculations, so we can figure out if we are
* about to double the hash table if argp->bucket + 1 is a power of 2.
* If it is, then we are allocating an entire doubling of pages;
* otherwise, we are simply allocating one new page.
*/
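/*
 * For example, if the old maximum bucket is 3, adding bucket 4 makes
 * argp->bucket + 1 == 4, a power of two, so this record covers an
 * entire new doubling of pages rather than a single new page.
 */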
groupgrow =
(u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1;
pgno = argp->pgno;
if (argp->newalloc)
pgno += argp->bucket;
flags = 0;
pagep = NULL;
#ifndef HAVE_FTRUNCATE
flags = DB_MPOOL_CREATE;
#endif
ret = __memp_fget(mpf, &pgno, flags, &pagep);
#ifdef HAVE_FTRUNCATE
/* If we are undoing, then we don't want to create the page. */
if (ret != 0 && DB_REDO(op))
ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep);
else if (ret == DB_PAGE_NOTFOUND)
goto do_meta;
#endif
if (ret != 0) {
if (ret != ENOSPC)
goto out;
pgno = 0;
goto do_meta;
}
/*
* When we get here, either we did not grow the file
* (groupgrow == 0) or we did grow the file and the allocation
* of those new pages succeeded.
*/
did_alloc = groupgrow;
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn);
flags = 0;
if (cmp_p == 0 && DB_REDO(op)) {
pagep->lsn = *lsnp;
flags = DB_MPOOL_DIRTY;
} else if (cmp_n == 0 && DB_UNDO(op)) {
#ifdef HAVE_FTRUNCATE
/* If this record allocated the pages, give them back. */
if (argp->newalloc) {
if (pagep != NULL && (ret =
__memp_fput(mpf, pagep, DB_MPOOL_DISCARD)) != 0)
goto out;
pagep = NULL;
if ((ret = __memp_ftruncate(mpf, argp->pgno, 0)) != 0)
goto out;
} else
#endif
{
/*
* Otherwise just roll the page back to its
* previous state.
*/
pagep->lsn = argp->pagelsn;
flags = DB_MPOOL_DIRTY;
}
}
if (pagep != NULL && (ret = __memp_fput(mpf, pagep, flags)) != 0)
goto out;
do_meta:
/* Now we have to update the meta-data page. */
hcp = (HASH_CURSOR *)dbc->internal;
if ((ret = __ham_get_meta(dbc)) != 0)
goto out;
cmp_n = log_compare(lsnp, &hcp->hdr->dbmeta.lsn);
cmp_p = log_compare(&hcp->hdr->dbmeta.lsn, &argp->metalsn);
CHECK_LSN(dbenv, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn);
did_recover = 0;
if (cmp_p == 0 && DB_REDO(op)) {
/* Redo the actual updating of bucket counts. */
++hcp->hdr->max_bucket;
if (groupgrow) {
hcp->hdr->low_mask = hcp->hdr->high_mask;
hcp->hdr->high_mask =
(argp->bucket + 1) | hcp->hdr->low_mask;
}
hcp->hdr->dbmeta.lsn = *lsnp;
did_recover = 1;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Undo the actual updating of bucket counts. */
hcp->hdr->max_bucket = argp->bucket;
if (groupgrow) {
hcp->hdr->high_mask = argp->bucket;
hcp->hdr->low_mask = hcp->hdr->high_mask >> 1;
}
hcp->hdr->dbmeta.lsn = argp->metalsn;
did_recover = 1;
}
/*
* Now we need to fix up the spares array. Each entry in the
* spares array indicates the beginning page number for the
* indicated doubling. We need to fill this in whenever the
* spares array is invalid; if we never reclaim pages, then
* we have to allocate the pages to the spares array in both
* the redo and undo cases.
*/
if (did_alloc &&
#ifdef HAVE_FTRUNCATE
!DB_UNDO(op) &&
#endif
hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) {
hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] =
(argp->pgno - argp->bucket) - 1;
did_recover = 1;
}
#ifdef HAVE_FTRUNCATE
if (cmp_n == 0 && groupgrow && DB_UNDO(op)) {
hcp->hdr->spares[
__db_log2(argp->bucket + 1) + 1] = PGNO_INVALID;
did_recover = 1;
}
#endif
/*
* Finally, we need to potentially fix up the last_pgno field
* in the master meta-data page (which may or may not be the
* same as the hash header page).
*/
if (argp->mmpgno != argp->mpgno) {
if ((ret = __memp_fget(mpf, &argp->mmpgno, 0, &mmeta)) != 0)
goto out;
mmeta_flags = 0;
cmp_n = log_compare(lsnp, &mmeta->lsn);
cmp_p = log_compare(&mmeta->lsn, &argp->mmetalsn);
if (cmp_p == 0 && DB_REDO(op))
mmeta->lsn = *lsnp;
else if (cmp_n == 0 && DB_UNDO(op))
mmeta->lsn = argp->mmetalsn;
} else
mmeta = (DBMETA *)hcp->hdr;
#ifdef HAVE_FTRUNCATE
if (cmp_n == 0 && DB_UNDO(op))
mmeta->last_pgno = argp->last_pgno;
else if (DB_REDO(op))
#endif
if (mmeta->last_pgno < pgno)
mmeta->last_pgno = pgno;
mmeta_flags = DB_MPOOL_DIRTY;
if (argp->mmpgno != argp->mpgno &&
(ret = __memp_fput(mpf, mmeta, mmeta_flags)) != 0)
goto out;
mmeta = NULL;
if (did_recover)
F_SET(hcp, H_DIRTY);
done: *lsnp = argp->prev_lsn;
ret = 0;
out: if (mmeta != NULL)
(void)__memp_fput(mpf, mmeta, 0);
if (dbc != NULL)
(void)__ham_release_meta(dbc);
if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC)
ret = 0;
REC_CLOSE;
}
/*
* __ham_groupalloc_recover --
* Recover the batch creation of a set of pages for a new database.
*
* PUBLIC: int __ham_groupalloc_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_groupalloc_args *argp;
DBMETA *mmeta;
DB_MPOOLFILE *mpf;
DB *file_dbp;
DBC *dbc;
PAGE *pagep;
db_pgno_t pgno;
int cmp_n, cmp_p, modified, ret;
mmeta = NULL;
modified = 0;
REC_PRINT(__ham_groupalloc_print);
REC_INTRO(__ham_groupalloc_read, 0, 0);
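/* The master meta-data page is the first page (PGNO_BASE_MD) of the file. */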
pgno = PGNO_BASE_MD;
if ((ret = __memp_fget(mpf, &pgno, 0, &mmeta)) != 0) {
if (DB_REDO(op)) {
ret = __db_pgerr(file_dbp, pgno, ret);
goto out;
} else
goto done;
}
cmp_n = log_compare(lsnp, &LSN(mmeta));
cmp_p = log_compare(&LSN(mmeta), &argp->meta_lsn);
CHECK_LSN(dbenv, op, cmp_p, &LSN(mmeta), &argp->meta_lsn);
/*
* Basically, we used mpool to allocate a chunk of pages.
* We need to either add those to a free list (in the undo
* case) or initialize them (in the redo case).
*
* If we are redoing and this is a hash subdatabase, it's possible
* that the pages were never allocated, so we'd better check for
* that and handle it here.
*/
pgno = argp->start_pgno + argp->num - 1;
if (DB_REDO(op)) {
if ((ret = __ham_alloc_pages(file_dbp, argp, lsnp)) != 0)
goto out;
if (cmp_p == 0) {
LSN(mmeta) = *lsnp;
modified = 1;
}
} else if (DB_UNDO(op)) {
/*
* Fetch the last page and determine if it is in
* the post allocation state.
*/
pagep = NULL;
if ((ret = __memp_fget(mpf, &pgno, 0, &pagep)) == 0) {
if (log_compare(&pagep->lsn, lsnp) != 0) {
if ((ret = __memp_fput(mpf,
pagep, DB_MPOOL_DISCARD)) != 0)
goto out;
pagep = NULL;
}
} else if (ret != DB_PAGE_NOTFOUND)
goto out;
#ifdef HAVE_FTRUNCATE
COMPQUIET(info, NULL);
/*
* If the last page was allocated then truncate back
* to the first page.
*/
if (pagep != NULL) {
if ((ret =
__memp_fput(mpf, pagep, DB_MPOOL_DISCARD)) != 0)
goto out;
if ((ret =
__memp_ftruncate(mpf, argp->start_pgno, 0)) != 0)
goto out;
}
/*
* If we are rolling back the metapage, then make
* sure it reflects the correct last_pgno.
*/
if (cmp_n == 0) {
mmeta->last_pgno = argp->last_pgno;
modified = 1;
}
pgno = 0;
#else
/*
* Reset the last page back to its preallocation state.
*/
if (pagep != NULL) {
if (log_compare(&pagep->lsn, lsnp) == 0)
ZERO_LSN(pagep->lsn);
if ((ret =
__memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
goto out;
}
/*
* Put the pages into the limbo list and free them later.
*/
if ((ret = __db_add_limbo(dbenv,
info, argp->fileid, argp->start_pgno, argp->num)) != 0)
goto out;
#endif
if (cmp_n == 0) {
LSN(mmeta) = argp->meta_lsn;
modified = 1;
}
}
/*
* In both REDO and UNDO, we have grown the file and need to make
* sure that last_pgno is correct. If HAVE_FTRUNCATE is defined,
* pgno will only be valid on REDO.
*/
if (pgno > mmeta->last_pgno) {
mmeta->last_pgno = pgno;
modified = 1;
}
done: if (ret == 0)
*lsnp = argp->prev_lsn;
ret = 0;
out: if (mmeta != NULL)
(void)__memp_fput(mpf, mmeta, modified ? DB_MPOOL_DIRTY : 0);
if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC)
ret = 0;
REC_CLOSE;
}
/*
* __ham_alloc_pages --
*
* Called during redo of a file create. We create new pages in the file
* using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a
* __crdel_metasub message. If we manage to crash without the newly written
* pages getting to disk (I'm not sure this can happen anywhere except our
* test suite?!), then we need to go through and recreate the final pages.
* Hash normally has holes in its files and handles them appropriately.
*/
static int
__ham_alloc_pages(dbp, argp, lsnp)
DB *dbp;
__ham_groupalloc_args *argp;
DB_LSN *lsnp;
{
DB_MPOOLFILE *mpf;
PAGE *pagep;
db_pgno_t pgno;
int ret;
mpf = dbp->mpf;
/* Read the last page of the allocation. */
pgno = argp->start_pgno + argp->num - 1;
/* If the page exists, and it has been initialized, then we're done. */
if ((ret = __memp_fget(mpf, &pgno, 0, &pagep)) == 0) {
if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn))
goto reinit_page;
if ((ret = __memp_fput(mpf, pagep, 0)) != 0)
return (ret);
return (0);
}
/* Had to create the page. */
if ((ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &pagep)) != 0)
return (__db_pgerr(dbp, pgno, ret));
reinit_page:
/* Initialize the newly allocated page. */
P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
pagep->lsn = *lsnp;
if ((ret = __memp_fput(mpf, pagep, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
}
/*
* __ham_curadj_recover --
* Undo cursor adjustments if a subtransaction fails.
*
* PUBLIC: int __ham_curadj_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_curadj_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_curadj_args *argp;
DB_MPOOLFILE *mpf;
DB *file_dbp;
DBC *dbc;
int ret;
HASH_CURSOR *hcp;
COMPQUIET(info, NULL);
REC_PRINT(__ham_curadj_print);
REC_INTRO(__ham_curadj_read, 0, 1);
if (op != DB_TXN_ABORT)
goto done;
/*
* Undo the adjustment by reinitializing the cursor to look like
* the one that was used to do the adjustment; then we invert the
* add so that we undo the adjustment.
*/
hcp = (HASH_CURSOR *)dbc->internal;
hcp->pgno = argp->pgno;
hcp->indx = argp->indx;
hcp->dup_off = argp->dup_off;
hcp->order = argp->order;
if (!argp->add)
F_SET(hcp, H_DELETED);
(void)__ham_c_update(dbc, argp->len, !argp->add, argp->is_dup);
done: *lsnp = argp->prev_lsn;
out: REC_CLOSE;
}
/*
* __ham_chgpg_recover --
* Undo cursor adjustments if a subtransaction fails.
*
* PUBLIC: int __ham_chgpg_recover
* PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
*/
int
__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info)
DB_ENV *dbenv;
DBT *dbtp;
DB_LSN *lsnp;
db_recops op;
void *info;
{
__ham_chgpg_args *argp;
BTREE_CURSOR *opdcp;
DB_MPOOLFILE *mpf;
DB *file_dbp, *ldbp;
DBC *dbc;
int ret;
DBC *cp;
HASH_CURSOR *lcp;
u_int32_t order, indx;
COMPQUIET(info, NULL);
REC_PRINT(__ham_chgpg_print);
REC_INTRO(__ham_chgpg_read, 0, 0);
if (op != DB_TXN_ABORT)
goto done;
/* Overloaded fields for DB_HAM_DEL*PG */
indx = argp->old_indx;
order = argp->new_indx;
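/*
 * Walk every open handle on this file and every active cursor on each
 * handle, adjusting any cursor that the aborted operation moved.
 */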
MUTEX_LOCK(dbenv, dbenv->mtx_dblist);
for (ldbp = __dblist_get(dbenv, file_dbp->adj_fileid);
ldbp != NULL && ldbp->adj_fileid == file_dbp->adj_fileid;
ldbp = LIST_NEXT(ldbp, dblistlinks)) {
MUTEX_LOCK(dbenv, file_dbp->mutex);
for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
cp = TAILQ_NEXT(cp, links)) {
lcp = (HASH_CURSOR *)cp->internal;
switch (argp->mode) {
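/*
 * The DB_HAM_DEL*PG modes undo the removal of the first, a middle, or
 * the last page of a bucket chain: cursors that were shifted onto
 * new_pgno are moved back to old_pgno and their order offsets are
 * restored.
 */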
case DB_HAM_DELFIRSTPG:
if (lcp->pgno != argp->new_pgno)
break;
if (lcp->indx != indx ||
!F_ISSET(lcp, H_DELETED) ||
lcp->order >= order) {
lcp->pgno = argp->old_pgno;
if (lcp->indx == indx)
lcp->order -= order;
}
break;
case DB_HAM_DELMIDPG:
case DB_HAM_DELLASTPG:
if (lcp->pgno == argp->new_pgno &&
lcp->indx == indx &&
F_ISSET(lcp, H_DELETED) &&
lcp->order >= order) {
lcp->pgno = argp->old_pgno;
lcp->order -= order;
lcp->indx = 0;
}
break;
case DB_HAM_CHGPG:
/*
* If we're doing a CHGPG, we're undoing
* the move of a non-deleted item to a
* new page. Any cursors with the deleted
* flag set do not belong to this item;
* don't touch them.
*/
if (F_ISSET(lcp, H_DELETED))
break;
/* FALLTHROUGH */
case DB_HAM_SPLIT:
if (lcp->pgno == argp->new_pgno &&
lcp->indx == argp->new_indx) {
lcp->indx = argp->old_indx;
lcp->pgno = argp->old_pgno;
}
break;
case DB_HAM_DUP:
if (lcp->opd == NULL)
break;
opdcp = (BTREE_CURSOR *)lcp->opd->internal;
if (opdcp->pgno != argp->new_pgno ||
opdcp->indx != argp->new_indx)
break;
if (F_ISSET(opdcp, C_DELETED))
F_SET(lcp, H_DELETED);
/*
* We can't close a cursor while we have the
* dbp mutex locked, since c_close reacquires
* it. It should be safe to drop the mutex
* here, though, since newly opened cursors
* are put only at the end of the tailq and
* the cursor we're adjusting can't be closed
* under us.
*/
MUTEX_UNLOCK(dbenv, file_dbp->mutex);
if ((ret = __db_c_close(lcp->opd)) != 0)
goto out;
MUTEX_LOCK(dbenv, file_dbp->mutex);
lcp->opd = NULL;
break;
}
}
MUTEX_UNLOCK(dbenv, file_dbp->mutex);
}
MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist);
done: *lsnp = argp->prev_lsn;
out: REC_CLOSE;
}