/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2001-2002
 *	Sleepycat Software. All rights reserved.
 */

#include "db_config.h"

#ifndef lint
static const char revid[] = "$Id: txn_recover.c,v 1.36 2002/08/19 16:59:15 bostic Exp $";
#endif /* not lint */

/*
 * NOTE(review): the two system header names below were missing from the
 * text of this file as received.  <string.h> is required for the
 * memcmp/memcpy/memset calls in this file; <sys/types.h> is assumed --
 * confirm against the upstream source.
 */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <string.h>
#endif

#include "db_int.h"
#include "dbinc/txn.h"
#include "dbinc/db_page.h"
#include "dbinc/log.h"
#include "dbinc_auto/db_auto.h"
#include "dbinc_auto/crdel_auto.h"
#include "dbinc_auto/db_ext.h"

/*
 * __txn_continue
 *	Fill in the fields of the local transaction structure given
 *	the detail transaction structure.
 *
 *	env	environment whose tx_handle becomes the handle's manager
 *	txnp	local transaction handle to initialize
 *	td	detail structure supplying last_lsn and txnid
 *	off	value stored (as a roff_t) in txnp->off; the caller in this
 *		file passes the region offset of td
 *
 *	The handle is set up with no parent and with all flags cleared, so
 *	callers must set any flags they need after this returns.
 *
 * XXX
 * I'm not sure that we work correctly with nested txns.
 *
 * PUBLIC: void __txn_continue __P((DB_ENV *, DB_TXN *, TXN_DETAIL *, size_t));
 */
void
__txn_continue(env, txnp, td, off)
	DB_ENV *env;
	DB_TXN *txnp;
	TXN_DETAIL *td;
	size_t off;
{
	txnp->mgrp = env->tx_handle;
	txnp->parent = NULL;
	txnp->last_lsn = td->last_lsn;
	txnp->txnid = td->txnid;
	txnp->off = (roff_t)off;

	/* Method table. */
	txnp->abort = __txn_abort;
	txnp->commit = __txn_commit;
	txnp->discard = __txn_discard;
	txnp->id = __txn_id;
	txnp->prepare = __txn_prepare;

	txnp->flags = 0;
}

/*
 * __txn_map_gid
 *	Return the txn that corresponds to this global ID.
 *
 *	gid	global ID; sizeof(td->xid) bytes of it are compared
 *	tdp	set to the matching detail structure, or to NULL if no
 *		active transaction matches
 *	offp	set to the region offset of *tdp; written only on success
 *
 *	Returns 0 on a match and EINVAL otherwise.
 *
 * PUBLIC: int __txn_map_gid __P((DB_ENV *,
 * PUBLIC:     u_int8_t *, TXN_DETAIL **, size_t *));
 */
int
__txn_map_gid(dbenv, gid, tdp, offp)
	DB_ENV *dbenv;
	u_int8_t *gid;
	TXN_DETAIL **tdp;
	size_t *offp;
{
	DB_TXNMGR *mgr;
	DB_TXNREGION *tmr;

	mgr = dbenv->tx_handle;
	tmr = mgr->reginfo.primary;

	/*
	 * Search the internal active transaction table to find the
	 * matching xid.  If this is a performance hit, then we
	 * can create a hash table, but I doubt it's worth it.
	 */
	R_LOCK(dbenv, &mgr->reginfo);
	for (*tdp = SH_TAILQ_FIRST(&tmr->active_txn, __txn_detail);
	    *tdp != NULL;
	    *tdp = SH_TAILQ_NEXT(*tdp, links, __txn_detail))
		if (memcmp(gid, (*tdp)->xid, sizeof((*tdp)->xid)) == 0)
			break;
	R_UNLOCK(dbenv, &mgr->reginfo);

	if (*tdp == NULL)
		return (EINVAL);

	*offp = R_OFFSET(&mgr->reginfo, *tdp);
	return (0);
}

/*
 * __txn_recover --
 *	Public interface to retrieve the list of prepared, but not yet
 *	committed transactions.  See __txn_get_prepared for details.  This
 *	function and __db_xa_recover both wrap that one.
 *
 *	Fails with EINVAL (after reporting an error) if the transaction
 *	region is flagged TXN_IN_RECOVERY.
 *
 * PUBLIC: int __txn_recover
 * PUBLIC:     __P((DB_ENV *, DB_PREPLIST *, long, long *, u_int32_t));
 */
int
__txn_recover(dbenv, preplist, count, retp, flags)
	DB_ENV *dbenv;
	DB_PREPLIST *preplist;
	long count, *retp;
	u_int32_t flags;
{
	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(
	    dbenv, dbenv->tx_handle, "txn_recover", DB_INIT_TXN);

	if (F_ISSET((DB_TXNREGION *)
	    ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary,
	    TXN_IN_RECOVERY)) {
		__db_err(dbenv, "operation not permitted while in recovery");
		return (EINVAL);
	}

	/* No XID array: only the DB_PREPLIST form is filled in. */
	return (__txn_get_prepared(dbenv, NULL, preplist, count, retp, flags));
}

/*
 * __txn_get_prepared --
 *	Returns a list of prepared (and for XA, heuristically completed)
 *	transactions (less than or equal to the count parameter).  One of
 *	xids or txns must be set to point to an array of the appropriate type.
 *	The count parameter indicates the number of entries in the xids and/or
 *	txns array.  The retp parameter will be set to indicate the number of
 *	entries returned in the xids/txns array.  Flags indicates the operation,
 *	one of DB_FIRST or DB_NEXT.
 *
 *	Each entry returned through txns gets a freshly allocated DB_TXN
 *	(flagged TXN_MALLOC) which is also linked onto the transaction
 *	manager's txn_chain.
 *
 *	Returns 0 on success or a non-zero error code.  On error *retp may
 *	already be non-zero, because it is advanced as entries are collected.
 *
 * PUBLIC: int __txn_get_prepared __P((DB_ENV *,
 * PUBLIC:     XID *, DB_PREPLIST *, long, long *, u_int32_t));
 */
int
__txn_get_prepared(dbenv, xids, txns, count, retp, flags)
	DB_ENV *dbenv;
	XID *xids;
	DB_PREPLIST *txns;
	long count;		/* This is long for XA compatibility. */
	long *retp;
	u_int32_t flags;
{
	DBT data;
	DB_LOGC *logc;
	DB_LSN min, open_lsn;
	DB_PREPLIST *prepp;
	DB_TXNMGR *mgr;
	DB_TXNREGION *tmr;
	TXN_DETAIL *td;
	XID *xidp;
	__txn_ckp_args *ckp_args;
	long i;
	int nrestores, open_files, ret, t_ret;
	void *txninfo;

	*retp = 0;

	logc = NULL;
	MAX_LSN(min);
	prepp = txns;
	xidp = xids;
	nrestores = ret = 0;
	open_files = 1;

	/*
	 * If we are starting a scan, then we traverse the active transaction
	 * list once making sure that all transactions are marked as not having
	 * been collected.  Then on each pass, we mark the ones we collected
	 * so that if we cannot collect them all at once, we can finish up
	 * next time with a continue.
	 */
	mgr = dbenv->tx_handle;
	tmr = mgr->reginfo.primary;

	/*
	 * During this pass we need to figure out if we are going to need
	 * to open files.  We need to open files if we've never collected
	 * before (in which case, none of the COLLECTED bits will be set)
	 * and the ones that we are collecting are restored (if they aren't
	 * restored, then we never crashed; just the main server did).
	 */
	R_LOCK(dbenv, &mgr->reginfo);
	if (flags == DB_FIRST) {
		for (td = SH_TAILQ_FIRST(&tmr->active_txn, __txn_detail);
		    td != NULL;
		    td = SH_TAILQ_NEXT(td, links, __txn_detail)) {
			if (F_ISSET(td, TXN_RESTORED))
				nrestores++;
			if (F_ISSET(td, TXN_COLLECTED))
				open_files = 0;
			F_CLR(td, TXN_COLLECTED);
		}
		mgr->n_discards = 0;
	} else
		open_files = 0;

	/* Now begin collecting active transactions. */
	for (td = SH_TAILQ_FIRST(&tmr->active_txn, __txn_detail);
	    td != NULL && *retp < count;
	    td = SH_TAILQ_NEXT(td, links, __txn_detail)) {
		if (td->status != TXN_PREPARED || F_ISSET(td, TXN_COLLECTED))
			continue;

		if (xids != NULL) {
			xidp->formatID = td->format;
			xidp->gtrid_length = td->gtrid;
			xidp->bqual_length = td->bqual;
			memcpy(xidp->data, td->xid, sizeof(td->xid));
			xidp++;
		}

		if (txns != NULL) {
			if ((ret = __os_calloc(dbenv,
			    1, sizeof(DB_TXN), &prepp->txn)) != 0) {
				/*
				 * The err path does not release the region,
				 * so drop the lock here rather than return
				 * with it held.
				 *
				 * NOTE(review): handles allocated for earlier
				 * entries are not freed on this path, and
				 * their details remain marked TXN_COLLECTED.
				 */
				R_UNLOCK(dbenv, &mgr->reginfo);
				goto err;
			}
			__txn_continue(dbenv,
			    prepp->txn, td, R_OFFSET(&mgr->reginfo, td));
			F_SET(prepp->txn, TXN_MALLOC);
			memcpy(prepp->gid, td->xid, sizeof(td->xid));
			prepp++;
		}

		/* Track the smallest begin LSN among collected txns. */
		if (log_compare(&td->begin_lsn, &min) < 0)
			min = td->begin_lsn;

		(*retp)++;
		F_SET(td, TXN_COLLECTED);
	}
	R_UNLOCK(dbenv, &mgr->reginfo);

	/*
	 * Now link all the transactions into the transaction manager's list.
	 */
	if (txns != NULL) {
		MUTEX_THREAD_LOCK(dbenv, mgr->mutexp);
		for (i = 0; i < *retp; i++)
			TAILQ_INSERT_TAIL(&mgr->txn_chain, txns[i].txn, links);
		MUTEX_THREAD_UNLOCK(dbenv, mgr->mutexp);
	}

	if (open_files && nrestores && *retp != 0 && !IS_MAX_LSN(min)) {
		/*
		 * Figure out the last checkpoint before the smallest
		 * start_lsn in the region.
		 */
		F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);

		if ((ret = dbenv->log_cursor(dbenv, &logc, 0)) != 0)
			goto err;

		memset(&data, 0, sizeof(data));
		if ((ret = __txn_getckp(dbenv, &open_lsn)) == 0)
			while (!IS_ZERO_LSN(open_lsn) && (ret =
			    logc->get(logc, &open_lsn, &data, DB_SET)) == 0 &&
			    log_compare(&min, &open_lsn) < 0) {
				/* Format the log record. */
				if ((ret = __txn_ckp_read(dbenv,
				    data.data, &ckp_args)) != 0) {
					__db_err(dbenv,
				    "Invalid checkpoint record at [%lu][%lu]",
					    (u_long)open_lsn.file,
					    (u_long)open_lsn.offset);
					goto err;
				}
				/* Step back to the previous checkpoint. */
				open_lsn = ckp_args->last_ckp;
				__os_free(dbenv, ckp_args);
			}

		/*
		 * There are three ways by which we may have gotten here.
		 * - We got a DB_NOTFOUND -- we need to read the first
		 *	log record.
		 * - We found a checkpoint before min.  We're done.
		 * - We found a checkpoint after min whose last_ckp is 0.  We
		 *	need to start at the beginning of the log.
		 */
		if ((ret == DB_NOTFOUND || IS_ZERO_LSN(open_lsn)) &&
		    (ret = logc->get(logc, &open_lsn, &data, DB_FIRST)) != 0) {
			__db_err(dbenv, "No log records");
			goto err;
		}

		if ((ret = __db_txnlist_init(dbenv, 0, 0, NULL, &txninfo)) != 0)
			goto err;
		ret = __env_openfiles(
		    dbenv, logc, txninfo, &data, &open_lsn, NULL, 0, 0);
		if (txninfo != NULL)
			__db_txnlist_end(dbenv, txninfo);
	}

err:	F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);

	/* Report a cursor-close failure only if nothing failed earlier. */
	if (logc != NULL && (t_ret = logc->close(logc, 0)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}