mariadb/bdb/lock/lock_deadlock.c

887 lines
23 KiB
C
Raw Normal View History

2001-03-04 19:42:05 -05:00
/*-
* See the file LICENSE for redistribution information.
*
2002-10-30 15:57:05 +04:00
* Copyright (c) 1996-2002
2001-03-04 19:42:05 -05:00
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
2002-10-30 15:57:05 +04:00
static const char revid[] = "$Id: lock_deadlock.c,v 11.54 2002/08/06 05:05:21 bostic Exp $";
2001-03-04 19:42:05 -05:00
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <string.h>
#endif
#include "db_int.h"
2002-10-30 15:57:05 +04:00
#include "dbinc/db_shash.h"
#include "dbinc/lock.h"
#include "dbinc/txn.h"
#include "dbinc/rep.h"
2001-03-04 19:42:05 -05:00
/*
 * Bitmap helpers for the waits-for matrix.  A map is an array of 32-bit
 * words; bit B lives in word B / 32 at position B % 32.
 *
 * The shifted constant is an unsigned 32-bit one: shifting a signed int 1
 * left by 31 is undefined behavior.  The multi-statement macros are wrapped
 * in do { } while (0) so that they act as a single statement, including
 * inside an unbraced if/else.
 */
#define	ISSET_MAP(M, N)	((M)[(N) / 32] & ((u_int32_t)1 << ((N) % 32)))

/* Zero the first N words of map M. */
#define	CLEAR_MAP(M, N) do {						\
	u_int32_t __i;							\
	for (__i = 0; __i < (N); __i++)					\
		(M)[__i] = 0;						\
} while (0)

#define	SET_MAP(M, B)	((M)[(B) / 32] |= ((u_int32_t)1 << ((B) % 32)))
#define	CLR_MAP(M, B)	((M)[(B) / 32] &= ~((u_int32_t)1 << ((B) % 32)))

/* OR the first N words of map S into map D. */
#define	OR_MAP(D, S, N) do {						\
	u_int32_t __i;							\
	for (__i = 0; __i < (N); __i++)					\
		(D)[__i] |= (S)[__i];					\
} while (0)

/* Sentinel meaning "no locker selected". */
#define	BAD_KILLID	0xffffffff
typedef struct {
int valid;
2002-10-30 15:57:05 +04:00
int self_wait;
u_int32_t count;
2001-03-04 19:42:05 -05:00
u_int32_t id;
u_int32_t last_lock;
u_int32_t last_locker_id;
db_pgno_t pgno;
} locker_info;
/*
 * Forward declarations for the file-local deadlock detector helpers.
 */
static int  __dd_abort __P((DB_ENV *, locker_info *));
static int  __dd_build __P((DB_ENV *, u_int32_t,
		u_int32_t **, u_int32_t *, u_int32_t *, locker_info **));
static int  __dd_find __P((DB_ENV *, u_int32_t *,
		locker_info *, u_int32_t, u_int32_t, u_int32_t ***));
static int  __dd_isolder __P((u_int32_t, u_int32_t, u_int32_t, u_int32_t));
static int  __dd_verify __P((locker_info *,
		u_int32_t *, u_int32_t *, u_int32_t *,
		u_int32_t, u_int32_t, u_int32_t));

#ifdef DIAGNOSTIC
/* The waits-for dump is only compiled into DIAGNOSTIC builds. */
static void __dd_debug __P((DB_ENV *,
		locker_info *, u_int32_t *, u_int32_t, u_int32_t));
#endif
2002-10-30 15:57:05 +04:00
/*
* lock_detect --
*
* PUBLIC: int __lock_detect __P((DB_ENV *, u_int32_t, u_int32_t, int *));
*/
2001-03-04 19:42:05 -05:00
int
2002-10-30 15:57:05 +04:00
__lock_detect(dbenv, flags, atype, abortp)
2001-03-04 19:42:05 -05:00
DB_ENV *dbenv;
u_int32_t flags, atype;
int *abortp;
{
DB_LOCKREGION *region;
DB_LOCKTAB *lt;
2002-10-30 15:57:05 +04:00
DB_TXNMGR *tmgr;
2001-03-04 19:42:05 -05:00
locker_info *idmap;
2002-10-30 15:57:05 +04:00
u_int32_t *bitmap, *copymap, **deadp, **free_me, *tmpmap;
u_int32_t i, keeper, killid, limit, nalloc, nlockers;
u_int32_t lock_max, txn_max;
int ret;
2001-03-04 19:42:05 -05:00
PANIC_CHECK(dbenv);
2002-10-30 15:57:05 +04:00
ENV_REQUIRES_CONFIG(dbenv,
dbenv->lk_handle, "DB_ENV->lock_detect", DB_INIT_LOCK);
/* Validate arguments. */
if ((ret = __db_fchk(dbenv, "DB_ENV->lock_detect", flags, 0)) != 0)
return (ret);
switch (atype) {
case DB_LOCK_DEFAULT:
case DB_LOCK_EXPIRE:
case DB_LOCK_MAXLOCKS:
case DB_LOCK_MINLOCKS:
case DB_LOCK_MINWRITE:
case DB_LOCK_OLDEST:
case DB_LOCK_RANDOM:
case DB_LOCK_YOUNGEST:
break;
default:
__db_err(dbenv,
"DB_ENV->lock_detect: unknown deadlock detection mode specified");
return (EINVAL);
}
/*
* If this environment is a replication client, then we must use the
* MINWRITE detection discipline.
*/
if (__rep_is_client(dbenv))
atype = DB_LOCK_MINWRITE;
free_me = NULL;
2001-03-04 19:42:05 -05:00
lt = dbenv->lk_handle;
if (abortp != NULL)
*abortp = 0;
/* Check if a detector run is necessary. */
LOCKREGION(dbenv, lt);
2002-10-30 15:57:05 +04:00
/* Make a pass only if auto-detect would run. */
region = lt->reginfo.primary;
if (region->need_dd == 0) {
UNLOCKREGION(dbenv, lt);
return (0);
2001-03-04 19:42:05 -05:00
}
2002-10-30 15:57:05 +04:00
/* Reset need_dd, so we know we've run the detector. */
region->need_dd = 0;
2001-03-04 19:42:05 -05:00
/* Build the waits-for bitmap. */
2002-10-30 15:57:05 +04:00
ret = __dd_build(dbenv, atype, &bitmap, &nlockers, &nalloc, &idmap);
lock_max = region->stat.st_cur_maxid;
2001-03-04 19:42:05 -05:00
UNLOCKREGION(dbenv, lt);
2002-10-30 15:57:05 +04:00
/*
* We need the cur_maxid from the txn region as well. In order
* to avoid tricky synchronization between the lock and txn
* regions, we simply unlock the lock region and then lock the
* txn region. This introduces a small window during which the
* transaction system could then wrap. We're willing to return
* the wrong answer for "oldest" or "youngest" in those rare
* circumstances.
*/
tmgr = dbenv->tx_handle;
if (tmgr != NULL) {
R_LOCK(dbenv, &tmgr->reginfo);
txn_max = ((DB_TXNREGION *)tmgr->reginfo.primary)->cur_maxid;
R_UNLOCK(dbenv, &tmgr->reginfo);
} else
txn_max = TXN_MAXIMUM;
if (ret != 0 || atype == DB_LOCK_EXPIRE)
2001-03-04 19:42:05 -05:00
return (ret);
if (nlockers == 0)
return (0);
#ifdef DIAGNOSTIC
if (FLD_ISSET(dbenv->verbose, DB_VERB_WAITSFOR))
2002-10-30 15:57:05 +04:00
__dd_debug(dbenv, idmap, bitmap, nlockers, nalloc);
2001-03-04 19:42:05 -05:00
#endif
2002-10-30 15:57:05 +04:00
/* Now duplicate the bitmaps so we can verify deadlock participants. */
if ((ret = __os_calloc(dbenv, (size_t)nlockers,
sizeof(u_int32_t) * nalloc, &copymap)) != 0)
goto err;
memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc);
if ((ret = __os_calloc(dbenv, sizeof(u_int32_t), nalloc, &tmpmap)) != 0)
goto err1;
2001-03-04 19:42:05 -05:00
/* Find a deadlock. */
2002-10-30 15:57:05 +04:00
if ((ret =
__dd_find(dbenv, bitmap, idmap, nlockers, nalloc, &deadp)) != 0)
2001-03-04 19:42:05 -05:00
return (ret);
killid = BAD_KILLID;
free_me = deadp;
for (; *deadp != NULL; deadp++) {
if (abortp != NULL)
++*abortp;
2002-10-30 15:57:05 +04:00
killid = (u_int32_t)((*deadp - bitmap) / nalloc);
limit = killid;
keeper = BAD_KILLID;
2001-03-04 19:42:05 -05:00
2002-10-30 15:57:05 +04:00
if (atype == DB_LOCK_DEFAULT || atype == DB_LOCK_RANDOM)
goto dokill;
/*
* It's conceivable that under XA, the locker could
* have gone away.
*/
if (killid == BAD_KILLID)
2001-03-04 19:42:05 -05:00
break;
2002-10-30 15:57:05 +04:00
/*
* Start with the id that we know is deadlocked
* and then examine all other set bits and see
* if any are a better candidate for abortion
* and that they are genuinely part of the
* deadlock. The definition of "best":
* OLDEST: smallest id
* YOUNGEST: largest id
* MAXLOCKS: maximum count
* MINLOCKS: minimum count
* MINWRITE: minimum count
*/
2001-03-04 19:42:05 -05:00
2002-10-30 15:57:05 +04:00
for (i = (killid + 1) % nlockers;
i != limit;
i = (i + 1) % nlockers) {
if (!ISSET_MAP(*deadp, i))
continue;
switch (atype) {
case DB_LOCK_OLDEST:
if (__dd_isolder(idmap[killid].id,
idmap[i].id, lock_max, txn_max))
continue;
keeper = i;
break;
case DB_LOCK_YOUNGEST:
if (__dd_isolder(idmap[i].id,
idmap[killid].id, lock_max, txn_max))
continue;
keeper = i;
break;
case DB_LOCK_MAXLOCKS:
if (idmap[i].count < idmap[killid].count)
continue;
keeper = i;
break;
case DB_LOCK_MINLOCKS:
case DB_LOCK_MINWRITE:
if (idmap[i].count > idmap[killid].count)
continue;
keeper = i;
break;
default:
killid = BAD_KILLID;
ret = EINVAL;
goto dokill;
}
if (__dd_verify(idmap, *deadp,
tmpmap, copymap, nlockers, nalloc, i))
killid = i;
2001-03-04 19:42:05 -05:00
}
2002-10-30 15:57:05 +04:00
dokill: if (killid == BAD_KILLID)
2001-03-04 19:42:05 -05:00
continue;
2002-10-30 15:57:05 +04:00
/*
* There are cases in which our general algorithm will
* fail. Returning 1 from verify indicates that the
* particular locker is not only involved in a deadlock,
* but that killing him will allow others to make forward
* progress. Unfortunately, there are cases where we need
* to abort someone, but killing them will not necessarily
* ensure forward progress (imagine N readers all trying to
* acquire a write lock). In such a scenario, we'll have
* gotten all the way through the loop, we will have found
* someone to keep (keeper will be valid), but killid will
* still be the initial deadlocker. In this case, if the
* initial killid satisfies __dd_verify, kill it, else abort
* keeper and indicate that we need to run deadlock detection
* again.
*/
if (keeper != BAD_KILLID && killid == limit &&
__dd_verify(idmap, *deadp,
tmpmap, copymap, nlockers, nalloc, killid) == 0) {
LOCKREGION(dbenv, lt);
region->need_dd = 1;
UNLOCKREGION(dbenv, lt);
killid = keeper;
}
2001-03-04 19:42:05 -05:00
/* Kill the locker with lockid idmap[killid]. */
if ((ret = __dd_abort(dbenv, &idmap[killid])) != 0) {
/*
* It's possible that the lock was already aborted;
* this isn't necessarily a problem, so do not treat
* it as an error.
*/
if (ret == DB_ALREADY_ABORTED)
ret = 0;
else
__db_err(dbenv,
"warning: unable to abort locker %lx",
(u_long)idmap[killid].id);
} else if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
__db_err(dbenv,
"Aborting locker %lx", (u_long)idmap[killid].id);
}
2002-10-30 15:57:05 +04:00
__os_free(dbenv, tmpmap);
err1: __os_free(dbenv, copymap);
err: if (free_me != NULL)
__os_free(dbenv, free_me);
__os_free(dbenv, bitmap);
__os_free(dbenv, idmap);
2001-03-04 19:42:05 -05:00
return (ret);
}
/*
* ========================================================================
* Utilities
*/
/*
 * Marks a locker that has no deadlock-detector id of its own.  __dd_build
 * tags lockers that have a master_locker with this value; they take on
 * their master's dd_id the first time they are met as a holder or waiter.
 */
# define DD_INVALID_ID ((u_int32_t) -1)
/*
 * __dd_build --
 *	Expire timed-out waiters and, unless atype is DB_LOCK_EXPIRE, build
 *	the waits-for matrix for the current set of lockers.
 *
 *	dbenv		environment handle.
 *	atype		detection policy; selects what is stored in each
 *			locker_info count (nlocks for MINLOCKS/MAXLOCKS,
 *			nwrites for MINWRITE).
 *	bmp		out: the matrix, one row of *allocp words per
 *			detector id.
 *	nlockers	out: number of rows (the padded locker count), or 0
 *			when the region has no lockers.
 *	allocp		out: number of 32-bit words in each row.
 *	idmap		out: array of locker_info, indexed by detector id.
 *
 *	The matrix and the locker_info array are allocated here and handed
 *	to the caller.  When atype is DB_LOCK_EXPIRE, or the region has no
 *	lockers, nothing is allocated and bmp, allocp and idmap are left
 *	untouched.  region->need_dd is set from the pending-timeout state
 *	before a normal or expire-only return.
 *
 *	Returns 0 on success or the error from __os_calloc.
 */
static int
2002-10-30 15:57:05 +04:00
__dd_build(dbenv, atype, bmp, nlockers, allocp, idmap)
2001-03-04 19:42:05 -05:00
DB_ENV *dbenv;
2002-10-30 15:57:05 +04:00
u_int32_t atype, **bmp, *nlockers, *allocp;
2001-03-04 19:42:05 -05:00
locker_info **idmap;
{
struct __db_lock *lp;
DB_LOCKER *lip, *lockerp, *child;
DB_LOCKOBJ *op, *lo;
DB_LOCKREGION *region;
DB_LOCKTAB *lt;
locker_info *id_array;
2002-10-30 15:57:05 +04:00
db_timeval_t now;
u_int32_t *bitmap, count, dd, *entryp, id, ndx, nentries, *tmpmap;
2001-03-04 19:42:05 -05:00
u_int8_t *pptr;
2002-10-30 15:57:05 +04:00
int expire_only, is_first, need_timeout, ret;
2001-03-04 19:42:05 -05:00
lt = dbenv->lk_handle;
region = lt->reginfo.primary;
2002-10-30 15:57:05 +04:00
LOCK_SET_TIME_INVALID(&now);
need_timeout = 0;
expire_only = atype == DB_LOCK_EXPIRE;
/*
* While we always check for expired timeouts, if we are called
* with DB_LOCK_EXPIRE, then we are only checking for timeouts
* (i.e., not doing deadlock detection at all). If we aren't
* doing real deadlock detection, then we can skip a significant
* amount of the processing. In particular we do not build
* the conflict array and our caller needs to expect this.
*/
if (expire_only) {
count = 0;
nentries = 0;
goto obj_loop;
}
2001-03-04 19:42:05 -05:00
/*
* We'll check how many lockers there are, add a few more in for
* good measure and then allocate all the structures. Then we'll
* verify that we have enough room when we go back in and get the
* mutex the second time.
*/
2002-10-30 15:57:05 +04:00
retry: count = region->stat.st_nlockers;
2001-03-04 19:42:05 -05:00
if (count == 0) {
*nlockers = 0;
return (0);
}
if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
__db_err(dbenv, "%lu lockers", (u_long)count);
2002-10-30 15:57:05 +04:00
/* Pad the locker count with some slack before sizing the arrays. */
count += 20;
2001-03-04 19:42:05 -05:00
/* Number of 32-bit words needed to hold one bit per locker. */
nentries = ALIGN(count, 32) / 32;
/*
* Allocate enough space for a count by count bitmap matrix.
*
* XXX
* We can probably save the malloc's between iterations just
* reallocing if necessary because count grew by too much.
*/
if ((ret = __os_calloc(dbenv, (size_t)count,
sizeof(u_int32_t) * nentries, &bitmap)) != 0)
return (ret);
if ((ret = __os_calloc(dbenv,
sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
2002-10-30 15:57:05 +04:00
__os_free(dbenv, bitmap);
2001-03-04 19:42:05 -05:00
return (ret);
}
if ((ret = __os_calloc(dbenv,
(size_t)count, sizeof(locker_info), &id_array)) != 0) {
2002-10-30 15:57:05 +04:00
__os_free(dbenv, bitmap);
__os_free(dbenv, tmpmap);
2001-03-04 19:42:05 -05:00
return (ret);
}
/*
* Now go back in and actually fill in the matrix.
*/
2002-10-30 15:57:05 +04:00
/* If the locker count outgrew the padded size, start over. */
if (region->stat.st_nlockers > count) {
__os_free(dbenv, bitmap);
__os_free(dbenv, tmpmap);
__os_free(dbenv, id_array);
2001-03-04 19:42:05 -05:00
goto retry;
}
/*
* First we go through and assign each locker a deadlock detector id.
*/
2002-10-30 15:57:05 +04:00
for (id = 0, lip = SH_TAILQ_FIRST(&region->lockers, __db_locker);
lip != NULL;
lip = SH_TAILQ_NEXT(lip, ulinks, __db_locker)) {
/* Lockers already being aborted get no id on this pass. */
if (F_ISSET(lip, DB_LOCKER_INABORT))
continue;
/*
* A locker without a master gets the next id and seeds its
* locker_info entry; a locker with a master is marked invalid
* here and resolved to its master's id in the loops below.
*/
if (lip->master_locker == INVALID_ROFF) {
lip->dd_id = id++;
id_array[lip->dd_id].id = lip->id;
if (atype == DB_LOCK_MINLOCKS ||
atype == DB_LOCK_MAXLOCKS)
id_array[lip->dd_id].count = lip->nlocks;
if (atype == DB_LOCK_MINWRITE)
id_array[lip->dd_id].count = lip->nwrites;
} else
lip->dd_id = DD_INVALID_ID;
2001-03-04 19:42:05 -05:00
}
/*
* We only need consider objects that have waiters, so we use
* the list of objects with waiters (dd_objs) instead of traversing
* the entire hash table. For each object, we traverse the waiters
* list and add an entry in the waitsfor matrix for each waiter/holder
* combination.
*/
2002-10-30 15:57:05 +04:00
obj_loop:
2001-03-04 19:42:05 -05:00
for (op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj);
op != NULL; op = SH_TAILQ_NEXT(op, dd_links, __db_lockobj)) {
2002-10-30 15:57:05 +04:00
/* An expire-only pass has no maps; go straight to the waiters. */
if (expire_only)
goto look_waiters;
2001-03-04 19:42:05 -05:00
CLEAR_MAP(tmpmap, nentries);
/*
* First we go through and create a bit map that
* represents all the holders of this object.
*/
for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
lp != NULL;
lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
/* NOTE(review): LOCKER_LOCK presumably sets ndx -- confirm. */
LOCKER_LOCK(lt, region, lp->holder, ndx);
if ((ret = __lock_getlocker(lt,
lp->holder, ndx, 0, &lockerp)) != 0)
continue;
2002-10-30 15:57:05 +04:00
if (F_ISSET(lockerp, DB_LOCKER_INABORT))
continue;
/*
* A locker still marked invalid adopts its master's id, and its
* lock counts are folded into the master's entry.
*/
if (lockerp->dd_id == DD_INVALID_ID) {
dd = ((DB_LOCKER *)R_ADDR(&lt->reginfo,
lockerp->master_locker))->dd_id;
lockerp->dd_id = dd;
if (atype == DB_LOCK_MINLOCKS ||
atype == DB_LOCK_MAXLOCKS)
id_array[dd].count += lockerp->nlocks;
if (atype == DB_LOCK_MINWRITE)
id_array[dd].count += lockerp->nwrites;
} else
2001-03-04 19:42:05 -05:00
dd = lockerp->dd_id;
id_array[dd].valid = 1;
/*
* If the holder has already been aborted, then
* we should ignore it for now.
*/
if (lp->status == DB_LSTAT_HELD)
SET_MAP(tmpmap, dd);
}
/*
* Next, for each waiter, we set its row in the matrix
* equal to the map of holders we set up above.
*/
2002-10-30 15:57:05 +04:00
look_waiters:
2001-03-04 19:42:05 -05:00
for (is_first = 1,
lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
lp != NULL;
is_first = 0,
lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
LOCKER_LOCK(lt, region, lp->holder, ndx);
if ((ret = __lock_getlocker(lt,
lp->holder, ndx, 0, &lockerp)) != 0)
continue;
2002-10-30 15:57:05 +04:00
/*
* Expire a waiting request whose locker timeout has passed and
* release its mutex; otherwise note whether this waiter has a
* timeout set.
*/
if (lp->status == DB_LSTAT_WAITING) {
if (__lock_expired(dbenv,
&now, &lockerp->lk_expire)) {
lp->status = DB_LSTAT_EXPIRED;
MUTEX_UNLOCK(dbenv, &lp->mutex);
continue;
}
need_timeout =
LOCK_TIME_ISVALID(&lockerp->lk_expire);
}
if (expire_only)
continue;
if (lockerp->dd_id == DD_INVALID_ID) {
dd = ((DB_LOCKER *)R_ADDR(&lt->reginfo,
lockerp->master_locker))->dd_id;
lockerp->dd_id = dd;
if (atype == DB_LOCK_MINLOCKS ||
atype == DB_LOCK_MAXLOCKS)
id_array[dd].count += lockerp->nlocks;
if (atype == DB_LOCK_MINWRITE)
id_array[dd].count += lockerp->nwrites;
} else
2001-03-04 19:42:05 -05:00
dd = lockerp->dd_id;
id_array[dd].valid = 1;
/*
* If the transaction is pending abortion, then
* ignore it on this iteration.
*/
if (lp->status != DB_LSTAT_WAITING)
continue;
/* The waiter's row in the matrix. */
entryp = bitmap + (nentries * dd);
OR_MAP(entryp, tmpmap, nentries);
/*
* If this is the first waiter on the queue,
* then we remove the waitsfor relationship
* with oneself. However, if it's anywhere
* else on the queue, then we have to keep
* it and we have an automatic deadlock.
*/
2002-10-30 15:57:05 +04:00
if (is_first) {
if (ISSET_MAP(entryp, dd))
id_array[dd].self_wait = 1;
2001-03-04 19:42:05 -05:00
CLR_MAP(entryp, dd);
2002-10-30 15:57:05 +04:00
}
2001-03-04 19:42:05 -05:00
}
}
2002-10-30 15:57:05 +04:00
/* An expire-only pass is done; nothing was allocated. */
if (expire_only) {
region->need_dd = need_timeout;
return (0);
}
2001-03-04 19:42:05 -05:00
/* Now for each locker; record its last lock. */
for (id = 0; id < count; id++) {
if (!id_array[id].valid)
continue;
LOCKER_LOCK(lt, region, id_array[id].id, ndx);
if ((ret = __lock_getlocker(lt,
id_array[id].id, ndx, 0, &lockerp)) != 0) {
__db_err(dbenv,
"No locks for locker %lu", (u_long)id_array[id].id);
continue;
}
/*
* If this is a master transaction, try to
* find one of its children's locks first,
* as they are probably more recent.
*/
child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker);
if (child != NULL) {
do {
lp = SH_LIST_FIRST(&child->heldby, __db_lock);
if (lp != NULL &&
2002-10-30 15:57:05 +04:00
lp->status == DB_LSTAT_WAITING) {
2001-03-04 19:42:05 -05:00
id_array[id].last_locker_id = child->id;
goto get_lock;
}
child = SH_LIST_NEXT(
child, child_link, __db_locker);
} while (child != NULL);
}
lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
if (lp != NULL) {
id_array[id].last_locker_id = lockerp->id;
/*
* Record the lock's region offset and copy the leading
* sizeof(db_pgno_t) bytes of the lock object into pgno
* (0 if the object is smaller than that).
*/
get_lock: id_array[id].last_lock = R_OFFSET(&lt->reginfo, lp);
lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
pptr = SH_DBT_PTR(&lo->lockobj);
if (lo->lockobj.size >= sizeof(db_pgno_t))
memcpy(&id_array[id].pgno,
pptr, sizeof(db_pgno_t));
else
id_array[id].pgno = 0;
}
}
2002-10-30 15:57:05 +04:00
/*
* Pass complete, reset the deadlock detector bit,
* unless we have pending timeouts.
*/
region->need_dd = need_timeout;
2001-03-04 19:42:05 -05:00
/*
* Now we can release everything except the bitmap matrix that we
* created.
*/
/* id equals count here, so the caller sees the padded row count. */
*nlockers = id;
*idmap = id_array;
*bmp = bitmap;
2002-10-30 15:57:05 +04:00
*allocp = nentries;
__os_free(dbenv, tmpmap);
2001-03-04 19:42:05 -05:00
return (0);
}
static int
2002-10-30 15:57:05 +04:00
__dd_find(dbenv, bmp, idmap, nlockers, nalloc, deadp)
2001-03-04 19:42:05 -05:00
DB_ENV *dbenv;
2002-10-30 15:57:05 +04:00
u_int32_t *bmp, nlockers, nalloc;
2001-03-04 19:42:05 -05:00
locker_info *idmap;
u_int32_t ***deadp;
{
2002-10-30 15:57:05 +04:00
u_int32_t i, j, k, *mymap, *tmpmap;
2001-03-04 19:42:05 -05:00
u_int32_t **retp;
int ndead, ndeadalloc, ret;
#undef INITIAL_DEAD_ALLOC
#define INITIAL_DEAD_ALLOC 8
ndeadalloc = INITIAL_DEAD_ALLOC;
ndead = 0;
if ((ret = __os_malloc(dbenv,
2002-10-30 15:57:05 +04:00
ndeadalloc * sizeof(u_int32_t *), &retp)) != 0)
2001-03-04 19:42:05 -05:00
return (ret);
/*
* For each locker, OR in the bits from the lockers on which that
* locker is waiting.
*/
2002-10-30 15:57:05 +04:00
for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nalloc) {
2001-03-04 19:42:05 -05:00
if (!idmap[i].valid)
continue;
for (j = 0; j < nlockers; j++) {
if (!ISSET_MAP(mymap, j))
continue;
/* Find the map for this bit. */
2002-10-30 15:57:05 +04:00
tmpmap = bmp + (nalloc * j);
OR_MAP(mymap, tmpmap, nalloc);
2001-03-04 19:42:05 -05:00
if (!ISSET_MAP(mymap, i))
continue;
/* Make sure we leave room for NULL. */
if (ndead + 2 >= ndeadalloc) {
ndeadalloc <<= 1;
/*
* If the alloc fails, then simply return the
* deadlocks that we already have.
*/
if (__os_realloc(dbenv,
ndeadalloc * sizeof(u_int32_t),
2002-10-30 15:57:05 +04:00
&retp) != 0) {
2001-03-04 19:42:05 -05:00
retp[ndead] = NULL;
*deadp = retp;
return (0);
}
}
retp[ndead++] = mymap;
/* Mark all participants in this deadlock invalid. */
for (k = 0; k < nlockers; k++)
if (ISSET_MAP(mymap, k))
idmap[k].valid = 0;
break;
}
}
retp[ndead] = NULL;
*deadp = retp;
return (0);
}
static int
__dd_abort(dbenv, info)
DB_ENV *dbenv;
locker_info *info;
{
struct __db_lock *lockp;
DB_LOCKER *lockerp;
DB_LOCKOBJ *sh_obj;
DB_LOCKREGION *region;
DB_LOCKTAB *lt;
u_int32_t ndx;
int ret;
lt = dbenv->lk_handle;
region = lt->reginfo.primary;
LOCKREGION(dbenv, lt);
2002-10-30 15:57:05 +04:00
2001-03-04 19:42:05 -05:00
/* Find the locker's last lock. */
LOCKER_LOCK(lt, region, info->last_locker_id, ndx);
if ((ret = __lock_getlocker(lt,
info->last_locker_id, ndx, 0, &lockerp)) != 0 || lockerp == NULL) {
if (ret == 0)
ret = DB_ALREADY_ABORTED;
goto out;
}
2002-10-30 15:57:05 +04:00
/* It's possible that this locker was already aborted. */
if ((lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock)) == NULL) {
ret = DB_ALREADY_ABORTED;
goto out;
}
if (R_OFFSET(&lt->reginfo, lockp) != info->last_lock ||
2001-03-04 19:42:05 -05:00
lockp->status != DB_LSTAT_WAITING) {
ret = DB_ALREADY_ABORTED;
goto out;
}
sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
SH_LIST_REMOVE(lockp, locker_links, __db_lock);
/* Abort lock, take it off list, and wake up this lock. */
SHOBJECT_LOCK(lt, region, sh_obj, ndx);
lockp->status = DB_LSTAT_ABORTED;
SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
/*
* Either the waiters list is now empty, in which case we remove
* it from dd_objs, or it is not empty, in which case we need to
* do promotion.
*/
if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL)
SH_TAILQ_REMOVE(&region->dd_objs,
sh_obj, dd_links, __db_lockobj);
else
ret = __lock_promote(lt, sh_obj, 0);
MUTEX_UNLOCK(dbenv, &lockp->mutex);
2002-10-30 15:57:05 +04:00
region->stat.st_ndeadlocks++;
2001-03-04 19:42:05 -05:00
UNLOCKREGION(dbenv, lt);
return (0);
out: UNLOCKREGION(dbenv, lt);
return (ret);
}
#ifdef DIAGNOSTIC
/*
 * __dd_debug --
 *	Report the waits-for matrix through __db_err: one line per valid
 *	locker with its id and pgno, the ids of the lockers it waits on,
 *	and its last_lock value.
 *
 *	dbenv		environment handle.
 *	idmap		per-row locker information.
 *	bitmap		the matrix, nlockers rows of nalloc words each.
 *	nlockers	number of rows.
 *	nalloc		number of 32-bit words per row.
 */
static void
__dd_debug(dbenv, idmap, bitmap, nlockers, nalloc)
	DB_ENV *dbenv;
	locker_info *idmap;
	u_int32_t *bitmap, nlockers, nalloc;
{
	u_int32_t i, j, *mymap;
	size_t len;
	char *msgbuf;

	__db_err(dbenv, "Waitsfor array\nWaiter:\tWaiting on:");

	/* Allocate space to print 10 bytes per item waited on. */
#undef	MSGBUF_LEN
#define	MSGBUF_LEN ((nlockers + 1) * 10 + 64)
	if (__os_malloc(dbenv, MSGBUF_LEN, &msgbuf) != 0)
		return;

	for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nalloc) {
		if (!idmap[i].valid)
			continue;

		/*
		 * Build the line by appending at the current end of the
		 * buffer.  Passing msgbuf to sprintf as both destination
		 * and "%s" argument is undefined behavior, so each piece
		 * is written at msgbuf + len instead.
		 */
		sprintf(msgbuf,					/* Waiter. */
		    "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno);
		len = strlen(msgbuf);
		for (j = 0; j < nlockers; j++)
			if (ISSET_MAP(mymap, j)) {
				sprintf(msgbuf + len,
				    " %lx", (u_long)idmap[j].id);
				len += strlen(msgbuf + len);
			}
		(void)sprintf(msgbuf + len,
		    " %lu", (u_long)idmap[i].last_lock);

		/* Never hand a built-up buffer to __db_err as the format. */
		__db_err(dbenv, "%s", msgbuf);
	}

	__os_free(dbenv, msgbuf);
}
#endif
2002-10-30 15:57:05 +04:00
/*
 * __dd_verify --
 *	Given a bitmap that contains a deadlock, decide whether the locker
 *	"which" is actually part of it.  Returns 1 if it is, 0 otherwise.
 *
 *	idmap		per-locker information (self_wait is consulted).
 *	deadmap		the row that identified the deadlock.
 *	tmpmap		scratch row of nalloc words; overwritten here.
 *	origmap		copy of the matrix as built by __dd_build.  The
 *			self-wait bit of a participant may be set in it.
 *	nlockers	number of lockers under consideration.
 *	nalloc		number of 32-bit words per row.
 *	which		the locker in question.
 */
static int
__dd_verify(idmap, deadmap, tmpmap, origmap, nlockers, nalloc, which)
	locker_info *idmap;
	u_int32_t *deadmap, *tmpmap, *origmap;
	u_int32_t nlockers, nalloc, which;
{
	u_int32_t *row;
	u_int32_t n;
	int nothers;

	CLEAR_MAP(tmpmap, nalloc);

	/*
	 * For "which" to be actively involved, taking it out of the
	 * evaluation must break the deadlock.  OR together the rows of
	 * every other participant: if each of them still has its bit set
	 * in the result, the deadlock persists without "which".
	 */
	nothers = 0;
	for (n = 0; n < nlockers; n++) {
		if (n != which && ISSET_MAP(deadmap, n)) {
			row = origmap + (nalloc * n);

			/*
			 * The first waiter that is also a holder had its
			 * own bit cleared when the matrix was built; put
			 * it back so it is treated like any other waiter.
			 */
			if (idmap[n].self_wait)
				SET_MAP(row, n);
			OR_MAP(tmpmap, row, nalloc);
			nothers++;
		}
	}

	/* With a single other participant, "which" must be involved. */
	if (nothers == 1)
		return (1);

	/* A participant that lost its bit means the deadlock is broken. */
	for (n = 0; n < nlockers; n++)
		if (n != which &&
		    ISSET_MAP(deadmap, n) && !ISSET_MAP(tmpmap, n))
			return (1);

	return (0);
}
/*
 * __dd_isolder --
 *	Return non-zero if locker id a is considered older than locker id b.
 *
 *	Ids up to DB_LOCK_MAXID are plain lock ids, anything above is a
 *	transaction id.  A lock id is always older than a transaction id.
 *	Within one id space, lock_max or txn_max is that space's current
 *	maximum and is used to allow for ids that have wrapped.
 */
static int
__dd_isolder(a, b, lock_max, txn_max)
	u_int32_t a, b;
	u_int32_t lock_max, txn_max;
{
	u_int32_t max;
	int a_is_lockid, b_is_lockid;

	a_is_lockid = a <= DB_LOCK_MAXID;
	b_is_lockid = b <= DB_LOCK_MAXID;

	/* Different id spaces: the lock id is the older one. */
	if (a_is_lockid != b_is_lockid)
		return (a_is_lockid);

	/* Same space; pick the matching current maximum. */
	max = a_is_lockid ? lock_max : txn_max;

	/*
	 * The ordering cannot be exact: we do not know where the current
	 * interval started or whether older lockers lie outside it.  An id
	 * above max predates the wrap, so it is older than one below max.
	 */
	if (a > max && b < max)
		return (1);
	if (b > max && a < max)
		return (0);

	return (a < b);
}