/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2005
 *	Sleepycat Software.  All rights reserved.
 *
 * $Id: env_region.c,v 12.13 2005/10/21 19:13:01 bostic Exp $
 */
|
|
|
|
|
|
|
|
#include "db_config.h"
|
|
|
|
|
|
|
|
#ifndef NO_SYSTEM_INCLUDES
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
#if TIME_WITH_SYS_TIME
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <time.h>
|
|
|
|
#else
|
|
|
|
#if HAVE_SYS_TIME_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#else
|
|
|
|
#include <time.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
#include <string.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "db_int.h"
|
2002-10-30 15:57:05 +04:00
|
|
|
#include "dbinc/db_shash.h"
|
2005-07-20 15:48:22 -07:00
|
|
|
#include "dbinc/crypto.h"
|
2002-10-30 15:57:05 +04:00
|
|
|
#include "dbinc/mp.h"
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/* Forward declarations for the file-local helpers referenced in this file. */
static void __db_des_destroy __P((DB_ENV *, REGION *));
static int  __db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **));
static int  __db_e_remfile __P((DB_ENV *));
static int  __db_faultmem __P((DB_ENV *, void *, size_t, int));
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_e_attach
|
|
|
|
* Join/create the environment
|
|
|
|
*
|
|
|
|
* PUBLIC: int __db_e_attach __P((DB_ENV *, u_int32_t *));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
__db_e_attach(dbenv, init_flagsp)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
u_int32_t *init_flagsp;
|
|
|
|
{
|
|
|
|
REGENV *renv;
|
|
|
|
REGENV_REF ref;
|
|
|
|
REGINFO *infop;
|
|
|
|
REGION *rp, tregion;
|
|
|
|
size_t size;
|
|
|
|
size_t nrw;
|
2005-12-05 10:27:46 -08:00
|
|
|
u_int32_t bytes, i, mbytes, nregions;
|
2005-07-20 15:48:22 -07:00
|
|
|
u_int retry_cnt;
|
2005-12-05 10:27:46 -08:00
|
|
|
int majver, minver, patchver, ret, segid;
|
2001-03-04 19:42:05 -05:00
|
|
|
char buf[sizeof(DB_REGION_FMT) + 20];
|
|
|
|
|
|
|
|
/* Initialization */
|
|
|
|
retry_cnt = 0;
|
|
|
|
|
|
|
|
/* Repeated initialization. */
|
|
|
|
loop: renv = NULL;
|
|
|
|
|
|
|
|
/* Set up the DB_ENV's REG_INFO structure. */
|
|
|
|
if ((ret = __os_calloc(dbenv, 1, sizeof(REGINFO), &infop)) != 0)
|
|
|
|
return (ret);
|
2005-07-20 15:48:22 -07:00
|
|
|
infop->dbenv = dbenv;
|
2001-03-04 19:42:05 -05:00
|
|
|
infop->type = REGION_TYPE_ENV;
|
|
|
|
infop->id = REGION_ID_ENV;
|
|
|
|
infop->flags = REGION_JOIN_OK;
|
|
|
|
if (F_ISSET(dbenv, DB_ENV_CREATE))
|
|
|
|
F_SET(infop, REGION_CREATE_OK);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have to single-thread the creation of the REGENV region. Once
|
2005-12-05 10:27:46 -08:00
|
|
|
* it exists, we can serialize using region mutexes, but until then
|
|
|
|
* we have to be the only player in the game.
|
2001-03-04 19:42:05 -05:00
|
|
|
*
|
|
|
|
* If this is a private environment, we are only called once and there
|
|
|
|
* are no possible race conditions.
|
|
|
|
*
|
|
|
|
* If this is a public environment, we use the filesystem to ensure
|
|
|
|
* the creation of the environment file is single-threaded.
|
|
|
|
*/
|
2002-10-30 15:57:05 +04:00
|
|
|
if (F_ISSET(dbenv, DB_ENV_PRIVATE)) {
|
|
|
|
if ((ret = __os_strdup(dbenv,
|
|
|
|
"process-private", &infop->name)) != 0)
|
|
|
|
goto err;
|
2001-03-04 19:42:05 -05:00
|
|
|
goto creation;
|
2002-10-30 15:57:05 +04:00
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Build the region name. */
|
|
|
|
(void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
|
|
|
|
if ((ret = __db_appname(dbenv,
|
2002-10-30 15:57:05 +04:00
|
|
|
DB_APP_NONE, buf, 0, NULL, &infop->name)) != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to create the file, if we have the authority. We have to ensure
|
|
|
|
* that multiple threads/processes attempting to simultaneously create
|
|
|
|
* the file are properly ordered. Open using the O_CREAT and O_EXCL
|
|
|
|
* flags so that multiple attempts to create the region will return
|
|
|
|
* failure in all but one. POSIX 1003.1 requires that EEXIST be the
|
|
|
|
* errno return value -- I sure hope they're right.
|
|
|
|
*/
|
|
|
|
if (F_ISSET(dbenv, DB_ENV_CREATE)) {
|
2002-10-30 15:57:05 +04:00
|
|
|
if ((ret = __os_open(dbenv, infop->name,
|
2005-07-20 15:48:22 -07:00
|
|
|
DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_REGION,
|
|
|
|
dbenv->db_mode, &dbenv->lockfhp)) == 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto creation;
|
|
|
|
if (ret != EEXIST) {
|
|
|
|
__db_err(dbenv,
|
|
|
|
"%s: %s", infop->name, db_strerror(ret));
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we couldn't create the file, try and open it. (If that fails,
|
|
|
|
* we're done.)
|
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
if ((ret = __os_open(
|
|
|
|
dbenv, infop->name, DB_OSO_REGION, 0, &dbenv->lockfhp)) != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto err;
|
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
/* The region exists, it's not okay to recreate it. */
|
|
|
|
F_CLR(infop, REGION_CREATE_OK);
|
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* The region may be in system memory not backed by the filesystem
|
|
|
|
* (more specifically, not backed by this file), and we're joining
|
|
|
|
* it. In that case, the process that created it will have written
|
|
|
|
* out a REGENV_REF structure as its only contents. We read that
|
|
|
|
* structure before we do anything further, e.g., we can't just map
|
|
|
|
* that file in and then figure out what's going on.
|
|
|
|
*
|
|
|
|
* All of this noise is because some systems don't have a coherent VM
|
|
|
|
* and buffer cache, and what's worse, when you mix operations on the
|
|
|
|
* VM and buffer cache, half the time you hang the system.
|
|
|
|
*
|
|
|
|
* If the file is the size of an REGENV_REF structure, then we know
|
|
|
|
* the real region is in some other memory. (The only way you get a
|
|
|
|
* file that size is to deliberately write it, as it's smaller than
|
|
|
|
* any possible disk sector created by writing a file or mapping the
|
|
|
|
* file into memory.) In which case, retrieve the structure from the
|
|
|
|
* file and use it to acquire the referenced memory.
|
|
|
|
*
|
|
|
|
* If the structure is larger than a REGENV_REF structure, then this
|
|
|
|
* file is backing the shared memory region, and we just map it into
|
|
|
|
* memory.
|
|
|
|
*
|
|
|
|
* And yes, this makes me want to take somebody and kill them. (I
|
|
|
|
* digress -- but you have no freakin' idea. This is unbelievably
|
|
|
|
* stupid and gross, and I've probably spent six months of my life,
|
|
|
|
* now, trying to make different versions of it work.)
|
|
|
|
*/
|
|
|
|
if ((ret = __os_ioinfo(dbenv, infop->name,
|
|
|
|
dbenv->lockfhp, &mbytes, &bytes, NULL)) != 0) {
|
|
|
|
__db_err(dbenv, "%s: %s", infop->name, db_strerror(ret));
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* A size_t is OK -- regions get mapped into memory, and so can't
|
|
|
|
* be larger than a size_t.
|
|
|
|
*/
|
|
|
|
size = mbytes * MEGABYTE + bytes;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the size is less than the size of a REGENV_REF structure, the
|
|
|
|
* region (or, possibly, the REGENV_REF structure) has not yet been
|
2005-12-05 10:27:46 -08:00
|
|
|
* completely written. Shouldn't be possible, but there's no reason
|
|
|
|
* not to wait awhile and try again.
|
2001-03-04 19:42:05 -05:00
|
|
|
*
|
|
|
|
* Otherwise, if the size is the size of a REGENV_REF structure,
|
|
|
|
* read it into memory and use it as a reference to the real region.
|
|
|
|
*/
|
|
|
|
if (size <= sizeof(ref)) {
|
|
|
|
if (size != sizeof(ref))
|
|
|
|
goto retry;
|
|
|
|
|
|
|
|
if ((ret = __os_read(dbenv, dbenv->lockfhp, &ref,
|
|
|
|
sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) {
|
|
|
|
if (ret == 0)
|
|
|
|
ret = EIO;
|
|
|
|
__db_err(dbenv,
|
|
|
|
"%s: unable to read system-memory information from: %s",
|
|
|
|
infop->name, db_strerror(ret));
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
size = ref.size;
|
|
|
|
segid = ref.segid;
|
|
|
|
|
|
|
|
F_SET(dbenv, DB_ENV_SYSTEM_MEM);
|
|
|
|
} else if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
|
|
|
|
ret = EINVAL;
|
|
|
|
__db_err(dbenv,
|
|
|
|
"%s: existing environment not created in system memory: %s",
|
|
|
|
infop->name, db_strerror(ret));
|
|
|
|
goto err;
|
|
|
|
} else
|
|
|
|
segid = INVALID_REGION_SEGID;
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
#ifndef HAVE_MUTEX_FCNTL
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* If we're not doing fcntl locking, we can close the file handle. We
|
|
|
|
* no longer need it and the less contact between the buffer cache and
|
|
|
|
* the VM, the better.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
(void)__os_closehandle(dbenv, dbenv->lockfhp);
|
|
|
|
dbenv->lockfhp = NULL;
|
2001-03-04 19:42:05 -05:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Call the region join routine to acquire the region. */
|
|
|
|
memset(&tregion, 0, sizeof(tregion));
|
2002-10-30 15:57:05 +04:00
|
|
|
tregion.size = (roff_t)size;
|
2001-03-04 19:42:05 -05:00
|
|
|
tregion.segid = segid;
|
|
|
|
if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The environment's REGENV structure has to live at offset 0 instead
|
|
|
|
* of the usual shalloc information. Set the primary reference and
|
|
|
|
* correct the "addr" value to reference the shalloc region. Note,
|
|
|
|
* this means that all of our offsets (R_ADDR/R_OFFSET) get shifted
|
|
|
|
* as well, but that should be fine.
|
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
infop->primary = infop->addr;
|
2001-03-04 19:42:05 -05:00
|
|
|
infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);
|
2005-07-20 15:48:22 -07:00
|
|
|
renv = infop->primary;
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* Make sure the region matches our build. Special case a region
|
|
|
|
* that's all nul bytes, just treat it like any other corruption.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* We don't display the major/minor version from the environment,
|
|
|
|
* because it may be in a different place in the two regions.
|
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
if (renv->majver != DB_VERSION_MAJOR ||
|
|
|
|
renv->minver != DB_VERSION_MINOR) {
|
2005-12-05 10:27:46 -08:00
|
|
|
if (renv->majver != 0 || renv->minver != 0) {
|
|
|
|
__db_err(dbenv,
|
|
|
|
"Program version %d.%d doesn't match environment version %d.%d",
|
|
|
|
DB_VERSION_MAJOR, DB_VERSION_MINOR,
|
|
|
|
renv->majver, renv->minver);
|
|
|
|
ret = DB_VERSION_MISMATCH;
|
|
|
|
} else
|
|
|
|
ret = EINVAL;
|
2005-07-20 15:48:22 -07:00
|
|
|
goto err;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if the environment has had a catastrophic failure.
|
|
|
|
*
|
|
|
|
* Check the magic number to ensure the region is initialized. If the
|
|
|
|
* magic number isn't set, the lock may not have been initialized, and
|
|
|
|
* an attempt to use it could lead to random behavior.
|
|
|
|
*
|
|
|
|
* The panic and magic values aren't protected by any lock, so we never
|
|
|
|
* use them in any check that's more complex than set/not-set.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* I'd rather play permissions games using the underlying file, but I
|
|
|
|
* can't because Windows/NT filesystems won't open files mode 0.
|
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (renv->panic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) {
|
2001-03-04 19:42:05 -05:00
|
|
|
ret = __db_panic_msg(dbenv);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
if (renv->magic != DB_REGION_MAGIC)
|
|
|
|
goto retry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a reference to the underlying REGION information for this
|
|
|
|
* environment.
|
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0 || rp == NULL)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto find_err;
|
|
|
|
infop->rp = rp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* There's still a possibility for inconsistent data. When we acquired
|
|
|
|
* the size of the region and attached to it, it might have still been
|
|
|
|
* growing as part of its creation. We can detect this by checking the
|
|
|
|
* size we originally found against the region's current size. (The
|
|
|
|
* region's current size has to be final, the creator finished growing
|
2005-12-05 10:27:46 -08:00
|
|
|
* it before setting the magic number in the region.)
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (rp->size != size)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto retry;
|
|
|
|
|
|
|
|
/* Increment the reference count. */
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_LOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
++renv->refcnt;
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_UNLOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Check our callers configuration flags, it's an error to configure
|
|
|
|
* incompatible or additional subsystems in an existing environment.
|
|
|
|
* Return the total set of flags to the caller so they initialize the
|
|
|
|
* correct set of subsystems.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
if (init_flagsp != NULL) {
|
2005-12-05 10:27:46 -08:00
|
|
|
FLD_CLR(*init_flagsp, renv->init_flags);
|
|
|
|
if (*init_flagsp != 0) {
|
|
|
|
__db_err(dbenv,
|
|
|
|
"configured environment flags incompatible with existing environment");
|
|
|
|
ret = EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
*init_flagsp = renv->init_flags;
|
2005-07-20 15:48:22 -07:00
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Fault the pages into memory. Note, do this AFTER releasing the
|
|
|
|
* lock, because we're only reading the pages, not writing them.
|
|
|
|
*/
|
2002-10-30 15:57:05 +04:00
|
|
|
(void)__db_faultmem(dbenv, infop->primary, rp->size, 0);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Everything looks good, we're done. */
|
|
|
|
dbenv->reginfo = infop;
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
creation:
|
|
|
|
/* Create the environment region. */
|
|
|
|
F_SET(infop, REGION_CREATE);
|
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Allocate room for REGION structures plus overhead.
|
2002-10-30 15:57:05 +04:00
|
|
|
*
|
2005-12-05 10:27:46 -08:00
|
|
|
* XXX
|
|
|
|
* Overhead is so high because encryption passwds are stored in the
|
|
|
|
* base environment region, as are replication vote arrays. This is
|
|
|
|
* a bug, not a feature, replication needs its own region.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
memset(&tregion, 0, sizeof(tregion));
|
2005-12-05 10:27:46 -08:00
|
|
|
nregions = dbenv->mp_ncache + 10;
|
|
|
|
tregion.size =
|
|
|
|
(roff_t)(nregions * sizeof(REGION) + dbenv->passwd_len + 16 * 1024);
|
2001-03-04 19:42:05 -05:00
|
|
|
tregion.segid = INVALID_REGION_SEGID;
|
|
|
|
if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fault the pages into memory. Note, do this BEFORE we initialize
|
|
|
|
* anything, because we're writing the pages, not just reading them.
|
|
|
|
*/
|
2002-10-30 15:57:05 +04:00
|
|
|
(void)__db_faultmem(dbenv, infop->addr, tregion.size, 1);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The first object in the region is the REGENV structure. This is
|
|
|
|
* different from the other regions, and, from everything else in
|
|
|
|
* this region, where all objects are allocated from the pool, i.e.,
|
|
|
|
* there aren't any fixed locations. The remaining space is made
|
|
|
|
* available for later allocation.
|
|
|
|
*
|
|
|
|
* The allocation space must be size_t aligned, because that's what
|
|
|
|
* the initialization routine is going to store there. To make sure
|
|
|
|
* that happens, the REGENV structure was padded with a final size_t.
|
|
|
|
* No other region needs to worry about it because all of them treat
|
|
|
|
* the entire region as allocation space.
|
|
|
|
*
|
|
|
|
* Set the primary reference and correct the "addr" value to reference
|
|
|
|
* the shalloc region. Note, this requires that we "uncorrect" it at
|
|
|
|
* region detach, and that all of our offsets (R_ADDR/R_OFFSET) will be
|
|
|
|
* shifted as well, but that should be fine.
|
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
infop->primary = infop->addr;
|
2001-03-04 19:42:05 -05:00
|
|
|
infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);
|
2005-07-20 15:48:22 -07:00
|
|
|
__db_shalloc_init(infop, tregion.size - sizeof(REGENV));
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Initialize the rest of the REGENV structure. (Don't set the magic
|
|
|
|
* number to the correct value, that would validate the environment).
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
renv = infop->primary;
|
2005-12-05 10:27:46 -08:00
|
|
|
renv->magic = 0;
|
|
|
|
renv->panic = 0;
|
|
|
|
|
|
|
|
(void)db_version(&majver, &minver, &patchver);
|
|
|
|
renv->majver = (u_int32_t)majver;
|
|
|
|
renv->minver = (u_int32_t)minver;
|
|
|
|
renv->patchver = (u_int32_t)patchver;
|
|
|
|
|
|
|
|
(void)time(&renv->timestamp);
|
2005-07-20 15:48:22 -07:00
|
|
|
__os_unique_id(dbenv, &renv->envid);
|
2005-12-05 10:27:46 -08:00
|
|
|
|
|
|
|
if ((ret = __mutex_alloc(
|
|
|
|
dbenv, MTX_ENV_REGION, 0, &renv->mtx_regenv)) != 0)
|
|
|
|
goto err;
|
2001-03-04 19:42:05 -05:00
|
|
|
renv->refcnt = 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize init_flags to store the flags that any other environment
|
|
|
|
* handle that uses DB_JOINENV to join this environment will need.
|
|
|
|
*/
|
|
|
|
renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp;
|
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Set up the region array. We use an array rather than a linked list
|
|
|
|
* as we have to traverse this list after failure in some cases, and
|
|
|
|
* we don't want to infinitely loop should the application fail while
|
|
|
|
* we're manipulating the list.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
renv->region_cnt = nregions;
|
|
|
|
if ((ret =
|
|
|
|
__db_shalloc(infop, nregions * sizeof(REGION), 0, &rp)) != 0) {
|
|
|
|
__db_err(dbenv, "unable to create new master region array: %s",
|
|
|
|
db_strerror(ret));
|
2001-03-04 19:42:05 -05:00
|
|
|
goto err;
|
|
|
|
}
|
2005-12-05 10:27:46 -08:00
|
|
|
renv->region_off = R_OFFSET(infop, rp);
|
|
|
|
for (i = 0; i < nregions; ++i, ++rp)
|
|
|
|
rp->id = INVALID_REGION_ID;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
renv->cipher_off = INVALID_ROFF;
|
|
|
|
|
|
|
|
renv->rep_off = INVALID_ROFF;
|
|
|
|
renv->flags = 0;
|
|
|
|
renv->op_timestamp = renv->rep_timestamp = 0;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the underlying REGION structure for this environment. Note,
|
|
|
|
* we created the underlying OS region before we acquired the REGION
|
|
|
|
* structure, which is backwards from the normal procedure. Update
|
|
|
|
* the REGION structure.
|
|
|
|
*/
|
|
|
|
if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0) {
|
2005-12-05 10:27:46 -08:00
|
|
|
find_err: __db_err(dbenv, "%s: unable to find environment", infop->name);
|
2001-03-04 19:42:05 -05:00
|
|
|
if (ret == 0)
|
|
|
|
ret = EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
infop->rp = rp;
|
|
|
|
rp->size = tregion.size;
|
|
|
|
rp->segid = tregion.segid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* If we create an environment where regions are public and in system
|
|
|
|
* memory, we have to inform processes joining the environment how to
|
|
|
|
* attach to the shared memory segment. So, we write the shared memory
|
|
|
|
* identifier into the file, to be read by those other processes.
|
|
|
|
*
|
|
|
|
* XXX
|
|
|
|
* This is really OS-layer information, but I can't see any easy way
|
|
|
|
* to move it down there without passing down information that it has
|
|
|
|
* no right to know, e.g., that this is the one-and-only REGENV region
|
|
|
|
* and not some other random region.
|
|
|
|
*/
|
|
|
|
if (tregion.segid != INVALID_REGION_SEGID) {
|
|
|
|
ref.size = tregion.size;
|
|
|
|
ref.segid = tregion.segid;
|
2002-10-30 15:57:05 +04:00
|
|
|
if ((ret = __os_write(
|
|
|
|
dbenv, dbenv->lockfhp, &ref, sizeof(ref), &nrw)) != 0) {
|
2001-03-04 19:42:05 -05:00
|
|
|
__db_err(dbenv,
|
|
|
|
"%s: unable to write out public environment ID: %s",
|
|
|
|
infop->name, db_strerror(ret));
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
#ifndef HAVE_MUTEX_FCNTL
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* If we're not doing fcntl locking, we can close the file handle. We
|
|
|
|
* no longer need it and the less contact between the buffer cache and
|
|
|
|
* the VM, the better.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
if (dbenv->lockfhp != NULL) {
|
|
|
|
(void)__os_closehandle(dbenv, dbenv->lockfhp);
|
|
|
|
dbenv->lockfhp = NULL;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Everything looks good, we're done. */
|
|
|
|
dbenv->reginfo = infop;
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
err:
|
|
|
|
retry: /* Close any open file handle. */
|
2005-07-20 15:48:22 -07:00
|
|
|
if (dbenv->lockfhp != NULL) {
|
2002-10-30 15:57:05 +04:00
|
|
|
(void)__os_closehandle(dbenv, dbenv->lockfhp);
|
2005-07-20 15:48:22 -07:00
|
|
|
dbenv->lockfhp = NULL;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we joined or created the region, detach from it. If we created
|
|
|
|
* it, destroy it. Note, there's a path in the above code where we're
|
|
|
|
* using a temporary REGION structure because we haven't yet allocated
|
|
|
|
* the real one. In that case the region address (addr) will be filled
|
|
|
|
* in, but the REGION pointer (rp) won't. Fix it.
|
|
|
|
*/
|
|
|
|
if (infop->addr != NULL) {
|
|
|
|
if (infop->rp == NULL)
|
|
|
|
infop->rp = &tregion;
|
|
|
|
|
|
|
|
/* Reset the addr value that we "corrected" above. */
|
|
|
|
infop->addr = infop->primary;
|
|
|
|
(void)__os_r_detach(dbenv,
|
|
|
|
infop, F_ISSET(infop, REGION_CREATE));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Free the allocated name and/or REGINFO structure. */
|
|
|
|
if (infop->name != NULL)
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_free(dbenv, infop->name);
|
|
|
|
__os_free(dbenv, infop);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* If we had a temporary error, wait awhile and try again. */
|
|
|
|
if (ret == 0) {
|
|
|
|
if (++retry_cnt > 3) {
|
|
|
|
__db_err(dbenv, "unable to join the environment");
|
|
|
|
ret = EAGAIN;
|
|
|
|
} else {
|
|
|
|
__os_sleep(dbenv, retry_cnt * 3, 0);
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* __db_e_golive --
|
|
|
|
* Turn on the created environment.
|
|
|
|
*
|
|
|
|
* PUBLIC: int __db_e_golive __P((DB_ENV *));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
__db_e_golive(dbenv)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
{
|
|
|
|
REGENV *renv;
|
|
|
|
REGINFO *infop;
|
|
|
|
|
|
|
|
infop = dbenv->reginfo;
|
|
|
|
renv = infop->primary;
|
|
|
|
|
|
|
|
/* If we didn't create the region, there's no need for further work. */
|
|
|
|
if (!F_ISSET(infop, REGION_CREATE))
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Validate the file. All other threads of control are waiting
|
|
|
|
* on this value to be written -- "Let slip the hounds of war!"
|
|
|
|
*/
|
|
|
|
renv->magic = DB_REGION_MAGIC;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
|
|
|
* __db_e_detach --
|
|
|
|
* Detach from the environment.
|
|
|
|
*
|
|
|
|
* PUBLIC: int __db_e_detach __P((DB_ENV *, int));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
__db_e_detach(dbenv, destroy)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
int destroy;
|
|
|
|
{
|
|
|
|
REGENV *renv;
|
|
|
|
REGINFO *infop;
|
2005-12-05 10:27:46 -08:00
|
|
|
REGION rp;
|
|
|
|
int ret, t_ret;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
infop = dbenv->reginfo;
|
|
|
|
renv = infop->primary;
|
2005-12-05 10:27:46 -08:00
|
|
|
ret = 0;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
if (F_ISSET(dbenv, DB_ENV_PRIVATE))
|
|
|
|
destroy = 1;
|
2005-07-20 15:48:22 -07:00
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
/* Decrement the reference count. */
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_LOCK(dbenv, renv->mtx_regenv);
|
|
|
|
if (renv->refcnt == 0)
|
|
|
|
__db_err(dbenv, "environment reference count went negative");
|
|
|
|
else
|
2001-03-04 19:42:05 -05:00
|
|
|
--renv->refcnt;
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_UNLOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Close the locking file handle. */
|
2005-07-20 15:48:22 -07:00
|
|
|
if (dbenv->lockfhp != NULL) {
|
2005-12-05 10:27:46 -08:00
|
|
|
if ((t_ret =
|
|
|
|
__os_closehandle(dbenv, dbenv->lockfhp)) != 0 && ret == 0)
|
|
|
|
ret = t_ret;
|
2005-07-20 15:48:22 -07:00
|
|
|
dbenv->lockfhp = NULL;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Release the region, and kill our reference.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2002-10-30 15:57:05 +04:00
|
|
|
if (destroy) {
|
2005-07-20 15:48:22 -07:00
|
|
|
#ifdef HAVE_CRYPTO
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* Destroy any system resources the crypto subsystem may have
|
|
|
|
* acquired.
|
|
|
|
*/
|
|
|
|
if ((t_ret = __crypto_region_destroy(dbenv)) != 0 && ret == 0)
|
|
|
|
ret = t_ret;
|
2005-07-20 15:48:22 -07:00
|
|
|
#endif
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* Destroy any system resources the replication subsystem may
|
|
|
|
* have acquired.
|
|
|
|
*/
|
|
|
|
if ((t_ret = __rep_region_destroy(dbenv)) != 0 && ret == 0)
|
|
|
|
ret = t_ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free the REGION array.
|
|
|
|
*
|
|
|
|
* The actual underlying region structure is allocated from the
|
|
|
|
* primary shared region, and we're about to free it. Save a
|
|
|
|
* copy on our stack for the REGINFO to reference when it calls
|
|
|
|
* down into the OS layer to release the shared memory segment.
|
|
|
|
*/
|
|
|
|
rp = *infop->rp;
|
|
|
|
infop->rp = &rp;
|
|
|
|
|
|
|
|
if (renv->region_off != INVALID_ROFF)
|
|
|
|
__db_shalloc_free(
|
|
|
|
infop, R_ADDR(infop, renv->region_off));
|
|
|
|
|
|
|
|
/* Discard any mutex resources we may have acquired. */
|
|
|
|
if ((t_ret =
|
|
|
|
__mutex_free(dbenv, &renv->mtx_regenv)) != 0 && ret == 0)
|
|
|
|
ret = t_ret;
|
2002-10-30 15:57:05 +04:00
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Set the DB_ENV->reginfo field to NULL. First, DB_ENV->remove calls
|
|
|
|
* __env_remove to do the region remove, and __envremove attached and
|
|
|
|
* then detaches from the region. We don't want to return to
|
|
|
|
* DB_ENV->remove with a non-NULL DB_ENV->reginfo field because it will
|
|
|
|
* attempt to detach again as part of its cleanup.
|
2001-03-04 19:42:05 -05:00
|
|
|
*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Second, DB code uses DB_ENV->reginfo to decide if it's OK to read
|
|
|
|
* the underlying region. We're about to destroy what it references,
|
|
|
|
* so it needs to be cleared.
|
2005-07-20 15:48:22 -07:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
dbenv->reginfo = NULL;
|
2005-07-20 15:48:22 -07:00
|
|
|
|
|
|
|
/* Reset the addr value that we "corrected" above. */
|
|
|
|
infop->addr = infop->primary;
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
if ((t_ret = __os_r_detach(dbenv, infop, destroy)) != 0 && ret == 0)
|
|
|
|
ret = t_ret;
|
2005-07-20 15:48:22 -07:00
|
|
|
if (infop->name != NULL)
|
|
|
|
__os_free(dbenv, infop->name);
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/* Discard the DB_ENV->reginfo field's memory. */
|
|
|
|
__os_free(dbenv, infop);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
return (ret);
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_e_remove --
|
|
|
|
* Discard an environment if it's not in use.
|
|
|
|
*
|
2002-10-30 15:57:05 +04:00
|
|
|
* PUBLIC: int __db_e_remove __P((DB_ENV *, u_int32_t));
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
int
|
2002-10-30 15:57:05 +04:00
|
|
|
__db_e_remove(dbenv, flags)
|
2001-03-04 19:42:05 -05:00
|
|
|
DB_ENV *dbenv;
|
2002-10-30 15:57:05 +04:00
|
|
|
u_int32_t flags;
|
2001-03-04 19:42:05 -05:00
|
|
|
{
|
|
|
|
REGENV *renv;
|
|
|
|
REGINFO *infop, reginfo;
|
|
|
|
REGION *rp;
|
2005-12-05 10:27:46 -08:00
|
|
|
u_int32_t db_env_reset, i;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
db_env_reset = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This routine has to walk a nasty line between not looking into
|
|
|
|
* the environment (which may be corrupted after an app or system
|
|
|
|
* crash), and removing everything that needs removing. What we
|
|
|
|
* do is:
|
2005-12-05 10:27:46 -08:00
|
|
|
* 1. Connect to the environment.
|
2001-03-04 19:42:05 -05:00
|
|
|
* 2. If the environment is in use (reference count is non-zero),
|
|
|
|
* return EBUSY.
|
2005-12-05 10:27:46 -08:00
|
|
|
* 3. Panic it and overwrite the magic number so any threads of
|
|
|
|
* control attempting to connect (or racing with us) backoff
|
|
|
|
* and retry or just die.
|
|
|
|
* 4. Walk the array of regions. Connect to each region and then
|
2001-03-04 19:42:05 -05:00
|
|
|
* disconnect with the destroy flag set. This shouldn't cause
|
|
|
|
* any problems, even if the region is corrupted, because we
|
2005-12-05 10:27:46 -08:00
|
|
|
* never look inside the region (with the single exception of
|
|
|
|
* mutex regions on systems where we have to return resources
|
|
|
|
* to the underlying system).
|
2001-03-04 19:42:05 -05:00
|
|
|
* 5. Walk the list of files in the directory, unlinking any
|
|
|
|
* files that match a region name. Unlink the environment
|
|
|
|
* file last.
|
|
|
|
*
|
|
|
|
* If the force flag is set, we do not acquire any locks during this
|
|
|
|
* process.
|
2005-12-05 10:27:46 -08:00
|
|
|
*
|
|
|
|
* We're going to panic the environment, so we'll want to ignore that
|
|
|
|
* flag.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (LF_ISSET(DB_FORCE))
|
2002-10-30 15:57:05 +04:00
|
|
|
F_SET(dbenv, DB_ENV_NOLOCKING);
|
|
|
|
F_SET(dbenv, DB_ENV_NOPANIC);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Join the environment. */
|
|
|
|
if ((ret = __db_e_attach(dbenv, NULL)) != 0) {
|
|
|
|
/*
|
|
|
|
* If we can't join it, we assume that's because it doesn't
|
|
|
|
* exist. It would be better to know why we failed, but it
|
|
|
|
* probably isn't important.
|
|
|
|
*/
|
|
|
|
ret = 0;
|
2005-12-05 10:27:46 -08:00
|
|
|
if (LF_ISSET(DB_FORCE))
|
2001-03-04 19:42:05 -05:00
|
|
|
goto remfiles;
|
2002-10-30 15:57:05 +04:00
|
|
|
goto done;
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
infop = dbenv->reginfo;
|
|
|
|
renv = infop->primary;
|
|
|
|
|
|
|
|
/* Lock the environment. */
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_LOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
/*
|
|
|
|
* If it's in use, we're done unless we're forcing the issue or the
|
|
|
|
* environment has panic'd. (Presumably, if the environment panic'd,
|
|
|
|
* the thread holding the reference count may not have cleaned up.)
|
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (renv->refcnt == 1 || renv->panic == 1 || LF_ISSET(DB_FORCE)) {
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
|
|
|
* Set the panic flag and overwrite the magic number.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* From this point on, there's no going back, we pretty
|
|
|
|
* much ignore errors, and just whack on whatever we can.
|
|
|
|
*/
|
|
|
|
renv->magic = 0;
|
2005-12-05 10:27:46 -08:00
|
|
|
renv->panic = 1;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Unlock the environment -- nobody should need this lock
|
|
|
|
* because we've poisoned the pool.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_UNLOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/* Attach to each sub-region and destroy it. */
|
|
|
|
for (rp = R_ADDR(infop, renv->region_off),
|
|
|
|
i = 0; i < renv->region_cnt; ++i, ++rp) {
|
|
|
|
if (rp->id == INVALID_REGION_ID ||
|
|
|
|
rp->type == REGION_TYPE_ENV)
|
2001-03-04 19:42:05 -05:00
|
|
|
continue;
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* The REGION_CREATE_OK flag is set for Windows/95 --
|
|
|
|
* regions are zero'd out when the last reference to
|
|
|
|
* the region goes away, in which case the underlying
|
|
|
|
* OS region code requires callers be prepared to
|
|
|
|
* create the region in order to join it.
|
|
|
|
*/
|
|
|
|
memset(®info, 0, sizeof(reginfo));
|
|
|
|
reginfo.id = rp->id;
|
|
|
|
reginfo.flags = REGION_CREATE_OK;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
/*
|
|
|
|
* If we get here and can't attach and/or detach to the
|
|
|
|
* region, it's a mess. Ignore errors, there's nothing
|
|
|
|
* we can do about them.
|
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (__db_r_attach(dbenv, ®info, 0) != 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
|
|
|
|
/*
|
|
|
|
* If destroying the mutex region, return any system
|
|
|
|
* resources to the system.
|
|
|
|
*/
|
|
|
|
if (reginfo.type == REGION_TYPE_MUTEX)
|
|
|
|
__mutex_resource_return(dbenv, ®info);
|
|
|
|
#endif
|
|
|
|
(void)__db_r_detach(dbenv, ®info, 1);
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Destroy the environment's region. */
|
|
|
|
(void)__db_e_detach(dbenv, 1);
|
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
/* Discard any remaining physical files. */
|
2001-03-04 19:42:05 -05:00
|
|
|
remfiles: (void)__db_e_remfile(dbenv);
|
|
|
|
} else {
|
|
|
|
/* Unlock the environment. */
|
2005-12-05 10:27:46 -08:00
|
|
|
MUTEX_UNLOCK(dbenv, renv->mtx_regenv);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Discard the environment. */
|
|
|
|
(void)__db_e_detach(dbenv, 0);
|
|
|
|
|
|
|
|
ret = EBUSY;
|
|
|
|
}
|
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
done: F_CLR(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC);
|
|
|
|
F_SET(dbenv, db_env_reset);
|
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_e_remfile --
|
|
|
|
* Discard any region files in the filesystem.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
__db_e_remfile(dbenv)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
{
|
|
|
|
int cnt, fcnt, lastrm, ret;
|
|
|
|
const char *dir;
|
2005-07-20 15:48:22 -07:00
|
|
|
char saved_char, *p, **names, *path, buf[sizeof(DB_REGION_FMT) + 20];
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Get the full path of a file in the environment. */
|
|
|
|
(void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
|
2002-10-30 15:57:05 +04:00
|
|
|
if ((ret = __db_appname(dbenv, DB_APP_NONE, buf, 0, NULL, &path)) != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
return (ret);
|
|
|
|
|
|
|
|
/* Get the parent directory for the environment. */
|
|
|
|
if ((p = __db_rpath(path)) == NULL) {
|
|
|
|
p = path;
|
2005-07-20 15:48:22 -07:00
|
|
|
saved_char = *p;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
dir = PATH_DOT;
|
|
|
|
} else {
|
2005-07-20 15:48:22 -07:00
|
|
|
saved_char = *p;
|
2001-03-04 19:42:05 -05:00
|
|
|
*p = '\0';
|
|
|
|
|
|
|
|
dir = path;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the list of file names. */
|
2002-10-30 15:57:05 +04:00
|
|
|
if ((ret = __os_dirlist(dbenv, dir, &names, &fcnt)) != 0)
|
|
|
|
__db_err(dbenv, "%s: %s", dir, db_strerror(ret));
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Restore the path, and free it. */
|
2005-07-20 15:48:22 -07:00
|
|
|
*p = saved_char;
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_free(dbenv, path);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
if (ret != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
return (ret);
|
|
|
|
|
|
|
|
/*
|
2005-07-20 15:48:22 -07:00
|
|
|
* Remove files from the region directory.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
for (lastrm = -1, cnt = fcnt; --cnt >= 0;) {
|
2005-07-20 15:48:22 -07:00
|
|
|
/* Skip anything outside our name space. */
|
|
|
|
if (strncmp(names[cnt],
|
|
|
|
DB_REGION_PREFIX, sizeof(DB_REGION_PREFIX) - 1))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Skip queue extent files. */
|
|
|
|
if (strncmp(names[cnt], "__dbq.", 6) == 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
continue;
|
2005-07-20 15:48:22 -07:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/* Skip registry files. */
|
|
|
|
if (strncmp(names[cnt], "__db.register", 13) == 0)
|
|
|
|
continue;
|
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
/* Skip replication files. */
|
|
|
|
if (strncmp(names[cnt], "__db.rep.", 9) == 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove the primary environment region last, because it's
|
|
|
|
* the key to this whole mess.
|
|
|
|
*/
|
2001-03-04 19:42:05 -05:00
|
|
|
if (strcmp(names[cnt], DB_REGION_ENV) == 0) {
|
|
|
|
lastrm = cnt;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
/* Remove the file. */
|
2001-03-04 19:42:05 -05:00
|
|
|
if (__db_appname(dbenv,
|
2002-10-30 15:57:05 +04:00
|
|
|
DB_APP_NONE, names[cnt], 0, NULL, &path) == 0) {
|
2005-07-20 15:48:22 -07:00
|
|
|
/*
|
|
|
|
* Overwrite region files. Temporary files would have
|
|
|
|
* been maintained in encrypted format, so there's no
|
|
|
|
* reason to overwrite them. This is not an exact
|
|
|
|
* check on the file being a region file, but it's
|
|
|
|
* not likely to be wrong, and the worst thing that can
|
|
|
|
* happen is we overwrite a file that didn't need to be
|
|
|
|
* overwritten.
|
|
|
|
*/
|
|
|
|
if (F_ISSET(dbenv, DB_ENV_OVERWRITE) &&
|
|
|
|
strlen(names[cnt]) == DB_REGION_NAME_LENGTH)
|
2005-12-05 10:27:46 -08:00
|
|
|
(void)__db_file_multi_write(dbenv, path);
|
2001-03-04 19:42:05 -05:00
|
|
|
(void)__os_unlink(dbenv, path);
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_free(dbenv, path);
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lastrm != -1)
|
|
|
|
if (__db_appname(dbenv,
|
2002-10-30 15:57:05 +04:00
|
|
|
DB_APP_NONE, names[lastrm], 0, NULL, &path) == 0) {
|
|
|
|
if (F_ISSET(dbenv, DB_ENV_OVERWRITE))
|
2005-12-05 10:27:46 -08:00
|
|
|
(void)__db_file_multi_write(dbenv, path);
|
2001-03-04 19:42:05 -05:00
|
|
|
(void)__os_unlink(dbenv, path);
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_free(dbenv, path);
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_dirfree(dbenv, names, fcnt);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_r_attach
|
|
|
|
* Join/create a region.
|
|
|
|
*
|
|
|
|
* PUBLIC: int __db_r_attach __P((DB_ENV *, REGINFO *, size_t));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
__db_r_attach(dbenv, infop, size)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
REGINFO *infop;
|
|
|
|
size_t size;
|
|
|
|
{
|
|
|
|
REGION *rp;
|
|
|
|
int ret;
|
|
|
|
char buf[sizeof(DB_REGION_FMT) + 20];
|
|
|
|
|
2002-10-30 15:57:05 +04:00
|
|
|
/*
|
|
|
|
* Find or create a REGION structure for this region. If we create
|
|
|
|
* it, the REGION_CREATE flag will be set in the infop structure.
|
|
|
|
*/
|
|
|
|
F_CLR(infop, REGION_CREATE);
|
2005-12-05 10:27:46 -08:00
|
|
|
if ((ret = __db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
return (ret);
|
2005-07-20 15:48:22 -07:00
|
|
|
infop->dbenv = dbenv;
|
2001-03-04 19:42:05 -05:00
|
|
|
infop->rp = rp;
|
|
|
|
infop->type = rp->type;
|
|
|
|
infop->id = rp->id;
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* __db_des_get may have created the region and reset the create
|
|
|
|
* flag. If we're creating the region, set the desired size.
|
|
|
|
*/
|
2001-03-04 19:42:05 -05:00
|
|
|
if (F_ISSET(infop, REGION_CREATE))
|
2002-10-30 15:57:05 +04:00
|
|
|
rp->size = (roff_t)size;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Join/create the underlying region. */
|
|
|
|
(void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id);
|
|
|
|
if ((ret = __db_appname(dbenv,
|
2002-10-30 15:57:05 +04:00
|
|
|
DB_APP_NONE, buf, 0, NULL, &infop->name)) != 0)
|
2001-03-04 19:42:05 -05:00
|
|
|
goto err;
|
|
|
|
if ((ret = __os_r_attach(dbenv, infop, rp)) != 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fault the pages into memory. Note, do this BEFORE we initialize
|
|
|
|
* anything because we're writing pages in created regions, not just
|
|
|
|
* reading them.
|
|
|
|
*/
|
2002-10-30 15:57:05 +04:00
|
|
|
(void)__db_faultmem(dbenv,
|
|
|
|
infop->addr, rp->size, F_ISSET(infop, REGION_CREATE));
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* The underlying layer may have just decided that we are going
|
|
|
|
* to create the region. There are various system issues that
|
|
|
|
* can result in a useless region that requires re-initialization.
|
|
|
|
*
|
|
|
|
* If we created the region, initialize it for allocation.
|
|
|
|
*/
|
2005-07-20 15:48:22 -07:00
|
|
|
if (F_ISSET(infop, REGION_CREATE))
|
|
|
|
__db_shalloc_init(infop, rp->size);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
err: /* Discard the underlying region. */
|
|
|
|
if (infop->addr != NULL)
|
2001-03-04 19:42:05 -05:00
|
|
|
(void)__os_r_detach(dbenv,
|
|
|
|
infop, F_ISSET(infop, REGION_CREATE));
|
|
|
|
infop->rp = NULL;
|
|
|
|
infop->id = INVALID_REGION_ID;
|
|
|
|
|
|
|
|
/* Discard the REGION structure if we created it. */
|
2002-10-30 15:57:05 +04:00
|
|
|
if (F_ISSET(infop, REGION_CREATE)) {
|
2005-12-05 10:27:46 -08:00
|
|
|
__db_des_destroy(dbenv, rp);
|
2002-10-30 15:57:05 +04:00
|
|
|
F_CLR(infop, REGION_CREATE);
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_r_detach --
|
|
|
|
* Detach from a region.
|
|
|
|
*
|
|
|
|
* PUBLIC: int __db_r_detach __P((DB_ENV *, REGINFO *, int));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
__db_r_detach(dbenv, infop, destroy)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
REGINFO *infop;
|
|
|
|
int destroy;
|
|
|
|
{
|
|
|
|
REGION *rp;
|
2005-12-05 10:27:46 -08:00
|
|
|
int ret;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
rp = infop->rp;
|
2002-10-30 15:57:05 +04:00
|
|
|
if (F_ISSET(dbenv, DB_ENV_PRIVATE))
|
|
|
|
destroy = 1;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* When discarding the regions as we shut down a database environment,
|
|
|
|
* discard any allocated shared memory segments. This is the last time
|
|
|
|
* we use them, and db_region_destroy is the last region-specific call
|
|
|
|
* we make.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (F_ISSET(dbenv, DB_ENV_PRIVATE) && infop->primary != NULL)
|
|
|
|
__db_shalloc_free(infop, infop->primary);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Detach from the underlying OS region. */
|
|
|
|
ret = __os_r_detach(dbenv, infop, destroy);
|
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/* If we destroyed the region, discard the REGION structure. */
|
|
|
|
if (destroy)
|
|
|
|
__db_des_destroy(dbenv, rp);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/* Destroy the structure. */
|
|
|
|
if (infop->name != NULL)
|
2002-10-30 15:57:05 +04:00
|
|
|
__os_free(dbenv, infop->name);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_des_get --
|
|
|
|
* Return a reference to the shared information for a REGION,
|
|
|
|
* optionally creating a new entry.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
__db_des_get(dbenv, env_infop, infop, rpp)
|
|
|
|
DB_ENV *dbenv;
|
|
|
|
REGINFO *env_infop, *infop;
|
|
|
|
REGION **rpp;
|
|
|
|
{
|
|
|
|
REGENV *renv;
|
2005-12-05 10:27:46 -08:00
|
|
|
REGION *rp, *empty_slot, *first_type;
|
|
|
|
u_int32_t i, maxid;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
*rpp = NULL;
|
|
|
|
renv = env_infop->primary;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the caller wants to join a region, walk through the existing
|
|
|
|
* regions looking for a matching ID (if ID specified) or matching
|
|
|
|
* type (if type specified). If we return based on a matching type
|
|
|
|
* return the "primary" region, that is, the first region that was
|
|
|
|
* created of this type.
|
|
|
|
*
|
2005-12-05 10:27:46 -08:00
|
|
|
* Track the first empty slot and maximum region ID for new region
|
|
|
|
* allocation.
|
|
|
|
*
|
|
|
|
* MaxID starts at REGION_ID_ENV, the ID of the primary environment.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
maxid = REGION_ID_ENV;
|
2005-12-05 10:27:46 -08:00
|
|
|
empty_slot = first_type = NULL;
|
|
|
|
for (rp = R_ADDR(env_infop, renv->region_off),
|
|
|
|
i = 0; i < renv->region_cnt; ++i, ++rp) {
|
|
|
|
if (rp->id == INVALID_REGION_ID) {
|
|
|
|
if (empty_slot == NULL)
|
|
|
|
empty_slot = rp;
|
|
|
|
continue;
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
if (infop->id != INVALID_REGION_ID) {
|
|
|
|
if (infop->id == rp->id)
|
|
|
|
break;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (infop->type == rp->type &&
|
|
|
|
F_ISSET(infop, REGION_JOIN_OK) &&
|
|
|
|
(first_type == NULL || first_type->id > rp->id))
|
|
|
|
first_type = rp;
|
|
|
|
|
|
|
|
if (rp->id > maxid)
|
|
|
|
maxid = rp->id;
|
|
|
|
}
|
2005-12-05 10:27:46 -08:00
|
|
|
|
|
|
|
/* If we found a matching ID (or a matching type), return it. */
|
|
|
|
if (i >= renv->region_cnt)
|
2001-03-04 19:42:05 -05:00
|
|
|
rp = first_type;
|
2005-12-05 10:27:46 -08:00
|
|
|
if (rp != NULL) {
|
|
|
|
*rpp = rp;
|
|
|
|
return (0);
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* If we didn't find a region and we don't have permission to create
|
|
|
|
* the region, fail. The caller generates any error message.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (!F_ISSET(infop, REGION_CREATE_OK))
|
2001-03-04 19:42:05 -05:00
|
|
|
return (ENOENT);
|
|
|
|
|
|
|
|
/*
|
2005-12-05 10:27:46 -08:00
|
|
|
* If we didn't find a region and don't have room to create the region
|
|
|
|
* fail with an error message, there's a sizing problem.
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
if (empty_slot == NULL) {
|
|
|
|
__db_err(dbenv, "no room remaining for additional REGIONs");
|
|
|
|
return (ENOENT);
|
|
|
|
}
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* Initialize a REGION structure for the caller. If id was set, use
|
|
|
|
* that value, otherwise we use the next available ID.
|
|
|
|
*/
|
|
|
|
memset(empty_slot, 0, sizeof(REGION));
|
|
|
|
empty_slot->segid = INVALID_REGION_SEGID;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
/*
|
|
|
|
* Set the type and ID; if no region ID was specified,
|
|
|
|
* allocate one.
|
|
|
|
*/
|
|
|
|
empty_slot->type = infop->type;
|
|
|
|
empty_slot->id = infop->id == INVALID_REGION_ID ? maxid + 1 : infop->id;
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
F_SET(infop, REGION_CREATE);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
*rpp = empty_slot;
|
2001-03-04 19:42:05 -05:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_des_destroy --
|
|
|
|
* Destroy a reference to a REGION.
|
|
|
|
*/
|
2005-12-05 10:27:46 -08:00
|
|
|
static void
|
|
|
|
__db_des_destroy(dbenv, rp)
|
2001-03-04 19:42:05 -05:00
|
|
|
DB_ENV *dbenv;
|
|
|
|
REGION *rp;
|
|
|
|
{
|
2005-12-05 10:27:46 -08:00
|
|
|
COMPQUIET(dbenv, NULL);
|
2001-03-04 19:42:05 -05:00
|
|
|
|
2005-12-05 10:27:46 -08:00
|
|
|
rp->id = INVALID_REGION_ID;
|
2001-03-04 19:42:05 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __db_faultmem --
|
|
|
|
* Fault the region into memory.
|
|
|
|
*/
|
|
|
|
static int
|
2002-10-30 15:57:05 +04:00
|
|
|
__db_faultmem(dbenv, addr, size, created)
|
|
|
|
DB_ENV *dbenv;
|
2001-03-04 19:42:05 -05:00
|
|
|
void *addr;
|
|
|
|
size_t size;
|
|
|
|
int created;
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
u_int8_t *p, *t;
|
|
|
|
|
2005-07-20 15:48:22 -07:00
|
|
|
/* Ignore heap regions. */
|
|
|
|
if (F_ISSET(dbenv, DB_ENV_PRIVATE))
|
|
|
|
return (0);
|
|
|
|
|
2001-03-04 19:42:05 -05:00
|
|
|
/*
|
|
|
|
* It's sometimes significantly faster to page-fault in all of the
|
|
|
|
* region's pages before we run the application, as we see nasty
|
|
|
|
* side-effects when we page-fault while holding various locks, i.e.,
|
|
|
|
* the lock takes a long time to acquire because of the underlying
|
|
|
|
* page fault, and the other threads convoy behind the lock holder.
|
|
|
|
*
|
|
|
|
* If we created the region, we write a non-zero value so that the
|
|
|
|
* system can't cheat. If we're just joining the region, we can
|
|
|
|
* only read the value and try to confuse the compiler sufficiently
|
|
|
|
* that it doesn't figure out that we're never really using it.
|
2005-12-05 10:27:46 -08:00
|
|
|
*
|
|
|
|
* Touch every page (assuming pages are 512B, the smallest VM page
|
|
|
|
* size used in any general purpose processor).
|
2001-03-04 19:42:05 -05:00
|
|
|
*/
|
|
|
|
ret = 0;
|
2002-10-30 15:57:05 +04:00
|
|
|
if (F_ISSET(dbenv, DB_ENV_REGION_INIT)) {
|
2001-03-04 19:42:05 -05:00
|
|
|
if (created)
|
2005-12-05 10:27:46 -08:00
|
|
|
for (p = addr,
|
|
|
|
t = (u_int8_t *)addr + size; p < t; p += 512)
|
2001-03-04 19:42:05 -05:00
|
|
|
p[0] = 0xdb;
|
|
|
|
else
|
2005-12-05 10:27:46 -08:00
|
|
|
for (p = addr,
|
|
|
|
t = (u_int8_t *)addr + size; p < t; p += 512)
|
2001-03-04 19:42:05 -05:00
|
|
|
ret |= p[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|