mirror of
https://github.com/MariaDB/server.git
synced 2026-05-14 19:07:15 +02:00
BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f: Delete: bdb/build_vxworks/ex_access/ex_access.wpj BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6: Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd: Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74: Delete: bdb/build_vxworks/ex_env/ex_env.wpj BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d: Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41: Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682: Delete: bdb/build_win32/db_buildall.dsp BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed: Delete: bdb/cxx/cxx_app.cpp BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952: Delete: bdb/cxx/cxx_log.cpp BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556: Delete: bdb/cxx/cxx_table.cpp BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd: Delete: bdb/cxx/namemap.txt BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373: Delete: bdb/db/Design.fileop BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef: Delete: bdb/db185/db185_int.h BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5: Delete: bdb/dist/acconfig.h BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1: Delete: bdb/dist/aclocal/mutex.m4 BitKeeper/deleted/.del-options.m4~b9d0ca637213750a: Delete: bdb/dist/aclocal/options.m4 BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30: Delete: bdb/dist/aclocal/programs.m4 BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db: Delete: bdb/dist/aclocal/tcl.m4 BitKeeper/deleted/.del-types.m4~59cae158c9a32cff: Delete: bdb/dist/aclocal/types.m4 BitKeeper/deleted/.del-script~d38f6d3a4f159cb4: Delete: bdb/dist/build/script BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c: Delete: bdb/dist/configure.in BitKeeper/deleted/.del-ltconfig~66bbd007d8024af: Delete: bdb/dist/ltconfig BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a: 
Delete: bdb/dist/rec_ctemp BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f: Delete: bdb/dist/s_tcl BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15: Delete: bdb/dist/template/.IGNORE_ME BitKeeper/deleted/.del-btree.h~179f2aefec1753d: Delete: bdb/include/btree.h BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8: Delete: bdb/include/cxx_int.h BitKeeper/deleted/.del-db.src~6b433ae615b16a8d: Delete: bdb/include/db.src BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c: Delete: bdb/include/db_185.h BitKeeper/deleted/.del-db_am.h~a714912b6b75932f: Delete: bdb/include/db_am.h BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9: Delete: bdb/include/db_cxx.h BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904: Delete: bdb/include/db_dispatch.h BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7: Delete: bdb/include/db_int.src BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a: Delete: bdb/include/db_join.h BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc: Delete: bdb/include/db_page.h BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab: Delete: bdb/include/db_server_int.h BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3: Delete: bdb/include/db_shash.h BitKeeper/deleted/.del-db_swap.h~1e60887550864a59: Delete: bdb/include/db_swap.h BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d: Delete: bdb/include/db_upgrade.h BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e: Delete: bdb/include/db_verify.h BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc: Delete: bdb/include/debug.h BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb: Delete: bdb/include/hash.h BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f: Delete: bdb/include/lock.h BitKeeper/deleted/.del-log.h~ff20184238e35e4d: Delete: bdb/include/log.h BitKeeper/deleted/.del-mp.h~7e317597622f3411: Delete: bdb/include/mp.h BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137: Delete: bdb/include/mutex.h BitKeeper/deleted/.del-os.h~91867cc8757cd0e3: Delete: bdb/include/os.h BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be: Delete: 
bdb/include/os_jump.h BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597: Delete: bdb/include/qam.h BitKeeper/deleted/.del-queue.h~4c72c0826c123d5: Delete: bdb/include/queue.h BitKeeper/deleted/.del-region.h~513fe04d977ca0fc: Delete: bdb/include/region.h BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36: Delete: bdb/include/shqueue.h BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f: Delete: bdb/include/tcl_db.h BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4: Delete: bdb/include/txn.h BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a: Delete: bdb/include/xa.h BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023: Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d: Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b: Delete: bdb/lock/lock_conflict.c BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2: Delete: bdb/log/log.src BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f: Delete: bdb/log/log_findckp.c BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297: Delete: bdb/log/log_rec.c BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15: Delete: bdb/log/log_register.c BitKeeper/deleted/.del-Design~b3d0f179f2767b: Delete: bdb/mp/Design BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c: Delete: bdb/os/os_finit.c BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924: Delete: bdb/os_vxworks/os_abs.c BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122: Delete: bdb/os_vxworks/os_finit.c BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3: Delete: bdb/os_vxworks/os_map.c BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad: Delete: bdb/os_win32/os_finit.c BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d: Delete: bdb/patches/log-corruption.patch BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm 
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95: Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c: Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs BitKeeper/deleted/.del-Changes~729c1891efa60de9: Delete: bdb/perl.BerkeleyDB/Changes BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0: Delete: bdb/perl.BerkeleyDB/MANIFEST BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a: Delete: bdb/perl.BerkeleyDB/Makefile.PL BitKeeper/deleted/.del-README~5f2f579b1a241407: Delete: bdb/perl.BerkeleyDB/README BitKeeper/deleted/.del-Todo~dca3c66c193adda9: Delete: bdb/perl.BerkeleyDB/Todo BitKeeper/deleted/.del-config.in~ae81681e450e0999: Delete: bdb/perl.BerkeleyDB/config.in BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e: Delete: bdb/perl.BerkeleyDB/dbinfo BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e: Delete: bdb/perl.BerkeleyDB/mkconsts BitKeeper/deleted/.del-mkpod~182c0ca54e439afb: Delete: bdb/perl.BerkeleyDB/mkpod BitKeeper/deleted/.del-5.004~e008cb5a48805543: Delete: bdb/perl.BerkeleyDB/patches/5.004 BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8: Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl BitKeeper/deleted/.del-solaris.pl~6771e7182394e152: Delete: bdb/perl.BerkeleyDB/hints/solaris.pl BitKeeper/deleted/.del-typemap~783b8f5295b05f3d: Delete: bdb/perl.BerkeleyDB/typemap BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc: Delete: bdb/perl.BerkeleyDB/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6: Delete: bdb/perl.BerkeleyDB/patches/5.004_02 BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f: Delete: bdb/perl.BerkeleyDB/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~e326cb51af09d154: Delete: bdb/perl.BerkeleyDB/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe: Delete: 
bdb/perl.BerkeleyDB/patches/5.004_05 BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b: Delete: bdb/perl.BerkeleyDB/patches/5.005 BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e: Delete: bdb/perl.BerkeleyDB/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb: Delete: bdb/perl.BerkeleyDB/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a: Delete: bdb/perl.BerkeleyDB/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22: Delete: bdb/perl.BerkeleyDB/patches/5.6.0 BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406: Delete: bdb/perl.BerkeleyDB/t/btree.t BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2: Delete: bdb/perl.BerkeleyDB/t/db-3.0.t BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018: Delete: bdb/perl.BerkeleyDB/t/db-3.1.t BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf: Delete: bdb/perl.BerkeleyDB/t/db-3.2.t BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd: Delete: bdb/perl.BerkeleyDB/t/destroy.t BitKeeper/deleted/.del-env.t~a8604a4499c4bd07: Delete: bdb/perl.BerkeleyDB/t/env.t BitKeeper/deleted/.del-examples.t~2571b77c3cc75574: Delete: bdb/perl.BerkeleyDB/t/examples.t BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88: Delete: bdb/perl.BerkeleyDB/t/examples.t.T BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d: Delete: bdb/perl.BerkeleyDB/t/examples3.t.T BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83: Delete: bdb/perl.BerkeleyDB/t/examples3.t BitKeeper/deleted/.del-filter.t~f87b045c1b708637: Delete: bdb/perl.BerkeleyDB/t/filter.t BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3: Delete: bdb/perl.BerkeleyDB/t/hash.t BitKeeper/deleted/.del-join.t~29fc39f74a83ca22: Delete: bdb/perl.BerkeleyDB/t/join.t BitKeeper/deleted/.del-mldbm.t~31f5015341eea040: Delete: bdb/perl.BerkeleyDB/t/mldbm.t BitKeeper/deleted/.del-queue.t~8f338034ce44a641: Delete: bdb/perl.BerkeleyDB/t/queue.t BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e: Delete: bdb/perl.BerkeleyDB/t/recno.t BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d: Delete: 
bdb/perl.BerkeleyDB/t/strict.t BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603: Delete: bdb/perl.BerkeleyDB/t/subdb.t BitKeeper/deleted/.del-txn.t~65033b8558ae1216: Delete: bdb/perl.BerkeleyDB/t/txn.t BitKeeper/deleted/.del-unknown.t~f3710458682665e1: Delete: bdb/perl.BerkeleyDB/t/unknown.t BitKeeper/deleted/.del-Changes~436f74a5c414c65b: Delete: bdb/perl.DB_File/Changes BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82: Delete: bdb/perl.DB_File/DB_File.pm BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8: Delete: bdb/perl.DB_File/DB_File.xs BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069: Delete: bdb/perl.DB_File/DB_File_BS BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac: Delete: bdb/perl.DB_File/MANIFEST BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38: Delete: bdb/perl.DB_File/Makefile.PL BitKeeper/deleted/.del-README~77e924a5a9bae6b3: Delete: bdb/perl.DB_File/README BitKeeper/deleted/.del-config.in~ab4c2792b86a810b: Delete: bdb/perl.DB_File/config.in BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb: Delete: bdb/perl.DB_File/dbinfo BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9: Delete: bdb/perl.DB_File/hints/dynixptx.pl BitKeeper/deleted/.del-typemap~55cffb3288a9e587: Delete: bdb/perl.DB_File/typemap BitKeeper/deleted/.del-version.c~a4df0e646f8b3975: Delete: bdb/perl.DB_File/version.c BitKeeper/deleted/.del-5.004_01~d6830d0082702af7: Delete: bdb/perl.DB_File/patches/5.004_01 BitKeeper/deleted/.del-5.004_02~78b082dc80c91031: Delete: bdb/perl.DB_File/patches/5.004_02 BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b: Delete: bdb/perl.DB_File/patches/5.004 BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe: Delete: bdb/perl.DB_File/hints/sco.pl BitKeeper/deleted/.del-5.004_03~33f274648b160d95: Delete: bdb/perl.DB_File/patches/5.004_03 BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a: Delete: bdb/perl.DB_File/patches/5.004_04 BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142: Delete: bdb/perl.DB_File/patches/5.004_05 BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951: 
Delete: bdb/perl.DB_File/patches/5.005 BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa: Delete: bdb/perl.DB_File/patches/5.005_01 BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522: Delete: bdb/perl.DB_File/patches/5.005_02 BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea: Delete: bdb/perl.DB_File/patches/5.005_03 BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85: Delete: bdb/perl.DB_File/patches/5.6.0 BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3: Delete: bdb/perl.DB_File/t/db-btree.t BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2: Delete: bdb/perl.DB_File/t/db-hash.t BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50: Delete: bdb/perl.DB_File/t/db-recno.t BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2: Delete: bdb/rpc_server/db_server.sed BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4: Delete: bdb/rpc_server/db_server_proc.c BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607: Delete: bdb/rpc_server/db_server_svc.sed BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda: Delete: bdb/rpc_server/db_server_util.c BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7: Delete: bdb/test/log.tcl BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c: Delete: bdb/test/mpool.tcl BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565: Delete: bdb/test/mutex.tcl BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e: Delete: bdb/test/txn.tcl BitKeeper/deleted/.del-README~e800a12a5392010a: Delete: bdb/test/upgrade/README BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98: Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b: Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
This commit is contained in:
parent
b8798d25ab
commit
155e78f014
1191 changed files with 170446 additions and 57453 deletions
|
|
@ -1,52 +0,0 @@
|
|||
$Id: Design,v 11.2 1999/11/21 23:08:27 bostic Exp $
|
||||
|
||||
There are three ways we do locking in the mpool code:
|
||||
|
||||
Locking a handle mutex to provide concurrency for DB_THREAD operations.
|
||||
Locking the region mutex to provide mutual exclusion while reading and
|
||||
writing structures in the shared region.
|
||||
Locking buffer header mutexes during I/O.
|
||||
|
||||
The first will not be further described here. We use the shared mpool
|
||||
region lock to provide mutual exclusion while reading/modifying all of
|
||||
the data structures, including the buffer headers. We use a per-buffer
|
||||
header lock to wait on buffer I/O. The order of locking is as follows:
|
||||
|
||||
Searching for a buffer:
|
||||
Acquire the region lock.
|
||||
Find the buffer header.
|
||||
Increment the reference count (guarantee the buffer stays).
|
||||
While the BH_LOCKED flag is set (I/O is going on) {
|
||||
Release the region lock.
|
||||
Explicitly yield the processor if it's not the first pass
|
||||
through this loop, otherwise, we can simply spin because
|
||||
we'll be simply switching between the two locks.
|
||||
Request the buffer lock.
|
||||
The I/O will complete...
|
||||
Acquire the buffer lock.
|
||||
Release the buffer lock.
|
||||
Acquire the region lock.
|
||||
}
|
||||
Return the buffer.
|
||||
|
||||
Reading/writing a buffer:
|
||||
Acquire the region lock.
|
||||
Find/create the buffer header.
|
||||
If reading, increment the reference count (guarantee the buffer stays).
|
||||
Set the BH_LOCKED flag.
|
||||
Acquire the buffer lock (guaranteed not to block).
|
||||
Release the region lock.
|
||||
Do the I/O and/or initialize the buffer contents.
|
||||
Release the buffer lock.
|
||||
At this point, the buffer lock is available, but the logical
|
||||
operation (flagged by BH_LOCKED) is not yet completed. For
|
||||
this reason, among others, threads checking the BH_LOCKED flag
|
||||
must loop around their test.
|
||||
Acquire the region lock.
|
||||
Clear the BH_LOCKED flag.
|
||||
Release the region lock.
|
||||
Return/discard the buffer.
|
||||
|
||||
Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are
|
||||
not reacquired when a region lock is reacquired because they couldn't
|
||||
have been closed/discarded and because they never move in memory.
|
||||
|
|
@ -1,22 +1,31 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_alloc.c,v 11.7 2000/04/20 21:14:18 bostic Exp $";
|
||||
static const char revid[] = "$Id: mp_alloc.c,v 11.31 2002/08/14 17:21:37 ubell Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
typedef struct {
|
||||
DB_MPOOL_HASH *bucket;
|
||||
u_int32_t priority;
|
||||
} HS;
|
||||
|
||||
static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
|
||||
static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
|
||||
|
||||
/*
|
||||
* __memp_alloc --
|
||||
|
|
@ -34,14 +43,32 @@ __memp_alloc(dbmp, memreg, mfp, len, offsetp, retp)
|
|||
roff_t *offsetp;
|
||||
void *retp;
|
||||
{
|
||||
BH *bhp, *nbhp;
|
||||
BH *bhp;
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL_HASH *dbht, *hp, *hp_end, *hp_tmp;
|
||||
DB_MUTEX *mutexp;
|
||||
MPOOL *c_mp;
|
||||
MPOOLFILE *bh_mfp;
|
||||
size_t total;
|
||||
int nomore, restart, ret, wrote;
|
||||
size_t freed_space;
|
||||
u_int32_t buckets, buffers, high_priority, max_na, priority;
|
||||
int aggressive, ret;
|
||||
void *p;
|
||||
|
||||
dbenv = dbmp->dbenv;
|
||||
c_mp = memreg->primary;
|
||||
dbht = R_ADDR(memreg, c_mp->htab);
|
||||
hp_end = &dbht[c_mp->htab_buckets];
|
||||
|
||||
buckets = buffers = 0;
|
||||
aggressive = 0;
|
||||
|
||||
c_mp->stat.st_alloc++;
|
||||
|
||||
/*
|
||||
* Get aggressive if we've tried to flush five times the number of hash
|
||||
* buckets in the system without finding space.
|
||||
*/
|
||||
max_na = 5 * c_mp->htab_buckets;
|
||||
|
||||
/*
|
||||
* If we're allocating a buffer, and the one we're discarding is the
|
||||
|
|
@ -53,100 +80,363 @@ __memp_alloc(dbmp, memreg, mfp, len, offsetp, retp)
|
|||
if (mfp != NULL)
|
||||
len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
|
||||
|
||||
nomore = 0;
|
||||
R_LOCK(dbenv, memreg);
|
||||
|
||||
/*
|
||||
* On every buffer allocation we update the buffer generation number
|
||||
* and check for wraparound.
|
||||
*/
|
||||
if (++c_mp->lru_count == UINT32_T_MAX)
|
||||
__memp_reset_lru(dbenv, memreg, c_mp);
|
||||
|
||||
/*
|
||||
* Anything newer than 1/10th of the buffer pool is ignored during
|
||||
* allocation (unless allocation starts failing).
|
||||
*/
|
||||
DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
|
||||
high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
|
||||
|
||||
/*
|
||||
* First we try to allocate from free memory. If that fails, scan the
|
||||
* buffer pool to find buffers with low priorities. We consider small
|
||||
* sets of hash buckets each time to limit the amount of work needing
|
||||
* to be done. This approximates LRU, but not very well. We either
|
||||
* find a buffer of the same size to use, or we will free 3 times what
|
||||
* we need in the hopes it will coalesce into a contiguous chunk of the
|
||||
* right size. In the latter case we branch back here and try again.
|
||||
*/
|
||||
alloc: if ((ret = __db_shalloc(memreg->addr, len, MUTEX_ALIGN, &p)) == 0) {
|
||||
if (offsetp != NULL)
|
||||
if (mfp != NULL)
|
||||
c_mp->stat.st_pages++;
|
||||
R_UNLOCK(dbenv, memreg);
|
||||
|
||||
found: if (offsetp != NULL)
|
||||
*offsetp = R_OFFSET(memreg, p);
|
||||
*(void **)retp = p;
|
||||
|
||||
/*
|
||||
* Update the search statistics.
|
||||
*
|
||||
* We're not holding the region locked here, these statistics
|
||||
* can't be trusted.
|
||||
*/
|
||||
if (buckets != 0) {
|
||||
if (buckets > c_mp->stat.st_alloc_max_buckets)
|
||||
c_mp->stat.st_alloc_max_buckets = buckets;
|
||||
c_mp->stat.st_alloc_buckets += buckets;
|
||||
}
|
||||
if (buffers != 0) {
|
||||
if (buffers > c_mp->stat.st_alloc_max_pages)
|
||||
c_mp->stat.st_alloc_max_pages = buffers;
|
||||
c_mp->stat.st_alloc_pages += buffers;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
if (nomore) {
|
||||
__db_err(dbmp->dbenv,
|
||||
"Unable to allocate %lu bytes from mpool shared region: %s\n",
|
||||
(u_long)len, db_strerror(ret));
|
||||
return (ret);
|
||||
}
|
||||
|
||||
retry: /* Find a buffer we can flush; pure LRU. */
|
||||
restart = total = 0;
|
||||
for (bhp =
|
||||
SH_TAILQ_FIRST(&c_mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
|
||||
nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
|
||||
/*
|
||||
* We re-attempt the allocation every time we've freed 3 times what
|
||||
* we need. Reset our free-space counter.
|
||||
*/
|
||||
freed_space = 0;
|
||||
|
||||
/* Ignore pinned or locked (I/O in progress) buffers. */
|
||||
if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
|
||||
/*
|
||||
* Walk the hash buckets and find the next two with potentially useful
|
||||
* buffers. Free the buffer with the lowest priority from the buckets'
|
||||
* chains.
|
||||
*/
|
||||
for (hp_tmp = NULL;;) {
|
||||
/* Check for wrap around. */
|
||||
hp = &dbht[c_mp->last_checked++];
|
||||
if (hp >= hp_end) {
|
||||
c_mp->last_checked = 0;
|
||||
|
||||
/*
|
||||
* If we've gone through all of the hash buckets, try
|
||||
* an allocation. If the cache is small, the old page
|
||||
* size is small, and the new page size is large, we
|
||||
* might have freed enough memory (but not 3 times the
|
||||
* memory).
|
||||
*/
|
||||
goto alloc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip empty buckets.
|
||||
*
|
||||
* We can check for empty buckets before locking as we
|
||||
* only care if the pointer is zero or non-zero.
|
||||
*/
|
||||
if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The failure mode is when there are too many buffers we can't
|
||||
* write or there's not enough memory in the system. We don't
|
||||
* have a metric for deciding if allocation has no possible way
|
||||
* to succeed, so we don't ever fail, we assume memory will be
|
||||
* available if we wait long enough.
|
||||
*
|
||||
* Get aggressive if we've tried to flush 5 times the number of
|
||||
* hash buckets as are in the system -- it's possible we have
|
||||
* been repeatedly trying to flush the same buffers, although
|
||||
* it's unlikely. Aggressive means:
|
||||
*
|
||||
* a: set a flag to attempt to flush high priority buffers as
|
||||
* well as other buffers.
|
||||
* b: sync the mpool to force out queue extent pages. While we
|
||||
* might not have enough space for what we want and flushing
|
||||
* is expensive, why not?
|
||||
* c: sleep for a second -- hopefully someone else will run and
|
||||
* free up some memory. Try to allocate memory too, in case
|
||||
* the other thread returns its memory to the region.
|
||||
* d: look at a buffer in every hash bucket rather than choose
|
||||
* the more preferable of two.
|
||||
*
|
||||
* !!!
|
||||
* This test ignores pathological cases like no buffers in the
|
||||
* system -- that shouldn't be possible.
|
||||
*/
|
||||
if ((++buckets % max_na) == 0) {
|
||||
aggressive = 1;
|
||||
|
||||
R_UNLOCK(dbenv, memreg);
|
||||
|
||||
(void)__memp_sync_int(
|
||||
dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
|
||||
|
||||
(void)__os_sleep(dbenv, 1, 0);
|
||||
|
||||
R_LOCK(dbenv, memreg);
|
||||
goto alloc;
|
||||
}
|
||||
|
||||
if (!aggressive) {
|
||||
/* Skip high priority buckets. */
|
||||
if (hp->hash_priority > high_priority)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Find two buckets and select the one with the lowest
|
||||
* priority. Performance testing shows that looking
|
||||
* at two improves the LRUness and looking at more only
|
||||
* does a little better.
|
||||
*/
|
||||
if (hp_tmp == NULL) {
|
||||
hp_tmp = hp;
|
||||
continue;
|
||||
}
|
||||
if (hp->hash_priority > hp_tmp->hash_priority)
|
||||
hp = hp_tmp;
|
||||
hp_tmp = NULL;
|
||||
}
|
||||
|
||||
/* Remember the priority of the buffer we're looking for. */
|
||||
priority = hp->hash_priority;
|
||||
|
||||
/* Unlock the region and lock the hash bucket. */
|
||||
R_UNLOCK(dbenv, memreg);
|
||||
mutexp = &hp->hash_mutex;
|
||||
MUTEX_LOCK(dbenv, mutexp);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
__memp_check_order(hp);
|
||||
#endif
|
||||
/*
|
||||
* The lowest priority page is first in the bucket, as they are
|
||||
* maintained in sorted order.
|
||||
*
|
||||
* The buffer may have been freed or its priority changed while
|
||||
* we switched from the region lock to the hash lock. If so,
|
||||
* we have to restart. We will still take the first buffer on
|
||||
* the bucket's list, though, if it has a low enough priority.
|
||||
*/
|
||||
if ((bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) == NULL ||
|
||||
bhp->ref != 0 || bhp->priority > priority)
|
||||
goto next_hb;
|
||||
|
||||
buffers++;
|
||||
|
||||
/* Find the associated MPOOLFILE. */
|
||||
bh_mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
|
||||
|
||||
/* Write the page if it's dirty. */
|
||||
/* If the page is dirty, pin it and write it. */
|
||||
ret = 0;
|
||||
if (F_ISSET(bhp, BH_DIRTY)) {
|
||||
++bhp->ref;
|
||||
if ((ret = __memp_bhwrite(dbmp,
|
||||
bh_mfp, bhp, &restart, &wrote)) != 0)
|
||||
return (ret);
|
||||
ret = __memp_bhwrite(dbmp, hp, bh_mfp, bhp, 0);
|
||||
--bhp->ref;
|
||||
|
||||
/*
|
||||
* Another process may have acquired this buffer and
|
||||
* incremented the ref count after we wrote it.
|
||||
*/
|
||||
if (bhp->ref != 0)
|
||||
goto retry;
|
||||
|
||||
/*
|
||||
* If we wrote the page, continue and free the buffer.
|
||||
* We don't have to rewalk the list to acquire the
|
||||
* buffer because it was never available for any other
|
||||
* process to modify it.
|
||||
*
|
||||
* If we didn't write the page, but we discarded and
|
||||
* reacquired the region lock, restart the list walk.
|
||||
*
|
||||
* If we neither wrote the buffer nor discarded the
|
||||
* region lock, continue down the buffer list.
|
||||
*/
|
||||
if (wrote)
|
||||
if (ret == 0)
|
||||
++c_mp->stat.st_rw_evict;
|
||||
else {
|
||||
if (restart)
|
||||
goto retry;
|
||||
continue;
|
||||
}
|
||||
} else
|
||||
++c_mp->stat.st_ro_evict;
|
||||
|
||||
/*
|
||||
* If a write fails for any reason, we can't proceed.
|
||||
*
|
||||
* We released the hash bucket lock while doing I/O, so another
|
||||
* thread may have acquired this buffer and incremented the ref
|
||||
* count after we wrote it, in which case we can't have it.
|
||||
*
|
||||
* If there's a write error, avoid selecting this buffer again
|
||||
* by making it the bucket's least-desirable buffer.
|
||||
*/
|
||||
if (ret != 0 || bhp->ref != 0) {
|
||||
if (ret != 0 && aggressive)
|
||||
__memp_bad_buffer(hp);
|
||||
goto next_hb;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if the buffer is the size we're looking for.
|
||||
* If it is, simply reuse it.
|
||||
* If so, we can simply reuse it. Else, free the buffer and
|
||||
* its space and keep looking.
|
||||
*/
|
||||
if (mfp != NULL &&
|
||||
mfp->stat.st_pagesize == bh_mfp->stat.st_pagesize) {
|
||||
__memp_bhfree(dbmp, bhp, 0);
|
||||
__memp_bhfree(dbmp, hp, bhp, 0);
|
||||
|
||||
if (offsetp != NULL)
|
||||
*offsetp = R_OFFSET(memreg, bhp);
|
||||
*(void **)retp = bhp;
|
||||
return (0);
|
||||
p = bhp;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* Note how much space we've freed, and free the buffer. */
|
||||
total += __db_shsizeof(bhp);
|
||||
__memp_bhfree(dbmp, bhp, 1);
|
||||
freed_space += __db_shsizeof(bhp);
|
||||
__memp_bhfree(dbmp, hp, bhp, 1);
|
||||
|
||||
/*
|
||||
* Retry as soon as we've freed up sufficient space. If we
|
||||
* have to coalesce of memory to satisfy the request, don't
|
||||
* try until it's likely (possible?) that we'll succeed.
|
||||
* Unlock this hash bucket and re-acquire the region lock. If
|
||||
* we're reaching here as a result of calling memp_bhfree, the
|
||||
* hash bucket lock has already been discarded.
|
||||
*/
|
||||
if (total >= 3 * len)
|
||||
goto alloc;
|
||||
if (0) {
|
||||
next_hb: MUTEX_UNLOCK(dbenv, mutexp);
|
||||
}
|
||||
R_LOCK(dbenv, memreg);
|
||||
|
||||
/* Restart the walk if we discarded the region lock. */
|
||||
if (restart)
|
||||
goto retry;
|
||||
/*
|
||||
* Retry the allocation as soon as we've freed up sufficient
|
||||
* space. We're likely to have to coalesce memory to
|
||||
* satisfy the request, don't try until it's likely (possible?)
|
||||
* we'll succeed.
|
||||
*/
|
||||
if (freed_space >= 3 * len)
|
||||
goto alloc;
|
||||
}
|
||||
nomore = 1;
|
||||
goto alloc;
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_bad_buffer --
|
||||
* Make the first buffer in a hash bucket the least desirable buffer.
|
||||
*/
|
||||
static void
|
||||
__memp_bad_buffer(hp)
|
||||
DB_MPOOL_HASH *hp;
|
||||
{
|
||||
BH *bhp, *t_bhp;
|
||||
u_int32_t priority;
|
||||
|
||||
/* Remove the first buffer from the bucket. */
|
||||
bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
||||
SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
|
||||
|
||||
/*
|
||||
* Find the highest priority buffer in the bucket. Buffers are
|
||||
* sorted by priority, so it's the last one in the bucket.
|
||||
*
|
||||
* XXX
|
||||
* Should use SH_TAILQ_LAST, but I think that macro is broken.
|
||||
*/
|
||||
priority = bhp->priority;
|
||||
for (t_bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
||||
t_bhp != NULL; t_bhp = SH_TAILQ_NEXT(t_bhp, hq, __bh))
|
||||
priority = t_bhp->priority;
|
||||
|
||||
/*
|
||||
* Set our buffer's priority to be just as bad, and append it to
|
||||
* the bucket.
|
||||
*/
|
||||
bhp->priority = priority;
|
||||
SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
|
||||
|
||||
/* Reset the hash bucket's priority. */
|
||||
hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_reset_lru --
|
||||
* Reset the cache LRU counter.
|
||||
*/
|
||||
static void
|
||||
__memp_reset_lru(dbenv, memreg, c_mp)
|
||||
DB_ENV *dbenv;
|
||||
REGINFO *memreg;
|
||||
MPOOL *c_mp;
|
||||
{
|
||||
BH *bhp;
|
||||
DB_MPOOL_HASH *hp;
|
||||
int bucket;
|
||||
|
||||
/*
|
||||
* Update the counter so all future allocations will start at the
|
||||
* bottom.
|
||||
*/
|
||||
c_mp->lru_count -= MPOOL_BASE_DECREMENT;
|
||||
|
||||
/* Release the region lock. */
|
||||
R_UNLOCK(dbenv, memreg);
|
||||
|
||||
/* Adjust the priority of every buffer in the system. */
|
||||
for (hp = R_ADDR(memreg, c_mp->htab),
|
||||
bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
|
||||
/*
|
||||
* Skip empty buckets.
|
||||
*
|
||||
* We can check for empty buckets before locking as we
|
||||
* only care if the pointer is zero or non-zero.
|
||||
*/
|
||||
if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
|
||||
continue;
|
||||
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
||||
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
||||
if (bhp->priority != UINT32_T_MAX &&
|
||||
bhp->priority > MPOOL_BASE_DECREMENT)
|
||||
bhp->priority -= MPOOL_BASE_DECREMENT;
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
}
|
||||
|
||||
/* Reacquire the region lock. */
|
||||
R_LOCK(dbenv, memreg);
|
||||
}
|
||||
|
||||
#ifdef DIAGNOSTIC
/*
 * __memp_check_order --
 *	Diagnostic check of a hash bucket chain: the bucket's hash_priority
 *	must equal the priority of its first buffer, and priorities must
 *	never decrease from one buffer to the next.  An empty bucket passes
 *	trivially.
 *
 *	The caller is expected to hold the hash bucket's lock.
 *
 * PUBLIC: #ifdef DIAGNOSTIC
 * PUBLIC: void __memp_check_order __P((DB_MPOOL_HASH *));
 * PUBLIC: #endif
 */
void
__memp_check_order(hp)
	DB_MPOOL_HASH *hp;
{
	BH *cur;
	u_int32_t prev;

	/* Nothing to verify when the bucket holds no buffers. */
	cur = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
	if (cur == NULL)
		return;

	/* The bucket's cached priority must match its first buffer. */
	DB_ASSERT(cur->priority == hp->hash_priority);

	/* Each later buffer must be at least as high as the one before. */
	prev = cur->priority;
	while ((cur = SH_TAILQ_NEXT(cur, hq, __bh)) != NULL) {
		DB_ASSERT(prev <= cur->priority);
		prev = cur->priority;
	}
}
#endif
|
||||
|
|
|
|||
576
bdb/mp/mp_bh.c
576
bdb/mp/mp_bh.c
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_bh.c,v 11.25 2001/01/10 04:50:53 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_bh.c,v 11.71 2002/09/04 19:06:45 margo Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -18,40 +18,41 @@ static const char revid[] = "$Id: mp_bh.c,v 11.25 2001/01/10 04:50:53 ubell Exp
|
|||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
#include "log.h"
|
||||
#include "db_page.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
#include "dbinc/log.h"
|
||||
#include "dbinc/db_page.h"
|
||||
|
||||
static int __memp_pgwrite
|
||||
__P((DB_MPOOL *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *));
|
||||
static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
|
||||
|
||||
/*
|
||||
* __memp_bhwrite --
|
||||
* Write the page associated with a given bucket header.
|
||||
* Write the page associated with a given buffer header.
|
||||
*
|
||||
* PUBLIC: int __memp_bhwrite
|
||||
* PUBLIC: __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
|
||||
* PUBLIC: int __memp_bhwrite __P((DB_MPOOL *,
|
||||
* PUBLIC: DB_MPOOL_HASH *, MPOOLFILE *, BH *, int));
|
||||
*/
|
||||
int
|
||||
__memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
|
||||
__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents)
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPOOL_HASH *hp;
|
||||
MPOOLFILE *mfp;
|
||||
BH *bhp;
|
||||
int *restartp, *wrotep;
|
||||
int open_extents;
|
||||
{
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
DB_MPREG *mpreg;
|
||||
int incremented, ret;
|
||||
int local_open, incremented, ret;
|
||||
|
||||
if (restartp != NULL)
|
||||
*restartp = 0;
|
||||
if (wrotep != NULL)
|
||||
*wrotep = 0;
|
||||
incremented = 0;
|
||||
dbenv = dbmp->dbenv;
|
||||
local_open = incremented = 0;
|
||||
|
||||
/*
|
||||
* If the file has been removed or is a closed temporary file, Jump
|
||||
* right ahead and pretend that we've found the file we want-- the
|
||||
* If the file has been removed or is a closed temporary file, jump
|
||||
* right ahead and pretend that we've found the file we want -- the
|
||||
* page-write function knows how to handle the fact that we don't have
|
||||
* (or need!) any real file descriptor information.
|
||||
*/
|
||||
|
|
@ -66,52 +67,60 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
|
|||
* If we find a descriptor on the file that's not open for writing, we
|
||||
* try and upgrade it to make it writeable. If that fails, we're done.
|
||||
*/
|
||||
MUTEX_THREAD_LOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
|
||||
dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
|
||||
if (dbmfp->mfp == mfp) {
|
||||
if (F_ISSET(dbmfp, MP_READONLY) &&
|
||||
__memp_upgrade(dbmp, dbmfp, mfp)) {
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
return (0);
|
||||
!F_ISSET(dbmfp, MP_UPGRADE) &&
|
||||
(F_ISSET(dbmfp, MP_UPGRADE_FAIL) ||
|
||||
__memp_upgrade(dbmp, dbmfp, mfp))) {
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the reference count -- see the comment in
|
||||
* memp_fclose().
|
||||
* __memp_fclose_int().
|
||||
*/
|
||||
++dbmfp->ref;
|
||||
incremented = 1;
|
||||
break;
|
||||
}
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
|
||||
if (dbmfp != NULL)
|
||||
goto found;
|
||||
|
||||
/*
|
||||
* !!!
|
||||
* It's the caller's choice if we're going to open extent files.
|
||||
*/
|
||||
if (!open_extents && F_ISSET(mfp, MP_EXTENT))
|
||||
return (EPERM);
|
||||
|
||||
/*
|
||||
* !!!
|
||||
* Don't try to attach to temporary files. There are two problems in
|
||||
* trying to do that. First, if we have different privileges than the
|
||||
* process that "owns" the temporary file, we might create the backing
|
||||
* disk file such that the owning process couldn't read/write its own
|
||||
* buffers, e.g., memp_trickle() running as root creating a file owned
|
||||
* buffers, e.g., memp_trickle running as root creating a file owned
|
||||
* as root, mode 600. Second, if the temporary file has already been
|
||||
* created, we don't have any way of finding out what its real name is,
|
||||
* and, even if we did, it was already unlinked (so that it won't be
|
||||
* left if the process dies horribly). This decision causes a problem,
|
||||
* however: if the temporary file consumes the entire buffer cache,
|
||||
* and the owner doesn't flush the buffers to disk, we could end up
|
||||
* with resource starvation, and the memp_trickle() thread couldn't do
|
||||
* with resource starvation, and the memp_trickle thread couldn't do
|
||||
* anything about it. That's a pretty unlikely scenario, though.
|
||||
*
|
||||
* Note that we should never get here when the temporary file
|
||||
* in question has already been closed in another process, in which
|
||||
* case it should be marked MP_DEADFILE.
|
||||
* Note we should never get here when the temporary file in question
|
||||
* has already been closed in another process, in which case it should
|
||||
* be marked MP_DEADFILE.
|
||||
*/
|
||||
if (F_ISSET(mfp, MP_TEMP)) {
|
||||
DB_ASSERT(!F_ISSET(mfp, MP_DEADFILE));
|
||||
return (0);
|
||||
}
|
||||
if (F_ISSET(mfp, MP_TEMP))
|
||||
return (EPERM);
|
||||
|
||||
/*
|
||||
* It's not a page from a file we've opened. If the file requires
|
||||
|
|
@ -120,14 +129,14 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
|
|||
* nothing we can do.
|
||||
*/
|
||||
if (mfp->ftype != 0) {
|
||||
MUTEX_THREAD_LOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
for (mpreg = LIST_FIRST(&dbmp->dbregq);
|
||||
mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
|
||||
if (mpreg->ftype == mfp->ftype)
|
||||
break;
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
if (mpreg == NULL)
|
||||
return (0);
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -138,17 +147,24 @@ __memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
|
|||
* There's no negative cache, so we may repeatedly try and open files
|
||||
* that we have previously tried (and failed) to open.
|
||||
*/
|
||||
if (__memp_fopen(dbmp, mfp, R_ADDR(dbmp->reginfo, mfp->path_off),
|
||||
0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
|
||||
return (0);
|
||||
|
||||
found: ret = __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep);
|
||||
|
||||
if (incremented) {
|
||||
MUTEX_THREAD_LOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
--dbmfp->ref;
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
if ((ret = dbenv->memp_fcreate(dbenv, &dbmfp, 0)) != 0)
|
||||
return (ret);
|
||||
if ((ret = __memp_fopen_int(dbmfp, mfp,
|
||||
R_ADDR(dbmp->reginfo, mfp->path_off),
|
||||
0, 0, mfp->stat.st_pagesize)) != 0) {
|
||||
(void)dbmfp->close(dbmfp, 0);
|
||||
return (ret);
|
||||
}
|
||||
local_open = 1;
|
||||
|
||||
found: ret = __memp_pgwrite(dbmp, dbmfp, hp, bhp);
|
||||
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
if (incremented)
|
||||
--dbmfp->ref;
|
||||
else if (local_open)
|
||||
F_SET(dbmfp, MP_FLUSH);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
|
@ -157,11 +173,12 @@ found: ret = __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep);
|
|||
* __memp_pgread --
|
||||
* Read a page from a file.
|
||||
*
|
||||
* PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
|
||||
* PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, DB_MUTEX *, BH *, int));
|
||||
*/
|
||||
int
|
||||
__memp_pgread(dbmfp, bhp, can_create)
|
||||
__memp_pgread(dbmfp, mutexp, bhp, can_create)
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
DB_MUTEX *mutexp;
|
||||
BH *bhp;
|
||||
int can_create;
|
||||
{
|
||||
|
|
@ -169,171 +186,129 @@ __memp_pgread(dbmfp, bhp, can_create)
|
|||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOLFILE *mfp;
|
||||
size_t len, pagesize;
|
||||
size_t nr;
|
||||
int created, ret;
|
||||
size_t len, nr, pagesize;
|
||||
int ret;
|
||||
|
||||
dbmp = dbmfp->dbmp;
|
||||
dbenv = dbmp->dbenv;
|
||||
mfp = dbmfp->mfp;
|
||||
pagesize = mfp->stat.st_pagesize;
|
||||
|
||||
/* We should never be called with a dirty or a locked buffer. */
|
||||
DB_ASSERT(!F_ISSET(bhp, BH_DIRTY | BH_DIRTY_CREATE | BH_LOCKED));
|
||||
|
||||
/* Lock the buffer and swap the hash bucket lock for the buffer lock. */
|
||||
F_SET(bhp, BH_LOCKED | BH_TRASH);
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex, dbenv->lockfhp);
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex);
|
||||
MUTEX_UNLOCK(dbenv, mutexp);
|
||||
|
||||
/*
|
||||
* Temporary files may not yet have been created. We don't create
|
||||
* them now, we create them when the pages have to be flushed.
|
||||
*/
|
||||
nr = 0;
|
||||
if (F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
|
||||
/*
|
||||
* Ignore read errors if we have permission to create the page.
|
||||
* Assume that the page doesn't exist, and that we'll create it
|
||||
* when we write it out.
|
||||
*
|
||||
* XXX
|
||||
* Theoretically, we could overwrite a page of data if it were
|
||||
* possible for a file to be successfully opened for reading
|
||||
* and then for the read to fail. Shouldn't ever happen, but
|
||||
* it might be worth checking to see if the offset is past the
|
||||
* known end-of-file.
|
||||
*/
|
||||
db_io.fhp = &dbmfp->fh;
|
||||
if (F_ISSET(dbmfp->fhp, DB_FH_VALID)) {
|
||||
db_io.fhp = dbmfp->fhp;
|
||||
db_io.mutexp = dbmfp->mutexp;
|
||||
db_io.pagesize = db_io.bytes = pagesize;
|
||||
db_io.pgno = bhp->pgno;
|
||||
db_io.buf = bhp->buf;
|
||||
|
||||
ret = __os_io(dbenv, &db_io, DB_IO_READ, &nr);
|
||||
} else
|
||||
ret = 0;
|
||||
|
||||
created = 0;
|
||||
if (nr < pagesize) {
|
||||
if (can_create)
|
||||
created = 1;
|
||||
else {
|
||||
/*
|
||||
* If we had a short read, ret may be 0. This may not
|
||||
* be an error -- in particular DB recovery processing
|
||||
* may request pages that have never been written to
|
||||
* disk, in which case we won't find the page. So, the
|
||||
* caller must know how to handle the error.
|
||||
*/
|
||||
if (ret == 0)
|
||||
ret = EIO;
|
||||
/*
|
||||
* The page may not exist; if it doesn't, nr may well be 0,
|
||||
* but we expect the underlying OS calls not to return an
|
||||
* error code in this case.
|
||||
*/
|
||||
if ((ret = __os_io(dbenv, &db_io, DB_IO_READ, &nr)) != 0)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear any bytes we didn't read that need to be cleared. If we're
|
||||
* running in diagnostic mode, smash any bytes on the page that are
|
||||
* unknown quantities for the caller.
|
||||
*/
|
||||
if (nr != pagesize) {
|
||||
if (nr < pagesize) {
|
||||
/*
|
||||
* Don't output error messages for short reads. In particular,
|
||||
* DB recovery processing may request pages never written to
|
||||
* disk or for which only some part have been written to disk,
|
||||
* in which case we won't find the page. The caller must know
|
||||
* how to handle the error.
|
||||
*/
|
||||
if (can_create == 0) {
|
||||
ret = DB_PAGE_NOTFOUND;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Clear any bytes that need to be cleared. */
|
||||
len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
|
||||
if (nr < len)
|
||||
memset(bhp->buf + nr, 0, len - nr);
|
||||
#ifdef DIAGNOSTIC
|
||||
if (nr > len)
|
||||
len = nr;
|
||||
memset(bhp->buf, 0, len);
|
||||
|
||||
#if defined(DIAGNOSTIC) || defined(UMRW)
|
||||
/*
|
||||
* If we're running in diagnostic mode, corrupt any bytes on
|
||||
* the page that are unknown quantities for the caller.
|
||||
*/
|
||||
if (len < pagesize)
|
||||
memset(bhp->buf + len, CLEAR_BYTE, pagesize - len);
|
||||
#endif
|
||||
}
|
||||
++mfp->stat.st_page_create;
|
||||
} else
|
||||
++mfp->stat.st_page_in;
|
||||
|
||||
/* Call any pgin function. */
|
||||
ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
|
||||
|
||||
/* Unlock the buffer and reacquire the region lock. */
|
||||
/* Unlock the buffer and reacquire the hash bucket lock. */
|
||||
err: MUTEX_UNLOCK(dbenv, &bhp->mutex);
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
MUTEX_LOCK(dbenv, mutexp);
|
||||
|
||||
/*
|
||||
* If no errors occurred, the data is now valid, clear the BH_TRASH
|
||||
* flag; regardless, clear the lock bit and let other threads proceed.
|
||||
*/
|
||||
F_CLR(bhp, BH_LOCKED);
|
||||
if (ret == 0) {
|
||||
if (ret == 0)
|
||||
F_CLR(bhp, BH_TRASH);
|
||||
|
||||
/* Update the statistics. */
|
||||
if (created)
|
||||
++mfp->stat.st_page_create;
|
||||
else
|
||||
++mfp->stat.st_page_in;
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_pgwrite --
|
||||
* Write a page to a file.
|
||||
*
|
||||
* PUBLIC: int __memp_pgwrite
|
||||
* PUBLIC: __P((DB_MPOOL *, DB_MPOOLFILE *, BH *, int *, int *));
|
||||
*/
|
||||
int
|
||||
__memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep)
|
||||
static int
|
||||
__memp_pgwrite(dbmp, dbmfp, hp, bhp)
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
DB_MPOOL_HASH *hp;
|
||||
BH *bhp;
|
||||
int *restartp, *wrotep;
|
||||
{
|
||||
DB_ENV *dbenv;
|
||||
DB_IO db_io;
|
||||
DB_LSN lsn;
|
||||
MPOOL *c_mp, *mp;
|
||||
MPOOLFILE *mfp;
|
||||
size_t nw;
|
||||
int callpgin, dosync, ret, syncfail;
|
||||
const char *fail;
|
||||
int callpgin, ret;
|
||||
|
||||
dbenv = dbmp->dbenv;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
mfp = dbmfp == NULL ? NULL : dbmfp->mfp;
|
||||
|
||||
if (restartp != NULL)
|
||||
*restartp = 0;
|
||||
if (wrotep != NULL)
|
||||
*wrotep = 0;
|
||||
callpgin = 0;
|
||||
callpgin = ret = 0;
|
||||
|
||||
/*
|
||||
* Check the dirty bit -- this buffer may have been written since we
|
||||
* decided to write it.
|
||||
* We should never be called with a clean or trash buffer.
|
||||
* The sync code does call us with already locked buffers.
|
||||
*/
|
||||
if (!F_ISSET(bhp, BH_DIRTY)) {
|
||||
if (wrotep != NULL)
|
||||
*wrotep = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex, dbenv->lockfhp);
|
||||
DB_ASSERT(F_ISSET(bhp, BH_DIRTY));
|
||||
DB_ASSERT(!F_ISSET(bhp, BH_TRASH));
|
||||
|
||||
/*
|
||||
* If there were two writers, we may have just been waiting while the
|
||||
* other writer completed I/O on this buffer. Check the dirty bit one
|
||||
* more time.
|
||||
* If we have not already traded the hash bucket lock for the buffer
|
||||
* lock, do so now.
|
||||
*/
|
||||
if (!F_ISSET(bhp, BH_DIRTY)) {
|
||||
MUTEX_UNLOCK(dbenv, &bhp->mutex);
|
||||
|
||||
if (wrotep != NULL)
|
||||
*wrotep = 1;
|
||||
return (0);
|
||||
if (!F_ISSET(bhp, BH_LOCKED)) {
|
||||
F_SET(bhp, BH_LOCKED);
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex);
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
}
|
||||
|
||||
F_SET(bhp, BH_LOCKED);
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
if (restartp != NULL)
|
||||
*restartp = 1;
|
||||
|
||||
/*
|
||||
* It's possible that the underlying file doesn't exist, either
|
||||
* because of an outright removal or because it was a temporary
|
||||
|
|
@ -347,155 +322,122 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep)
|
|||
goto file_dead;
|
||||
|
||||
/*
|
||||
* Ensure the appropriate log records are on disk. If the page is
|
||||
* being written as part of a sync operation, the flush has already
|
||||
* been done, unless it was written by the application *after* the
|
||||
* sync was scheduled.
|
||||
* If the page is in a file for which we have LSN information, we have
|
||||
* to ensure the appropriate log records are on disk.
|
||||
*/
|
||||
if (LOGGING_ON(dbenv) &&
|
||||
(!F_ISSET(bhp, BH_SYNC) || F_ISSET(bhp, BH_SYNC_LOGFLSH))) {
|
||||
if (LOGGING_ON(dbenv) && mfp->lsn_off != -1) {
|
||||
memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
|
||||
if ((ret = log_flush(dbenv, &lsn)) != 0)
|
||||
if ((ret = dbenv->log_flush(dbenv, &lsn)) != 0)
|
||||
goto err;
|
||||
}
|
||||
DB_ASSERT(!LOGGING_ON(dbenv) ||
|
||||
log_compare(&((LOG *)((DB_LOG *)
|
||||
dbenv->lg_handle)->reginfo.primary)->s_lsn, &LSN(bhp->buf)) > 0);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
/*
|
||||
* Verify write-ahead logging semantics.
|
||||
*
|
||||
* !!!
|
||||
* One special case. There is a single field on the meta-data page,
|
||||
* the last-page-number-in-the-file field, for which we do not log
|
||||
* changes. If the page was originally created in a database that
|
||||
* didn't have logging turned on, we can see a page marked dirty but
|
||||
* for which no corresponding log record has been written. However,
|
||||
* the only way that a page can be created for which there isn't a
|
||||
* previous log record and valid LSN is when the page was created
|
||||
* without logging turned on, and so we check for that special-case
|
||||
* LSN value.
|
||||
*/
|
||||
if (LOGGING_ON(dbenv) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf))) {
|
||||
/*
|
||||
* There is a potential race here. If we are in the midst of
|
||||
* switching log files, it's possible we could test against the
|
||||
* old file and the new offset in the log region's LSN. If we
|
||||
* fail the first test, acquire the log mutex and check again.
|
||||
*/
|
||||
DB_LOG *dblp;
|
||||
LOG *lp;
|
||||
|
||||
dblp = dbenv->lg_handle;
|
||||
lp = dblp->reginfo.primary;
|
||||
if (!IS_NOT_LOGGED_LSN(LSN(bhp->buf)) &&
|
||||
log_compare(&lp->s_lsn, &LSN(bhp->buf)) <= 0) {
|
||||
R_LOCK(dbenv, &dblp->reginfo);
|
||||
DB_ASSERT(log_compare(&lp->s_lsn, &LSN(bhp->buf)) > 0);
|
||||
R_UNLOCK(dbenv, &dblp->reginfo);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Call any pgout function. We set the callpgin flag so that we flag
|
||||
* that the contents of the buffer will need to be passed through pgin
|
||||
* before they are reused.
|
||||
*/
|
||||
if (mfp->ftype == 0)
|
||||
ret = 0;
|
||||
else {
|
||||
if (mfp->ftype != 0) {
|
||||
callpgin = 1;
|
||||
if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Temporary files may not yet have been created. */
|
||||
if (!F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
|
||||
if (!F_ISSET(dbmfp->fhp, DB_FH_VALID)) {
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
if (!F_ISSET(&dbmfp->fh, DB_FH_VALID) &&
|
||||
((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL,
|
||||
DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP,
|
||||
&dbmfp->fh, NULL)) != 0 ||
|
||||
!F_ISSET(&dbmfp->fh, DB_FH_VALID))) {
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
ret = F_ISSET(dbmfp->fhp, DB_FH_VALID) ? 0 :
|
||||
__db_appname(dbenv, DB_APP_TMP, NULL,
|
||||
F_ISSET(dbenv, DB_ENV_DIRECT_DB) ? DB_OSO_DIRECT : 0,
|
||||
dbmfp->fhp, NULL);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
if (ret != 0) {
|
||||
__db_err(dbenv,
|
||||
"unable to create temporary backing file");
|
||||
goto err;
|
||||
}
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
}
|
||||
|
||||
/* Write the page. */
|
||||
db_io.fhp = &dbmfp->fh;
|
||||
db_io.fhp = dbmfp->fhp;
|
||||
db_io.mutexp = dbmfp->mutexp;
|
||||
db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
|
||||
db_io.pgno = bhp->pgno;
|
||||
db_io.buf = bhp->buf;
|
||||
if ((ret = __os_io(dbenv, &db_io, DB_IO_WRITE, &nw)) != 0) {
|
||||
ret = __db_panic(dbenv, ret);
|
||||
fail = "write";
|
||||
goto syserr;
|
||||
}
|
||||
if (nw != mfp->stat.st_pagesize) {
|
||||
ret = EIO;
|
||||
fail = "write";
|
||||
goto syserr;
|
||||
__db_err(dbenv, "%s: write failed for page %lu",
|
||||
__memp_fn(dbmfp), (u_long)bhp->pgno);
|
||||
goto err;
|
||||
}
|
||||
++mfp->stat.st_page_out;
|
||||
|
||||
err:
|
||||
file_dead:
|
||||
/*
|
||||
* !!!
|
||||
* Once we pass this point, dbmfp and mfp may be NULL, we may not have
|
||||
* a valid file reference.
|
||||
*
|
||||
* Unlock the buffer and reacquire the region lock.
|
||||
* Unlock the buffer and reacquire the hash lock.
|
||||
*/
|
||||
MUTEX_UNLOCK(dbenv, &bhp->mutex);
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
/*
|
||||
* Clean up the flags based on a successful write.
|
||||
*
|
||||
* If we rewrote the page, it will need processing by the pgin
|
||||
* routine before reuse.
|
||||
*/
|
||||
if (callpgin)
|
||||
F_SET(bhp, BH_CALLPGIN);
|
||||
F_CLR(bhp, BH_DIRTY | BH_LOCKED);
|
||||
|
||||
/*
|
||||
* If we write a buffer for which a checkpoint is waiting, update
|
||||
* the count of pending buffers (both in the mpool as a whole and
|
||||
* for this file). If the count for this file goes to zero, set a
|
||||
* flag so we flush the writes.
|
||||
* Update the hash bucket statistics, reset the flags.
|
||||
* If we were successful, the page is no longer dirty.
|
||||
*/
|
||||
dosync = 0;
|
||||
if (F_ISSET(bhp, BH_SYNC)) {
|
||||
F_CLR(bhp, BH_SYNC | BH_SYNC_LOGFLSH);
|
||||
if (ret == 0) {
|
||||
DB_ASSERT(hp->hash_page_dirty != 0);
|
||||
--hp->hash_page_dirty;
|
||||
|
||||
--mp->lsn_cnt;
|
||||
if (mfp != NULL)
|
||||
dosync = --mfp->lsn_cnt == 0 ? 1 : 0;
|
||||
F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
|
||||
}
|
||||
|
||||
/* Update the page clean/dirty statistics. */
|
||||
c_mp = BH_TO_CACHE(dbmp, bhp);
|
||||
++c_mp->stat.st_page_clean;
|
||||
--c_mp->stat.st_page_dirty;
|
||||
|
||||
/* Update I/O statistics. */
|
||||
if (mfp != NULL)
|
||||
++mfp->stat.st_page_out;
|
||||
|
||||
/*
|
||||
* Do the sync after everything else has been updated, so any incoming
|
||||
* checkpoint doesn't see inconsistent information.
|
||||
*
|
||||
* XXX:
|
||||
* Don't lock the region around the sync, fsync(2) has no atomicity
|
||||
* issues.
|
||||
*
|
||||
* XXX:
|
||||
* We ignore errors from the sync -- it makes no sense to return an
|
||||
* error to the calling process, so set a flag causing the checkpoint
|
||||
* to be retried later. There is a possibility, of course, that a
|
||||
* subsequent checkpoint was started and that we're going to force it
|
||||
* to fail. That should be unlikely, and fixing it would be difficult.
|
||||
*/
|
||||
if (dosync) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
syncfail = __os_fsync(dbenv, &dbmfp->fh) != 0;
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
if (syncfail)
|
||||
F_SET(mp, MP_LSN_RETRY);
|
||||
}
|
||||
|
||||
if (wrotep != NULL)
|
||||
*wrotep = 1;
|
||||
|
||||
return (0);
|
||||
|
||||
syserr: __db_err(dbenv, "%s: %s failed for page %lu",
|
||||
__memp_fn(dbmfp), fail, (u_long)bhp->pgno);
|
||||
|
||||
err: /* Unlock the buffer and reacquire the region lock. */
|
||||
MUTEX_UNLOCK(dbenv, &bhp->mutex);
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/*
|
||||
* Clean up the flags based on a failure.
|
||||
*
|
||||
* The page remains dirty but we remove our lock. If we rewrote the
|
||||
* page, it will need processing by the pgin routine before reuse.
|
||||
*/
|
||||
if (callpgin)
|
||||
F_SET(bhp, BH_CALLPGIN);
|
||||
/* Regardless, clear any sync wait-for count and remove our lock. */
|
||||
bhp->ref_sync = 0;
|
||||
F_CLR(bhp, BH_LOCKED);
|
||||
|
||||
return (ret);
|
||||
|
|
@ -514,15 +456,17 @@ __memp_pg(dbmfp, bhp, is_pgin)
|
|||
int is_pgin;
|
||||
{
|
||||
DBT dbt, *dbtp;
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPREG *mpreg;
|
||||
MPOOLFILE *mfp;
|
||||
int ftype, ret;
|
||||
|
||||
dbmp = dbmfp->dbmp;
|
||||
dbenv = dbmp->dbenv;
|
||||
mfp = dbmfp->mfp;
|
||||
|
||||
MUTEX_THREAD_LOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
|
||||
ftype = mfp->ftype;
|
||||
for (mpreg = LIST_FIRST(&dbmp->dbregq);
|
||||
|
|
@ -536,28 +480,28 @@ __memp_pg(dbmfp, bhp, is_pgin)
|
|||
dbt.data = R_ADDR(dbmp->reginfo, mfp->pgcookie_off);
|
||||
dbtp = &dbt;
|
||||
}
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
|
||||
if (is_pgin) {
|
||||
if (mpreg->pgin != NULL &&
|
||||
(ret = mpreg->pgin(dbmp->dbenv,
|
||||
(ret = mpreg->pgin(dbenv,
|
||||
bhp->pgno, bhp->buf, dbtp)) != 0)
|
||||
goto err;
|
||||
} else
|
||||
if (mpreg->pgout != NULL &&
|
||||
(ret = mpreg->pgout(dbmp->dbenv,
|
||||
(ret = mpreg->pgout(dbenv,
|
||||
bhp->pgno, bhp->buf, dbtp)) != 0)
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
|
||||
if (mpreg == NULL)
|
||||
MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
|
||||
return (0);
|
||||
|
||||
err: MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
||||
__db_err(dbmp->dbenv, "%s: %s failed for page %lu",
|
||||
err: MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
__db_err(dbenv, "%s: %s failed for page %lu",
|
||||
__memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
|
||||
return (ret);
|
||||
}
|
||||
|
|
@ -566,55 +510,78 @@ err: MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp);
|
|||
* __memp_bhfree --
|
||||
* Free a bucket header and its referenced data.
|
||||
*
|
||||
* PUBLIC: void __memp_bhfree __P((DB_MPOOL *, BH *, int));
|
||||
* PUBLIC: void __memp_bhfree __P((DB_MPOOL *, DB_MPOOL_HASH *, BH *, int));
|
||||
*/
|
||||
void
|
||||
__memp_bhfree(dbmp, bhp, free_mem)
|
||||
__memp_bhfree(dbmp, hp, bhp, free_mem)
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPOOL_HASH *hp;
|
||||
BH *bhp;
|
||||
int free_mem;
|
||||
{
|
||||
DB_HASHTAB *dbht;
|
||||
DB_ENV *dbenv;
|
||||
MPOOL *c_mp, *mp;
|
||||
MPOOLFILE *mfp;
|
||||
int n_bucket, n_cache;
|
||||
u_int32_t n_cache;
|
||||
|
||||
/*
|
||||
* Assumes the hash bucket is locked and the MPOOL is not.
|
||||
*/
|
||||
dbenv = dbmp->dbenv;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
c_mp = BH_TO_CACHE(dbmp, bhp);
|
||||
n_cache = NCACHE(mp, bhp->pgno);
|
||||
n_bucket = NBUCKET(c_mp, bhp->mf_offset, bhp->pgno);
|
||||
dbht = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
|
||||
n_cache = NCACHE(mp, bhp->mf_offset, bhp->pgno);
|
||||
|
||||
/* Delete the buffer header from the hash bucket queue. */
|
||||
SH_TAILQ_REMOVE(&dbht[n_bucket], bhp, hq, __bh);
|
||||
/*
|
||||
* Delete the buffer header from the hash bucket queue and reset
|
||||
* the hash bucket's priority, if necessary.
|
||||
*/
|
||||
SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
|
||||
if (bhp->priority == hp->hash_priority)
|
||||
hp->hash_priority =
|
||||
SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL ?
|
||||
0 : SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
||||
|
||||
/* Delete the buffer header from the LRU queue. */
|
||||
SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh);
|
||||
/*
|
||||
* Discard the hash bucket's mutex, it's no longer needed, and
|
||||
* we don't want to be holding it when acquiring other locks.
|
||||
*/
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
/* Clear the mutex this buffer recorded */
|
||||
__db_shlocks_clear(&bhp->mutex, &dbmp->reginfo[n_cache],
|
||||
(REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], mp->maint_off));
|
||||
/*
|
||||
* Find the underlying MPOOLFILE and decrement its reference count.
|
||||
* If this is its last reference, remove it.
|
||||
*/
|
||||
mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
|
||||
MUTEX_LOCK(dbenv, &mfp->mutex);
|
||||
if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0)
|
||||
__memp_mf_discard(dbmp, mfp);
|
||||
else
|
||||
MUTEX_UNLOCK(dbenv, &mfp->mutex);
|
||||
|
||||
R_LOCK(dbenv, &dbmp->reginfo[n_cache]);
|
||||
|
||||
/*
|
||||
* If we're not reusing it immediately, free the buffer header
|
||||
* Clear the mutex this buffer recorded; requires the region lock
|
||||
* be held.
|
||||
*/
|
||||
__db_shlocks_clear(&bhp->mutex, &dbmp->reginfo[n_cache],
|
||||
(REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], mp->maint_off));
|
||||
|
||||
/*
|
||||
* If we're not reusing the buffer immediately, free the buffer header
|
||||
* and data for real.
|
||||
*/
|
||||
if (free_mem) {
|
||||
--c_mp->stat.st_page_clean;
|
||||
__db_shalloc_free(dbmp->reginfo[n_cache].addr, bhp);
|
||||
c_mp = dbmp->reginfo[n_cache].primary;
|
||||
c_mp->stat.st_pages--;
|
||||
}
|
||||
R_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_upgrade --
|
||||
* Upgrade a file descriptor from readonly to readwrite.
|
||||
* Upgrade a file descriptor from read-only to read-write.
|
||||
*/
|
||||
static int
|
||||
__memp_upgrade(dbmp, dbmfp, mfp)
|
||||
|
|
@ -622,41 +589,58 @@ __memp_upgrade(dbmp, dbmfp, mfp)
|
|||
DB_MPOOLFILE *dbmfp;
|
||||
MPOOLFILE *mfp;
|
||||
{
|
||||
DB_FH fh;
|
||||
DB_ENV *dbenv;
|
||||
DB_FH *fhp, *tfhp;
|
||||
int ret;
|
||||
char *rpath;
|
||||
|
||||
/*
|
||||
* !!!
|
||||
* We expect the handle to already be locked.
|
||||
*/
|
||||
|
||||
/* Check to see if we've already upgraded. */
|
||||
if (F_ISSET(dbmfp, MP_UPGRADE))
|
||||
return (0);
|
||||
|
||||
/* Check to see if we've already failed. */
|
||||
if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
|
||||
return (1);
|
||||
dbenv = dbmp->dbenv;
|
||||
fhp = NULL;
|
||||
rpath = NULL;
|
||||
|
||||
/*
|
||||
* Calculate the real name for this file and try to open it read/write.
|
||||
* We know we have a valid pathname for the file because it's the only
|
||||
* way we could have gotten a file descriptor of any kind.
|
||||
*/
|
||||
if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA,
|
||||
NULL, R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0)
|
||||
return (ret);
|
||||
if (__os_open(dbmp->dbenv, rpath, 0, 0, &fh) != 0) {
|
||||
if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &fhp)) != 0)
|
||||
goto err;
|
||||
|
||||
if ((ret = __db_appname(dbenv, DB_APP_DATA,
|
||||
R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0)
|
||||
goto err;
|
||||
|
||||
if (__os_open(dbenv, rpath,
|
||||
F_ISSET(mfp, MP_DIRECT) ? DB_OSO_DIRECT : 0, 0, fhp) != 0) {
|
||||
F_SET(dbmfp, MP_UPGRADE_FAIL);
|
||||
ret = 1;
|
||||
} else {
|
||||
/* Swap the descriptors and set the upgrade flag. */
|
||||
(void)__os_closehandle(&dbmfp->fh);
|
||||
dbmfp->fh = fh;
|
||||
F_SET(dbmfp, MP_UPGRADE);
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
__os_freestr(rpath);
|
||||
|
||||
/*
|
||||
* Swap the descriptors and set the upgrade flag.
|
||||
*
|
||||
* XXX
|
||||
* There is a race here. If another process schedules a read using the
|
||||
* existing file descriptor and is swapped out before making the system
|
||||
* call, this code could theoretically close the file descriptor out
|
||||
* from under it. While it's very unlikely, this code should still be
|
||||
* rewritten.
|
||||
*/
|
||||
tfhp = dbmfp->fhp;
|
||||
dbmfp->fhp = fhp;
|
||||
fhp = tfhp;
|
||||
|
||||
(void)__os_closehandle(dbenv, fhp);
|
||||
F_SET(dbmfp, MP_UPGRADE);
|
||||
|
||||
ret = 0;
|
||||
if (0) {
|
||||
err: ret = 1;
|
||||
}
|
||||
if (fhp != NULL)
|
||||
__os_free(dbenv, fhp);
|
||||
if (rpath != NULL)
|
||||
__os_free(dbenv, rpath);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
|
|
|||
789
bdb/mp/mp_fget.c
789
bdb/mp/mp_fget.c
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_fget.c,v 11.28 2001/01/10 04:50:53 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_fget.c,v 11.68 2002/08/06 04:58:09 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -16,51 +16,54 @@ static const char revid[] = "$Id: mp_fget.c,v 11.28 2001/01/10 04:50:53 ubell Ex
|
|||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#ifdef HAVE_FILESYSTEM_NOTZERO
|
||||
static int __memp_fs_notzero
|
||||
__P((DB_ENV *, DB_MPOOLFILE *, MPOOLFILE *, db_pgno_t *));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* memp_fget --
|
||||
* __memp_fget --
|
||||
* Get a page from the file.
|
||||
*
|
||||
* PUBLIC: int __memp_fget
|
||||
* PUBLIC: __P((DB_MPOOLFILE *, db_pgno_t *, u_int32_t, void *));
|
||||
*/
|
||||
int
|
||||
memp_fget(dbmfp, pgnoaddr, flags, addrp)
|
||||
__memp_fget(dbmfp, pgnoaddr, flags, addrp)
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
db_pgno_t *pgnoaddr;
|
||||
u_int32_t flags;
|
||||
void *addrp;
|
||||
{
|
||||
BH *bhp;
|
||||
enum { FIRST_FOUND, FIRST_MISS, SECOND_FOUND, SECOND_MISS } state;
|
||||
BH *alloc_bhp, *bhp;
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
DB_HASHTAB *dbht;
|
||||
DB_MPOOL_HASH *hp;
|
||||
MPOOL *c_mp, *mp;
|
||||
MPOOLFILE *mfp;
|
||||
size_t n_bucket, n_cache, mf_offset;
|
||||
u_int32_t st_hsearch;
|
||||
int b_incr, first, ret;
|
||||
roff_t mf_offset;
|
||||
u_int32_t n_cache, st_hsearch;
|
||||
int b_incr, extending, first, ret;
|
||||
|
||||
*(void **)addrp = NULL;
|
||||
|
||||
dbmp = dbmfp->dbmp;
|
||||
dbenv = dbmp->dbenv;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
mfp = dbmfp->mfp;
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_fget(dbmfp, pgnoaddr, flags, addrp));
|
||||
#endif
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
mfp = dbmfp->mfp;
|
||||
mf_offset = R_OFFSET(dbmp->reginfo, mfp);
|
||||
alloc_bhp = bhp = NULL;
|
||||
hp = NULL;
|
||||
b_incr = extending = ret = 0;
|
||||
|
||||
/*
|
||||
* Validate arguments.
|
||||
*
|
||||
|
|
@ -74,100 +77,35 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
|
|||
* is to keep database files small. It's sleazy as hell, but we catch
|
||||
* any attempt to actually write the file in memp_fput().
|
||||
*/
|
||||
#define OKFLAGS \
|
||||
(DB_MPOOL_CREATE | DB_MPOOL_LAST | \
|
||||
DB_MPOOL_NEW | DB_MPOOL_NEW_GROUP | DB_MPOOL_EXTENT)
|
||||
#define OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
|
||||
if (flags != 0) {
|
||||
if ((ret = __db_fchk(dbenv, "memp_fget", flags, OKFLAGS)) != 0)
|
||||
return (ret);
|
||||
|
||||
switch (flags & ~DB_MPOOL_EXTENT) {
|
||||
switch (flags) {
|
||||
case DB_MPOOL_CREATE:
|
||||
break;
|
||||
case DB_MPOOL_LAST:
|
||||
/* Get the last page number in the file. */
|
||||
if (flags == DB_MPOOL_LAST) {
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
*pgnoaddr = mfp->last_pgno;
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
}
|
||||
break;
|
||||
case DB_MPOOL_NEW:
|
||||
case DB_MPOOL_NEW_GROUP:
|
||||
case 0:
|
||||
/*
|
||||
* If always creating a page, skip the first search
|
||||
* of the hash bucket.
|
||||
*/
|
||||
if (flags == DB_MPOOL_NEW)
|
||||
goto alloc;
|
||||
break;
|
||||
default:
|
||||
return (__db_ferr(dbenv, "memp_fget", 1));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
/*
|
||||
* XXX
|
||||
* We want to switch threads as often as possible. Yield every time
|
||||
* we get a new page to ensure contention.
|
||||
*/
|
||||
if (DB_GLOBAL(db_pageyield))
|
||||
__os_yield(dbenv, 1);
|
||||
#endif
|
||||
|
||||
/* Initialize remaining local variables. */
|
||||
mf_offset = R_OFFSET(dbmp->reginfo, mfp);
|
||||
bhp = NULL;
|
||||
st_hsearch = 0;
|
||||
b_incr = ret = 0;
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/*
|
||||
* Check for the new, last or last + 1 page requests.
|
||||
*
|
||||
* Examine and update the file's last_pgno value. We don't care if
|
||||
* the last_pgno value immediately changes due to another thread --
|
||||
* at this instant in time, the value is correct. We do increment the
|
||||
* current last_pgno value if the thread is asking for a new page,
|
||||
* however, to ensure that two threads creating pages don't get the
|
||||
* same one.
|
||||
*
|
||||
* If we create a page, there is the potential that a page after it
|
||||
* in the file will be written before it will be written. Recovery
|
||||
* depends on pages that are "created" in the file by subsequent pages
|
||||
* being written be zeroed out, not have random garbage. Ensure that
|
||||
* the OS agrees.
|
||||
*
|
||||
* !!!
|
||||
* DB_MPOOL_NEW_GROUP is undocumented -- the hash access method needs
|
||||
* to allocate contiguous groups of pages in order to do subdatabases.
|
||||
* We return the first page in the group, but the caller must put an
|
||||
* LSN on the *last* page and write it, otherwise after a crash we may
|
||||
* not create all of the pages we need to create.
|
||||
*/
|
||||
if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW | DB_MPOOL_NEW_GROUP)) {
|
||||
if (LF_ISSET(DB_MPOOL_NEW)) {
|
||||
if (F_ISSET(&dbmfp->fh, DB_FH_VALID) && (ret =
|
||||
__os_fpinit(dbenv, &dbmfp->fh, mfp->last_pgno + 1,
|
||||
1, mfp->stat.st_pagesize)) != 0) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
return (ret);
|
||||
}
|
||||
++mfp->last_pgno;
|
||||
}
|
||||
if (LF_ISSET(DB_MPOOL_NEW_GROUP)) {
|
||||
if (F_ISSET(&dbmfp->fh, DB_FH_VALID) && (ret =
|
||||
__os_fpinit(dbenv, &dbmfp->fh, mfp->last_pgno + 1,
|
||||
(int)*pgnoaddr, mfp->stat.st_pagesize)) != 0) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
return (ret);
|
||||
}
|
||||
mfp->last_pgno += *pgnoaddr;
|
||||
}
|
||||
*pgnoaddr = mfp->last_pgno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine the hash bucket where this page will live, and get local
|
||||
* pointers to the cache and its hash table.
|
||||
*/
|
||||
n_cache = NCACHE(mp, *pgnoaddr);
|
||||
c_mp = dbmp->reginfo[n_cache].primary;
|
||||
n_bucket = NBUCKET(c_mp, mf_offset, *pgnoaddr);
|
||||
dbht = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
|
||||
|
||||
if (LF_ISSET(DB_MPOOL_NEW | DB_MPOOL_NEW_GROUP))
|
||||
goto alloc;
|
||||
|
||||
/*
|
||||
* If mmap'ing the file and the page is not past the end of the file,
|
||||
* just return a pointer.
|
||||
|
|
@ -183,235 +121,534 @@ memp_fget(dbmfp, pgnoaddr, flags, addrp)
|
|||
* goes through the cache. All pages previously returned will be safe,
|
||||
* as long as the correct locking protocol was observed.
|
||||
*
|
||||
* XXX
|
||||
* We don't discard the map because we don't know when all of the
|
||||
* pages will have been discarded from the process' address space.
|
||||
* It would be possible to do so by reference counting the open
|
||||
* pages from the mmap, but it's unclear to me that it's worth it.
|
||||
*/
|
||||
if (dbmfp->addr != NULL && F_ISSET(mfp, MP_CAN_MMAP)) {
|
||||
if (*pgnoaddr > mfp->orig_last_pgno) {
|
||||
/*
|
||||
* !!!
|
||||
* See the comment above about non-existent pages and
|
||||
* the hash access method.
|
||||
*/
|
||||
if (!LF_ISSET(DB_MPOOL_CREATE)) {
|
||||
if (!LF_ISSET(DB_MPOOL_EXTENT))
|
||||
__db_err(dbenv,
|
||||
"%s: page %lu doesn't exist",
|
||||
__memp_fn(dbmfp), (u_long)*pgnoaddr);
|
||||
ret = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
*(void **)addrp =
|
||||
R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
|
||||
++mfp->stat.st_map;
|
||||
goto done;
|
||||
}
|
||||
if (dbmfp->addr != NULL &&
|
||||
F_ISSET(mfp, MP_CAN_MMAP) && *pgnoaddr <= mfp->orig_last_pgno) {
|
||||
*(void **)addrp =
|
||||
R_ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
|
||||
++mfp->stat.st_map;
|
||||
return (0);
|
||||
}
|
||||
|
||||
hb_search:
|
||||
/*
|
||||
* Determine the cache and hash bucket where this page lives and get
|
||||
* local pointers to them. Reset on each pass through this code, the
|
||||
* page number can change.
|
||||
*/
|
||||
n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
|
||||
c_mp = dbmp->reginfo[n_cache].primary;
|
||||
hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
|
||||
hp = &hp[NBUCKET(c_mp, mf_offset, *pgnoaddr)];
|
||||
|
||||
/* Search the hash chain for the page. */
|
||||
for (bhp = SH_TAILQ_FIRST(&dbht[n_bucket], __bh);
|
||||
retry: st_hsearch = 0;
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
||||
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh)) {
|
||||
++st_hsearch;
|
||||
if (bhp->pgno != *pgnoaddr || bhp->mf_offset != mf_offset)
|
||||
continue;
|
||||
|
||||
/* Increment the reference count. */
|
||||
/*
|
||||
* Increment the reference count. We may discard the hash
|
||||
* bucket lock as we evaluate and/or read the buffer, so we
|
||||
* need to ensure it doesn't move and its contents remain
|
||||
* unchanged.
|
||||
*/
|
||||
if (bhp->ref == UINT16_T_MAX) {
|
||||
__db_err(dbenv,
|
||||
"%s: page %lu: reference count overflow",
|
||||
__memp_fn(dbmfp), (u_long)bhp->pgno);
|
||||
ret = EINVAL;
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the reference count. We may discard the region
|
||||
* lock as we evaluate and/or read the buffer, so we need to
|
||||
* ensure that it doesn't move and that its contents remain
|
||||
* unchanged.
|
||||
*/
|
||||
++bhp->ref;
|
||||
b_incr = 1;
|
||||
|
||||
/*
|
||||
* Any buffer we find might be trouble.
|
||||
*
|
||||
* BH_LOCKED --
|
||||
* I/O is in progress. Because we've incremented the buffer
|
||||
* reference count, we know the buffer can't move. Unlock
|
||||
* the region lock, wait for the I/O to complete, and reacquire
|
||||
* the region.
|
||||
* I/O is in progress or sync is waiting on the buffer to write
|
||||
* it. Because we've incremented the buffer reference count,
|
||||
* we know the buffer can't move. Unlock the bucket lock, wait
|
||||
* for the buffer to become available, reacquire the bucket.
|
||||
*/
|
||||
for (first = 1; F_ISSET(bhp, BH_LOCKED); first = 0) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
for (first = 1; F_ISSET(bhp, BH_LOCKED) &&
|
||||
!F_ISSET(dbenv, DB_ENV_NOLOCKING); first = 0) {
|
||||
/*
|
||||
* Explicitly yield the processor if it's not the first
|
||||
* pass through this loop -- if we don't, we might end
|
||||
* up running to the end of our CPU quantum as we will
|
||||
* simply be swapping between the two locks.
|
||||
* If someone is trying to sync this buffer and the
|
||||
* buffer is hot, they may never get in. Give up
|
||||
* and try again.
|
||||
*/
|
||||
if (!first && bhp->ref_sync != 0) {
|
||||
--bhp->ref;
|
||||
b_incr = 0;
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
__os_yield(dbenv, 1);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
/*
|
||||
* Explicitly yield the processor if not the first pass
|
||||
* through this loop -- if we don't, we might run to the
|
||||
* end of our CPU quantum as we will simply be swapping
|
||||
* between the two locks.
|
||||
*/
|
||||
if (!first)
|
||||
__os_yield(dbenv, 1);
|
||||
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex, dbenv->lockfhp);
|
||||
MUTEX_LOCK(dbenv, &bhp->mutex);
|
||||
/* Wait for I/O to finish... */
|
||||
MUTEX_UNLOCK(dbenv, &bhp->mutex);
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* BH_TRASH --
|
||||
* The contents of the buffer are garbage. Shouldn't happen,
|
||||
* and this read is likely to fail, but might as well try.
|
||||
*/
|
||||
if (F_ISSET(bhp, BH_TRASH))
|
||||
goto reread;
|
||||
|
||||
/*
|
||||
* BH_CALLPGIN --
|
||||
* The buffer was converted so it could be written, and the
|
||||
* contents need to be converted again.
|
||||
*/
|
||||
if (F_ISSET(bhp, BH_CALLPGIN)) {
|
||||
if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
|
||||
goto err;
|
||||
F_CLR(bhp, BH_CALLPGIN);
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
}
|
||||
|
||||
++mfp->stat.st_cache_hit;
|
||||
*(void **)addrp = bhp->buf;
|
||||
goto done;
|
||||
}
|
||||
|
||||
alloc: /* Allocate new buffer header and data space. */
|
||||
if ((ret = __memp_alloc(dbmp,
|
||||
&dbmp->reginfo[n_cache], mfp, 0, NULL, &bhp)) != 0)
|
||||
goto err;
|
||||
|
||||
++c_mp->stat.st_page_clean;
|
||||
|
||||
/*
|
||||
* Initialize the BH fields so that we can call the __memp_bhfree
|
||||
* routine if an error occurs.
|
||||
*/
|
||||
memset(bhp, 0, sizeof(BH));
|
||||
bhp->ref = 1;
|
||||
bhp->pgno = *pgnoaddr;
|
||||
bhp->mf_offset = mf_offset;
|
||||
|
||||
/* Increment the count of buffers referenced by this MPOOLFILE. */
|
||||
++mfp->block_cnt;
|
||||
|
||||
/*
|
||||
* Prepend the bucket header to the head of the appropriate MPOOL
|
||||
* bucket hash list. Append the bucket header to the tail of the
|
||||
* MPOOL LRU chain.
|
||||
*/
|
||||
SH_TAILQ_INSERT_HEAD(&dbht[n_bucket], bhp, hq, __bh);
|
||||
SH_TAILQ_INSERT_TAIL(&c_mp->bhq, bhp, q);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if ((db_alignp_t)bhp->buf & (sizeof(size_t) - 1)) {
|
||||
__db_err(dbenv, "Internal error: BH data NOT size_t aligned.");
|
||||
ret = EINVAL;
|
||||
__memp_bhfree(dbmp, bhp, 1);
|
||||
goto err;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((ret = __db_shmutex_init(dbenv, &bhp->mutex,
|
||||
R_OFFSET(dbmp->reginfo, &bhp->mutex) + DB_FCNTL_OFF_MPOOL,
|
||||
0, &dbmp->reginfo[n_cache],
|
||||
(REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], c_mp->maint_off)))
|
||||
!= 0) {
|
||||
__memp_bhfree(dbmp, bhp, 1);
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we created the page, zero it out and continue.
|
||||
*
|
||||
* !!!
|
||||
* Note: DB_MPOOL_NEW specifically doesn't call the pgin function.
|
||||
* If DB_MPOOL_CREATE is used, then the application's pgin function
|
||||
* has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
|
||||
* it can detect all of its page creates, and not bother.
|
||||
*
|
||||
* If we're running in diagnostic mode, smash any bytes on the
|
||||
* page that are unknown quantities for the caller.
|
||||
*
|
||||
* Otherwise, read the page into memory, optionally creating it if
|
||||
* DB_MPOOL_CREATE is set.
|
||||
* Update the hash bucket search statistics -- do now because our next
|
||||
* search may be for a different bucket.
|
||||
*/
|
||||
if (LF_ISSET(DB_MPOOL_NEW | DB_MPOOL_NEW_GROUP)) {
|
||||
if (mfp->clear_len == 0)
|
||||
memset(bhp->buf, 0, mfp->stat.st_pagesize);
|
||||
else {
|
||||
memset(bhp->buf, 0, mfp->clear_len);
|
||||
#ifdef DIAGNOSTIC
|
||||
memset(bhp->buf + mfp->clear_len, CLEAR_BYTE,
|
||||
mfp->stat.st_pagesize - mfp->clear_len);
|
||||
#endif
|
||||
}
|
||||
++c_mp->stat.st_hash_searches;
|
||||
if (st_hsearch > c_mp->stat.st_hash_longest)
|
||||
c_mp->stat.st_hash_longest = st_hsearch;
|
||||
c_mp->stat.st_hash_examined += st_hsearch;
|
||||
|
||||
++mfp->stat.st_page_create;
|
||||
} else {
|
||||
/*
|
||||
* There are 4 possible paths to this location:
|
||||
*
|
||||
* FIRST_MISS:
|
||||
* Didn't find the page in the hash bucket on our first pass:
|
||||
* bhp == NULL, alloc_bhp == NULL
|
||||
*
|
||||
* FIRST_FOUND:
|
||||
* Found the page in the hash bucket on our first pass:
|
||||
* bhp != NULL, alloc_bhp == NULL
|
||||
*
|
||||
* SECOND_FOUND:
|
||||
* Didn't find the page in the hash bucket on the first pass,
|
||||
* allocated space, and found the page in the hash bucket on
|
||||
* our second pass:
|
||||
* bhp != NULL, alloc_bhp != NULL
|
||||
*
|
||||
* SECOND_MISS:
|
||||
* Didn't find the page in the hash bucket on the first pass,
|
||||
* allocated space, and didn't find the page in the hash bucket
|
||||
* on our second pass:
|
||||
* bhp == NULL, alloc_bhp != NULL
|
||||
*/
|
||||
state = bhp == NULL ?
|
||||
(alloc_bhp == NULL ? FIRST_MISS : SECOND_MISS) :
|
||||
(alloc_bhp == NULL ? FIRST_FOUND : SECOND_FOUND);
|
||||
switch (state) {
|
||||
case FIRST_FOUND:
|
||||
/* We found the buffer in our first check -- we're done. */
|
||||
break;
|
||||
case FIRST_MISS:
|
||||
/*
|
||||
* It's possible for the read function to fail, which means
|
||||
* that we fail as well. Note, the __memp_pgread() function
|
||||
* discards the region lock, so the buffer must be pinned
|
||||
* down so that it cannot move and its contents are unchanged.
|
||||
* We didn't find the buffer in our first check. Figure out
|
||||
* if the page exists, and allocate structures so we can add
|
||||
* the page to the buffer pool.
|
||||
*/
|
||||
reread: if ((ret = __memp_pgread(dbmfp,
|
||||
bhp, LF_ISSET(DB_MPOOL_CREATE|DB_MPOOL_EXTENT))) != 0) {
|
||||
/*
|
||||
* !!!
|
||||
* Discard the buffer unless another thread is waiting
|
||||
* on our I/O to complete. Regardless, the header has
|
||||
* the BH_TRASH flag set.
|
||||
*/
|
||||
if (bhp->ref == 1)
|
||||
__memp_bhfree(dbmp, bhp, 1);
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
alloc: /*
|
||||
* If DB_MPOOL_NEW is set, we have to allocate a page number.
|
||||
* If neither DB_MPOOL_CREATE nor DB_MPOOL_NEW is set, then
|
||||
* it's an error to try and get a page past the end of file.
|
||||
*/
|
||||
COMPQUIET(n_cache, 0);
|
||||
|
||||
extending = ret = 0;
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
switch (flags) {
|
||||
case DB_MPOOL_NEW:
|
||||
extending = 1;
|
||||
*pgnoaddr = mfp->last_pgno + 1;
|
||||
break;
|
||||
case DB_MPOOL_CREATE:
|
||||
extending = *pgnoaddr > mfp->last_pgno;
|
||||
break;
|
||||
default:
|
||||
ret = *pgnoaddr > mfp->last_pgno ? DB_PAGE_NOTFOUND : 0;
|
||||
break;
|
||||
}
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
if (ret != 0)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* !!!
|
||||
* In the DB_MPOOL_NEW code path, mf_offset and n_cache have
|
||||
* not yet been initialized.
|
||||
*/
|
||||
mf_offset = R_OFFSET(dbmp->reginfo, mfp);
|
||||
n_cache = NCACHE(mp, mf_offset, *pgnoaddr);
|
||||
|
||||
/* Allocate a new buffer header and data space. */
|
||||
if ((ret = __memp_alloc(dbmp,
|
||||
&dbmp->reginfo[n_cache], mfp, 0, NULL, &alloc_bhp)) != 0)
|
||||
goto err;
|
||||
#ifdef DIAGNOSTIC
|
||||
if ((db_alignp_t)alloc_bhp->buf & (sizeof(size_t) - 1)) {
|
||||
__db_err(dbenv,
|
||||
"Error: buffer data is NOT size_t aligned");
|
||||
ret = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* If we are extending the file, we'll need the region lock
|
||||
* again.
|
||||
*/
|
||||
if (extending)
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
++mfp->stat.st_cache_miss;
|
||||
/*
|
||||
* DB_MPOOL_NEW does not guarantee you a page unreferenced by
|
||||
* any other thread of control. (That guarantee is interesting
|
||||
* for DB_MPOOL_NEW, unlike DB_MPOOL_CREATE, because the caller
|
||||
* did not specify the page number, and so, may reasonably not
|
||||
* have any way to lock the page outside of mpool.) Regardless,
|
||||
* if we allocate the page, and some other thread of control
|
||||
* requests the page by number, we will not detect that and the
|
||||
* thread of control that allocated using DB_MPOOL_NEW may not
|
||||
* have a chance to initialize the page. (Note: we *could*
|
||||
* detect this case if we set a flag in the buffer header which
|
||||
* guaranteed that no gets of the page would succeed until the
|
||||
* reference count went to 0, that is, until the creating thread
|
||||
* put the page.) What we do guarantee is that if two threads
|
||||
* of control are both doing DB_MPOOL_NEW calls, they won't
|
||||
* collide, that is, they won't both get the same page.
|
||||
*
|
||||
* There's a possibility that another thread allocated the page
|
||||
* we were planning to allocate while we were off doing buffer
|
||||
* allocation. We can detect that by making sure the page number
|
||||
* we were going to use is still available. If it's not, then
|
||||
* we check to see if the next available page number hashes to
|
||||
* the same mpool region as the old one -- if it does, we can
|
||||
* continue, otherwise, we have to start over.
|
||||
*/
|
||||
if (flags == DB_MPOOL_NEW && *pgnoaddr != mfp->last_pgno + 1) {
|
||||
*pgnoaddr = mfp->last_pgno + 1;
|
||||
if (n_cache != NCACHE(mp, mf_offset, *pgnoaddr)) {
|
||||
__db_shalloc_free(
|
||||
dbmp->reginfo[n_cache].addr, alloc_bhp);
|
||||
/*
|
||||
* flags == DB_MPOOL_NEW, so extending is set
|
||||
* and we're holding the region locked.
|
||||
*/
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
alloc_bhp = NULL;
|
||||
goto alloc;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We released the region lock, so another thread might have
|
||||
* extended the file. Update the last_pgno and initialize
|
||||
* the file, as necessary, if we extended the file.
|
||||
*/
|
||||
if (extending) {
|
||||
#ifdef HAVE_FILESYSTEM_NOTZERO
|
||||
if (*pgnoaddr > mfp->last_pgno &&
|
||||
__os_fs_notzero() &&
|
||||
F_ISSET(dbmfp->fhp, DB_FH_VALID))
|
||||
ret = __memp_fs_notzero(
|
||||
dbenv, dbmfp, mfp, pgnoaddr);
|
||||
else
|
||||
ret = 0;
|
||||
#endif
|
||||
if (ret == 0 && *pgnoaddr > mfp->last_pgno)
|
||||
mfp->last_pgno = *pgnoaddr;
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
if (ret != 0)
|
||||
goto err;
|
||||
}
|
||||
goto hb_search;
|
||||
case SECOND_FOUND:
|
||||
/*
|
||||
* We allocated buffer space for the requested page, but then
|
||||
* found the page in the buffer cache on our second check.
|
||||
* That's OK -- we can use the page we found in the pool,
|
||||
* unless DB_MPOOL_NEW is set.
|
||||
*
|
||||
* Free the allocated memory, we no longer need it. Since we
|
||||
* can't acquire the region lock while holding the hash bucket
|
||||
* lock, we have to release the hash bucket and re-acquire it.
|
||||
* That's OK, because we have the buffer pinned down.
|
||||
*/
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
R_LOCK(dbenv, &dbmp->reginfo[n_cache]);
|
||||
__db_shalloc_free(dbmp->reginfo[n_cache].addr, alloc_bhp);
|
||||
alloc_bhp = NULL;
|
||||
R_UNLOCK(dbenv, &dbmp->reginfo[n_cache]);
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
/*
|
||||
* We can't use the page we found in the pool if DB_MPOOL_NEW
|
||||
* was set. (For details, see the above comment beginning
|
||||
* "DB_MPOOL_NEW does not guarantee you a page unreferenced by
|
||||
* any other thread of control".) If DB_MPOOL_NEW is set, we
|
||||
* release our pin on this particular buffer, and try to get
|
||||
* another one.
|
||||
*/
|
||||
if (flags == DB_MPOOL_NEW) {
|
||||
--bhp->ref;
|
||||
b_incr = 0;
|
||||
goto alloc;
|
||||
}
|
||||
break;
|
||||
case SECOND_MISS:
|
||||
/*
|
||||
* We allocated buffer space for the requested page, and found
|
||||
* the page still missing on our second pass through the buffer
|
||||
* cache. Instantiate the page.
|
||||
*/
|
||||
bhp = alloc_bhp;
|
||||
alloc_bhp = NULL;
|
||||
|
||||
/*
|
||||
* Initialize all the BH and hash bucket fields so we can call
|
||||
* __memp_bhfree if an error occurs.
|
||||
*
|
||||
* Append the buffer to the tail of the bucket list and update
|
||||
* the hash bucket's priority.
|
||||
*/
|
||||
b_incr = 1;
|
||||
|
||||
memset(bhp, 0, sizeof(BH));
|
||||
bhp->ref = 1;
|
||||
bhp->priority = UINT32_T_MAX;
|
||||
bhp->pgno = *pgnoaddr;
|
||||
bhp->mf_offset = mf_offset;
|
||||
SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
|
||||
hp->hash_priority =
|
||||
SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
||||
|
||||
/* If we extended the file, make sure the page is never lost. */
|
||||
if (extending) {
|
||||
++hp->hash_page_dirty;
|
||||
F_SET(bhp, BH_DIRTY | BH_DIRTY_CREATE);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we created the page, zero it out. If we didn't create
|
||||
* the page, read from the backing file.
|
||||
*
|
||||
* !!!
|
||||
* DB_MPOOL_NEW doesn't call the pgin function.
|
||||
*
|
||||
* If DB_MPOOL_CREATE is used, then the application's pgin
|
||||
* function has to be able to handle pages of 0's -- if it
|
||||
* uses DB_MPOOL_NEW, it can detect all of its page creates,
|
||||
* and not bother.
|
||||
*
|
||||
* If we're running in diagnostic mode, smash any bytes on the
|
||||
* page that are unknown quantities for the caller.
|
||||
*
|
||||
* Otherwise, read the page into memory, optionally creating it
|
||||
* if DB_MPOOL_CREATE is set.
|
||||
*/
|
||||
if (extending) {
|
||||
if (mfp->clear_len == 0)
|
||||
memset(bhp->buf, 0, mfp->stat.st_pagesize);
|
||||
else {
|
||||
memset(bhp->buf, 0, mfp->clear_len);
|
||||
#if defined(DIAGNOSTIC) || defined(UMRW)
|
||||
memset(bhp->buf + mfp->clear_len, CLEAR_BYTE,
|
||||
mfp->stat.st_pagesize - mfp->clear_len);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (flags == DB_MPOOL_CREATE && mfp->ftype != 0)
|
||||
F_SET(bhp, BH_CALLPGIN);
|
||||
|
||||
++mfp->stat.st_page_create;
|
||||
} else {
|
||||
F_SET(bhp, BH_TRASH);
|
||||
++mfp->stat.st_cache_miss;
|
||||
}
|
||||
|
||||
/* Increment buffer count referenced by MPOOLFILE. */
|
||||
MUTEX_LOCK(dbenv, &mfp->mutex);
|
||||
++mfp->block_cnt;
|
||||
MUTEX_UNLOCK(dbenv, &mfp->mutex);
|
||||
|
||||
/*
|
||||
* Initialize the mutex. This is the last initialization step,
|
||||
* because it's the only one that can fail, and everything else
|
||||
* must be set up or we can't jump to the err label because it
|
||||
* will call __memp_bhfree.
|
||||
*/
|
||||
if ((ret = __db_mutex_setup(dbenv,
|
||||
&dbmp->reginfo[n_cache], &bhp->mutex, 0)) != 0)
|
||||
goto err;
|
||||
}
|
||||
|
||||
DB_ASSERT(bhp->ref != 0);
|
||||
|
||||
/*
|
||||
* If we're the only reference, update buffer and bucket priorities.
|
||||
* We may be about to release the hash bucket lock, and everything
|
||||
* should be correct, first. (We've already done this if we created
|
||||
* the buffer, so there is no need to do it again.)
|
||||
*/
|
||||
if (state != SECOND_MISS && bhp->ref == 1) {
|
||||
bhp->priority = UINT32_T_MAX;
|
||||
SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
|
||||
SH_TAILQ_INSERT_TAIL(&hp->hash_bucket, bhp, hq);
|
||||
hp->hash_priority =
|
||||
SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're returning a page after our current notion of the last-page,
|
||||
* update our information. Note, there's no way to un-instantiate this
|
||||
* page, it's going to exist whether it's returned to us dirty or not.
|
||||
* BH_TRASH --
|
||||
* The buffer we found may need to be filled from the disk.
|
||||
*
|
||||
* It's possible for the read function to fail, which means we fail as
|
||||
* well. Note, the __memp_pgread() function discards and reacquires
|
||||
* the hash lock, so the buffer must be pinned down so that it cannot
|
||||
* move and its contents are unchanged. Discard the buffer on failure
|
||||
* unless another thread is waiting on our I/O to complete. It's OK to
|
||||
* leave the buffer around, as the waiting thread will see the BH_TRASH
|
||||
* flag set, and will also attempt to discard it. If there's a waiter,
|
||||
* we need to decrement our reference count.
|
||||
*/
|
||||
if (bhp->pgno > mfp->last_pgno)
|
||||
mfp->last_pgno = bhp->pgno;
|
||||
if (F_ISSET(bhp, BH_TRASH) &&
|
||||
(ret = __memp_pgread(dbmfp,
|
||||
&hp->hash_mutex, bhp, LF_ISSET(DB_MPOOL_CREATE) ? 1 : 0)) != 0)
|
||||
goto err;
|
||||
|
||||
*(void **)addrp = bhp->buf;
|
||||
|
||||
done: /* Update the chain search statistics. */
|
||||
if (st_hsearch) {
|
||||
++c_mp->stat.st_hash_searches;
|
||||
if (st_hsearch > c_mp->stat.st_hash_longest)
|
||||
c_mp->stat.st_hash_longest = st_hsearch;
|
||||
c_mp->stat.st_hash_examined += st_hsearch;
|
||||
/*
|
||||
* BH_CALLPGIN --
|
||||
* The buffer was processed for being written to disk, and now has
|
||||
* to be re-converted for use.
|
||||
*/
|
||||
if (F_ISSET(bhp, BH_CALLPGIN)) {
|
||||
if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
|
||||
goto err;
|
||||
F_CLR(bhp, BH_CALLPGIN);
|
||||
}
|
||||
|
||||
++dbmfp->pinref;
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
/* Update the file's pinned reference count. */
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
++dbmfp->pinref;
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/*
|
||||
* We want to switch threads as often as possible, and at awkward
|
||||
* times. Yield every time we get a new page to ensure contention.
|
||||
*/
|
||||
if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
|
||||
__os_yield(dbenv, 1);
|
||||
#endif
|
||||
|
||||
*(void **)addrp = bhp->buf;
|
||||
return (0);
|
||||
|
||||
err: /* Discard our reference. */
|
||||
if (b_incr)
|
||||
--bhp->ref;
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
err: /*
|
||||
* Discard our reference. If we're the only reference, discard the
|
||||
* buffer entirely. If we held a reference to a buffer, we are
|
||||
* also still holding the hash bucket mutex.
|
||||
*/
|
||||
if (b_incr) {
|
||||
if (bhp->ref == 1)
|
||||
(void)__memp_bhfree(dbmp, hp, bhp, 1);
|
||||
else {
|
||||
--bhp->ref;
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
/* If alloc_bhp is set, free the memory. */
|
||||
if (alloc_bhp != NULL)
|
||||
__db_shalloc_free(dbmp->reginfo[n_cache].addr, alloc_bhp);
|
||||
|
||||
*(void **)addrp = NULL;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
#ifdef HAVE_FILESYSTEM_NOTZERO
|
||||
/*
|
||||
* __memp_fs_notzero --
|
||||
* Initialize the underlying allocated pages in the file.
|
||||
*/
|
||||
static int
|
||||
__memp_fs_notzero(dbenv, dbmfp, mfp, pgnoaddr)
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
MPOOLFILE *mfp;
|
||||
db_pgno_t *pgnoaddr;
|
||||
{
|
||||
DB_IO db_io;
|
||||
u_int32_t i, npages;
|
||||
size_t nw;
|
||||
int ret;
|
||||
u_int8_t *page;
|
||||
char *fail;
|
||||
|
||||
/*
|
||||
* Pages allocated by writing pages past end-of-file are not zeroed,
|
||||
* on some systems. Recovery could theoretically be fooled by a page
|
||||
* showing up that contained garbage. In order to avoid this, we
|
||||
* have to write the pages out to disk, and flush them. The reason
|
||||
* for the flush is because if we don't sync, the allocation of another
|
||||
* page subsequent to this one might reach the disk first, and if we
|
||||
* crashed at the right moment, leave us with this page as the one
|
||||
* allocated by writing a page past it in the file.
|
||||
*
|
||||
* Hash is the only access method that allocates groups of pages. We
|
||||
* know that it will use the existence of the last page in a group to
|
||||
* signify that the entire group is OK; so, write all the pages but
|
||||
* the last one in the group, flush them to disk, and then write the
|
||||
* last one to disk and flush it.
|
||||
*/
|
||||
if ((ret = __os_calloc(dbenv, 1, mfp->stat.st_pagesize, &page)) != 0)
|
||||
return (ret);
|
||||
|
||||
db_io.fhp = dbmfp->fhp;
|
||||
db_io.mutexp = dbmfp->mutexp;
|
||||
db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
|
||||
db_io.buf = page;
|
||||
|
||||
npages = *pgnoaddr - mfp->last_pgno;
|
||||
for (i = 1; i < npages; ++i) {
|
||||
db_io.pgno = mfp->last_pgno + i;
|
||||
if ((ret = __os_io(dbenv, &db_io, DB_IO_WRITE, &nw)) != 0) {
|
||||
fail = "write";
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
if (i != 1 && (ret = __os_fsync(dbenv, dbmfp->fhp)) != 0) {
|
||||
fail = "sync";
|
||||
goto err;
|
||||
}
|
||||
|
||||
db_io.pgno = mfp->last_pgno + npages;
|
||||
if ((ret = __os_io(dbenv, &db_io, DB_IO_WRITE, &nw)) != 0) {
|
||||
fail = "write";
|
||||
goto err;
|
||||
}
|
||||
if ((ret = __os_fsync(dbenv, dbmfp->fhp)) != 0) {
|
||||
fail = "sync";
|
||||
err: __db_err(dbenv, "%s: %s failed for page %lu",
|
||||
__memp_fn(dbmfp), fail, (u_long)db_io.pgno);
|
||||
}
|
||||
|
||||
__os_free(dbenv, page);
|
||||
return (ret);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
1297
bdb/mp/mp_fopen.c
1297
bdb/mp/mp_fopen.c
File diff suppressed because it is too large
Load diff
202
bdb/mp/mp_fput.c
202
bdb/mp/mp_fput.c
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_fput.c,v 11.16 2000/11/30 00:58:41 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_fput.c,v 11.36 2002/08/09 19:04:11 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -15,43 +15,32 @@ static const char revid[] = "$Id: mp_fput.c,v 11.16 2000/11/30 00:58:41 ubell Ex
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#endif
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
/*
|
||||
* memp_fput --
|
||||
* __memp_fput --
|
||||
* Mpool file put function.
|
||||
*
|
||||
* PUBLIC: int __memp_fput __P((DB_MPOOLFILE *, void *, u_int32_t));
|
||||
*/
|
||||
int
|
||||
memp_fput(dbmfp, pgaddr, flags)
|
||||
__memp_fput(dbmfp, pgaddr, flags)
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
void *pgaddr;
|
||||
u_int32_t flags;
|
||||
{
|
||||
BH *bhp;
|
||||
BH *argbhp, *bhp, *prev;
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOL *c_mp, *mp;
|
||||
int ret, wrote;
|
||||
DB_MPOOL_HASH *hp;
|
||||
MPOOL *c_mp;
|
||||
u_int32_t n_cache;
|
||||
int adjust, ret;
|
||||
|
||||
dbmp = dbmfp->dbmp;
|
||||
dbenv = dbmp->dbenv;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_fput(dbmfp, pgaddr, flags));
|
||||
#endif
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
|
||||
|
|
@ -72,17 +61,6 @@ memp_fput(dbmfp, pgaddr, flags)
|
|||
}
|
||||
}
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Decrement the pinned reference count. */
|
||||
if (dbmfp->pinref == 0) {
|
||||
__db_err(dbenv,
|
||||
"%s: more pages returned than retrieved", __memp_fn(dbmfp));
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
return (EINVAL);
|
||||
} else
|
||||
--dbmfp->pinref;
|
||||
|
||||
/*
|
||||
* If we're mapping the file, there's nothing to do. Because we can
|
||||
* stop mapping the file at any time, we have to check on each buffer
|
||||
|
|
@ -90,40 +68,51 @@ memp_fput(dbmfp, pgaddr, flags)
|
|||
* region.
|
||||
*/
|
||||
if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
|
||||
(u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
(u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
|
||||
return (0);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
/*
|
||||
* Decrement the per-file pinned buffer count (mapped pages aren't
|
||||
* counted).
|
||||
*/
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
if (dbmfp->pinref == 0) {
|
||||
ret = EINVAL;
|
||||
__db_err(dbenv,
|
||||
"%s: more pages returned than retrieved", __memp_fn(dbmfp));
|
||||
} else {
|
||||
ret = 0;
|
||||
--dbmfp->pinref;
|
||||
}
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
if (ret != 0)
|
||||
return (ret);
|
||||
#endif
|
||||
|
||||
/* Convert the page address to a buffer header. */
|
||||
/* Convert a page address to a buffer header and hash bucket. */
|
||||
bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
|
||||
n_cache = NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
|
||||
c_mp = dbmp->reginfo[n_cache].primary;
|
||||
hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
|
||||
hp = &hp[NBUCKET(c_mp, bhp->mf_offset, bhp->pgno)];
|
||||
|
||||
/* Convert the buffer header to a cache. */
|
||||
c_mp = BH_TO_CACHE(dbmp, bhp);
|
||||
|
||||
/* UNLOCK THE REGION, LOCK THE CACHE. */
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
/* Set/clear the page bits. */
|
||||
if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
|
||||
++c_mp->stat.st_page_clean;
|
||||
--c_mp->stat.st_page_dirty;
|
||||
if (LF_ISSET(DB_MPOOL_CLEAN) &&
|
||||
F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
|
||||
DB_ASSERT(hp->hash_page_dirty != 0);
|
||||
--hp->hash_page_dirty;
|
||||
F_CLR(bhp, BH_DIRTY);
|
||||
}
|
||||
if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
|
||||
--c_mp->stat.st_page_clean;
|
||||
++c_mp->stat.st_page_dirty;
|
||||
++hp->hash_page_dirty;
|
||||
F_SET(bhp, BH_DIRTY);
|
||||
}
|
||||
if (LF_ISSET(DB_MPOOL_DISCARD))
|
||||
F_SET(bhp, BH_DISCARD);
|
||||
|
||||
/*
|
||||
* If the page is dirty and being scheduled to be written as part of
|
||||
* a checkpoint, we no longer know that the log is up-to-date.
|
||||
*/
|
||||
if (F_ISSET(bhp, BH_DIRTY) && F_ISSET(bhp, BH_SYNC))
|
||||
F_SET(bhp, BH_SYNC_LOGFLSH);
|
||||
|
||||
/*
|
||||
* Check for a reference count going to zero. This can happen if the
|
||||
* application returns a page twice.
|
||||
|
|
@ -131,56 +120,83 @@ memp_fput(dbmfp, pgaddr, flags)
|
|||
if (bhp->ref == 0) {
|
||||
__db_err(dbenv, "%s: page %lu: unpinned page returned",
|
||||
__memp_fn(dbmfp), (u_long)bhp->pgno);
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* If more than one reference to the page, we're done. Ignore the
|
||||
* discard flags (for now) and leave it at its position in the LRU
|
||||
* chain. The rest gets done at last reference close.
|
||||
* If more than one reference to the page or a reference other than a
|
||||
* thread waiting to flush the buffer to disk, we're done. Ignore the
|
||||
* discard flags (for now) and leave the buffer's priority alone.
|
||||
*/
|
||||
if (--bhp->ref > 0) {
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
if (--bhp->ref > 1 || (bhp->ref == 1 && !F_ISSET(bhp, BH_LOCKED))) {
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Move the buffer to the head/tail of the LRU chain. We do this
|
||||
* before writing the buffer for checkpoint purposes, as the write
|
||||
* can discard the region lock and allow another process to acquire
|
||||
* buffer. We could keep that from happening, but there seems no
|
||||
* reason to do so.
|
||||
*/
|
||||
SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh);
|
||||
if (F_ISSET(bhp, BH_DISCARD))
|
||||
SH_TAILQ_INSERT_HEAD(&c_mp->bhq, bhp, q, __bh);
|
||||
else
|
||||
SH_TAILQ_INSERT_TAIL(&c_mp->bhq, bhp, q);
|
||||
/* Update priority values. */
|
||||
if (F_ISSET(bhp, BH_DISCARD) ||
|
||||
dbmfp->mfp->priority == MPOOL_PRI_VERY_LOW)
|
||||
bhp->priority = 0;
|
||||
else {
|
||||
/*
|
||||
* We don't lock the LRU counter or the stat.st_pages field, if
|
||||
* we get garbage (which won't happen on a 32-bit machine), it
|
||||
* only means a buffer has the wrong priority.
|
||||
*/
|
||||
bhp->priority = c_mp->lru_count;
|
||||
|
||||
/*
|
||||
* If this buffer is scheduled for writing because of a checkpoint, we
|
||||
* need to write it (if it's dirty), or update the checkpoint counters
|
||||
* (if it's not dirty). If we try to write it and can't, that's not
|
||||
* necessarily an error as it's not completely unreasonable that the
|
||||
* application have permission to write the underlying file, but set a
|
||||
* flag so that the next time the memp_sync function is called we try
|
||||
* writing it there, as the checkpoint thread of control better be able
|
||||
* to write all of the files.
|
||||
*/
|
||||
if (F_ISSET(bhp, BH_SYNC)) {
|
||||
if (F_ISSET(bhp, BH_DIRTY)) {
|
||||
if (__memp_bhwrite(dbmp,
|
||||
dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
|
||||
F_SET(mp, MP_LSN_RETRY);
|
||||
} else {
|
||||
F_CLR(bhp, BH_SYNC);
|
||||
adjust = 0;
|
||||
if (dbmfp->mfp->priority != 0)
|
||||
adjust =
|
||||
(int)c_mp->stat.st_pages / dbmfp->mfp->priority;
|
||||
if (F_ISSET(bhp, BH_DIRTY))
|
||||
adjust += c_mp->stat.st_pages / MPOOL_PRI_DIRTY;
|
||||
|
||||
--mp->lsn_cnt;
|
||||
--dbmfp->mfp->lsn_cnt;
|
||||
}
|
||||
if (adjust > 0) {
|
||||
if (UINT32_T_MAX - bhp->priority <= (u_int32_t)adjust)
|
||||
bhp->priority += adjust;
|
||||
} else if (adjust < 0)
|
||||
if (bhp->priority > (u_int32_t)-adjust)
|
||||
bhp->priority += adjust;
|
||||
}
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
/*
|
||||
* Buffers on hash buckets are sorted by priority -- move the buffer
|
||||
* to the correct position in the list.
|
||||
*/
|
||||
argbhp = bhp;
|
||||
SH_TAILQ_REMOVE(&hp->hash_bucket, argbhp, hq, __bh);
|
||||
|
||||
prev = NULL;
|
||||
for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
|
||||
bhp != NULL; prev = bhp, bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
||||
if (bhp->priority > argbhp->priority)
|
||||
break;
|
||||
if (prev == NULL)
|
||||
SH_TAILQ_INSERT_HEAD(&hp->hash_bucket, argbhp, hq, __bh);
|
||||
else
|
||||
SH_TAILQ_INSERT_AFTER(&hp->hash_bucket, prev, argbhp, hq, __bh);
|
||||
|
||||
/* Reset the hash bucket's priority. */
|
||||
hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
__memp_check_order(hp);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The sync code has a separate counter for buffers on which it waits.
|
||||
* It reads that value without holding a lock so we update it as the
|
||||
* last thing we do. Once that value goes to 0, we won't see another
|
||||
* reference to that buffer being returned to the cache until the sync
|
||||
* code has finished, so we're safe as long as we don't let the value
|
||||
* go to 0 before we finish with the buffer.
|
||||
*/
|
||||
if (F_ISSET(argbhp, BH_LOCKED) && argbhp->ref_sync != 0)
|
||||
--argbhp->ref_sync;
|
||||
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_fset.c,v 11.13 2000/11/30 00:58:41 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_fset.c,v 11.25 2002/05/03 15:21:17 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -15,25 +15,18 @@ static const char revid[] = "$Id: mp_fset.c,v 11.13 2000/11/30 00:58:41 ubell Ex
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#endif
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
/*
|
||||
* memp_fset --
|
||||
* __memp_fset --
|
||||
* Mpool page set-flag routine.
|
||||
*
|
||||
* PUBLIC: int __memp_fset __P((DB_MPOOLFILE *, void *, u_int32_t));
|
||||
*/
|
||||
int
|
||||
memp_fset(dbmfp, pgaddr, flags)
|
||||
__memp_fset(dbmfp, pgaddr, flags)
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
void *pgaddr;
|
||||
u_int32_t flags;
|
||||
|
|
@ -41,17 +34,13 @@ memp_fset(dbmfp, pgaddr, flags)
|
|||
BH *bhp;
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOL *c_mp, *mp;
|
||||
DB_MPOOL_HASH *hp;
|
||||
MPOOL *c_mp;
|
||||
u_int32_t n_cache;
|
||||
int ret;
|
||||
|
||||
dbmp = dbmfp->dbmp;
|
||||
dbenv = dbmp->dbenv;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_fset(dbmfp, pgaddr, flags));
|
||||
#endif
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
|
||||
|
|
@ -60,7 +49,7 @@ memp_fset(dbmfp, pgaddr, flags)
|
|||
return (__db_ferr(dbenv, "memp_fset", 1));
|
||||
|
||||
if ((ret = __db_fchk(dbenv, "memp_fset", flags,
|
||||
DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
|
||||
DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0)
|
||||
return (ret);
|
||||
if ((ret = __db_fcchk(dbenv, "memp_fset",
|
||||
flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
|
||||
|
|
@ -72,27 +61,29 @@ memp_fset(dbmfp, pgaddr, flags)
|
|||
return (EACCES);
|
||||
}
|
||||
|
||||
/* Convert the page address to a buffer header. */
|
||||
/* Convert the page address to a buffer header and hash bucket. */
|
||||
bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
|
||||
n_cache = NCACHE(dbmp->reginfo[0].primary, bhp->mf_offset, bhp->pgno);
|
||||
c_mp = dbmp->reginfo[n_cache].primary;
|
||||
hp = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
|
||||
hp = &hp[NBUCKET(c_mp, bhp->mf_offset, bhp->pgno)];
|
||||
|
||||
/* Convert the buffer header to a cache. */
|
||||
c_mp = BH_TO_CACHE(dbmp, bhp);
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
if (LF_ISSET(DB_MPOOL_CLEAN) && F_ISSET(bhp, BH_DIRTY)) {
|
||||
++c_mp->stat.st_page_clean;
|
||||
--c_mp->stat.st_page_dirty;
|
||||
/* Set/clear the page bits. */
|
||||
if (LF_ISSET(DB_MPOOL_CLEAN) &&
|
||||
F_ISSET(bhp, BH_DIRTY) && !F_ISSET(bhp, BH_DIRTY_CREATE)) {
|
||||
DB_ASSERT(hp->hash_page_dirty != 0);
|
||||
--hp->hash_page_dirty;
|
||||
F_CLR(bhp, BH_DIRTY);
|
||||
}
|
||||
if (LF_ISSET(DB_MPOOL_DIRTY) && !F_ISSET(bhp, BH_DIRTY)) {
|
||||
--c_mp->stat.st_page_clean;
|
||||
++c_mp->stat.st_page_dirty;
|
||||
++hp->hash_page_dirty;
|
||||
F_SET(bhp, BH_DIRTY);
|
||||
}
|
||||
if (LF_ISSET(DB_MPOOL_DISCARD))
|
||||
F_SET(bhp, BH_DISCARD);
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,30 +1,30 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_method.c,v 11.10 2000/04/04 20:12:04 bostic Exp $";
|
||||
static const char revid[] = "$Id: mp_method.c,v 11.29 2002/03/27 04:32:27 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#ifdef HAVE_RPC
|
||||
#include <rpc/rpc.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#include "dbinc_auto/db_server.h"
|
||||
#include "dbinc_auto/rpc_client_ext.h"
|
||||
#endif
|
||||
|
||||
static int __memp_set_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int));
|
||||
|
|
@ -41,29 +41,46 @@ __memp_dbenv_create(dbenv)
|
|||
DB_ENV *dbenv;
|
||||
{
|
||||
/*
|
||||
* !!!
|
||||
* Our caller has not yet had the opportunity to reset the panic
|
||||
* state or turn off mutex locking, and so we can neither check
|
||||
* the panic state or acquire a mutex in the DB_ENV create path.
|
||||
*
|
||||
* We default to 32 8K pages. We don't default to a flat 256K, because
|
||||
* some systems require significantly more memory to hold 32 pages than
|
||||
* others. For example, HP-UX with POSIX pthreads needs 88 bytes for
|
||||
* a POSIX pthread mutex and almost 200 bytes per buffer header, while
|
||||
* Solaris needs 24 and 52 bytes for the same structures.
|
||||
* Solaris needs 24 and 52 bytes for the same structures. The minimum
|
||||
* number of hash buckets is 37. These contain a mutex also.
|
||||
*/
|
||||
dbenv->mp_bytes = 32 * ((8 * 1024) + sizeof(BH));
|
||||
dbenv->mp_bytes =
|
||||
32 * ((8 * 1024) + sizeof(BH)) + 37 * sizeof(DB_MPOOL_HASH);
|
||||
dbenv->mp_ncache = 1;
|
||||
|
||||
dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize;
|
||||
dbenv->set_cachesize = __memp_set_cachesize;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
/*
|
||||
* If we have a client, overwrite what we just setup to
|
||||
* point to client functions.
|
||||
*/
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) {
|
||||
dbenv->set_cachesize = __dbcl_env_cachesize;
|
||||
dbenv->set_mp_mmapsize = __dbcl_set_mp_mmapsize;
|
||||
}
|
||||
dbenv->memp_dump_region = NULL;
|
||||
dbenv->memp_fcreate = __dbcl_memp_fcreate;
|
||||
dbenv->memp_nameop = NULL;
|
||||
dbenv->memp_register = __dbcl_memp_register;
|
||||
dbenv->memp_stat = __dbcl_memp_stat;
|
||||
dbenv->memp_sync = __dbcl_memp_sync;
|
||||
dbenv->memp_trickle = __dbcl_memp_trickle;
|
||||
} else
|
||||
#endif
|
||||
|
||||
{
|
||||
dbenv->set_cachesize = __memp_set_cachesize;
|
||||
dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize;
|
||||
dbenv->memp_dump_region = __memp_dump_region;
|
||||
dbenv->memp_fcreate = __memp_fcreate;
|
||||
dbenv->memp_nameop = __memp_nameop;
|
||||
dbenv->memp_register = __memp_register;
|
||||
dbenv->memp_stat = __memp_stat;
|
||||
dbenv->memp_sync = __memp_sync;
|
||||
dbenv->memp_trickle = __memp_trickle;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -78,26 +95,50 @@ __memp_set_cachesize(dbenv, gbytes, bytes, ncache)
|
|||
{
|
||||
ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_cachesize");
|
||||
|
||||
dbenv->mp_gbytes = gbytes + bytes / GIGABYTE;
|
||||
dbenv->mp_bytes = bytes % GIGABYTE;
|
||||
dbenv->mp_ncache = ncache == 0 ? 1 : ncache;
|
||||
/* Normalize the values. */
|
||||
if (ncache == 0)
|
||||
ncache = 1;
|
||||
|
||||
/*
|
||||
* If the application requested less than 500Mb, increase the
|
||||
* cachesize by 25% to account for our overhead. (I'm guessing
|
||||
* that caches over 500Mb are specifically sized, i.e., it's
|
||||
* a large server and the application actually knows how much
|
||||
* memory is available.)
|
||||
* You can only store 4GB-1 in an unsigned 32-bit value, so correct for
|
||||
* applications that specify 4GB cache sizes -- we know what they meant.
|
||||
*/
|
||||
if (gbytes / ncache == 4 && bytes == 0) {
|
||||
--gbytes;
|
||||
bytes = GIGABYTE - 1;
|
||||
} else {
|
||||
gbytes += bytes / GIGABYTE;
|
||||
bytes %= GIGABYTE;
|
||||
}
|
||||
|
||||
/* Avoid too-large cache sizes, they result in a region size of zero. */
|
||||
if (gbytes / ncache > 4 || (gbytes / ncache == 4 && bytes != 0)) {
|
||||
__db_err(dbenv, "individual cache size too large");
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the application requested less than 500Mb, increase the cachesize
|
||||
* by 25% and factor in the size of the hash buckets to account for our
|
||||
* overhead. (I'm guessing caches over 500Mb are specifically sized,
|
||||
* that is, it's a large server and the application actually knows how
|
||||
* much memory is available. We only document the 25% overhead number,
|
||||
* not the hash buckets, but I don't see a reason to confuse the issue,
|
||||
* it shouldn't matter to an application.)
|
||||
*
|
||||
* There is a minimum cache size, regardless.
|
||||
*/
|
||||
if (dbenv->mp_gbytes == 0) {
|
||||
if (dbenv->mp_bytes < 500 * MEGABYTE)
|
||||
dbenv->mp_bytes += dbenv->mp_bytes / 4;
|
||||
if (dbenv->mp_bytes < DB_CACHESIZE_MIN)
|
||||
dbenv->mp_bytes = DB_CACHESIZE_MIN;
|
||||
if (gbytes == 0) {
|
||||
if (bytes < 500 * MEGABYTE)
|
||||
bytes += (bytes / 4) + 37 * sizeof(DB_MPOOL_HASH);
|
||||
if (bytes / ncache < DB_CACHESIZE_MIN)
|
||||
bytes = ncache * DB_CACHESIZE_MIN;
|
||||
}
|
||||
|
||||
dbenv->mp_gbytes = gbytes;
|
||||
dbenv->mp_bytes = bytes;
|
||||
dbenv->mp_ncache = ncache;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_region.c,v 11.26 2000/11/30 00:58:41 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_region.c,v 11.49 2002/05/07 18:42:20 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -17,11 +17,11 @@ static const char revid[] = "$Id: mp_region.c,v 11.26 2000/11/30 00:58:41 ubell
|
|||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
static int __mpool_init __P((DB_ENV *, DB_MPOOL *, int, int));
|
||||
#ifdef MUTEX_SYSTEM_RESOURCES
|
||||
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
|
||||
static size_t __mpool_region_maint __P((REGINFO *));
|
||||
#endif
|
||||
|
||||
|
|
@ -119,6 +119,8 @@ __memp_open(dbenv)
|
|||
|
||||
regids[i] = dbmp->reginfo[i].id;
|
||||
}
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
} else {
|
||||
/*
|
||||
* Determine how many regions there are going to be, allocate
|
||||
|
|
@ -135,6 +137,19 @@ __memp_open(dbenv)
|
|||
dbmp->reginfo[i].id = INVALID_REGION_ID;
|
||||
dbmp->reginfo[0] = reginfo;
|
||||
|
||||
/*
|
||||
* We have to unlock the primary mpool region before we attempt
|
||||
* to join the additional mpool regions. If we don't, we can
|
||||
* deadlock. The scenario is that we hold the primary mpool
|
||||
* region lock. We then try to attach to an additional mpool
|
||||
* region, which requires the acquisition/release of the main
|
||||
* region lock (to search the list of regions). If another
|
||||
* thread of control already holds the main region lock and is
|
||||
* waiting on our primary mpool region lock, we'll deadlock.
|
||||
* See [#4696] for more information.
|
||||
*/
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Join remaining regions. */
|
||||
regids = R_ADDR(dbmp->reginfo, mp->regids);
|
||||
for (i = 1; i < dbmp->nreg; ++i) {
|
||||
|
|
@ -155,17 +170,10 @@ __memp_open(dbenv)
|
|||
R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary);
|
||||
|
||||
/* If the region is threaded, allocate a mutex to lock the handles. */
|
||||
if (F_ISSET(dbenv, DB_ENV_THREAD)) {
|
||||
if ((ret = __db_mutex_alloc(
|
||||
dbenv, dbmp->reginfo, &dbmp->mutexp)) != 0) {
|
||||
goto err;
|
||||
}
|
||||
if ((ret =
|
||||
__db_mutex_init(dbenv, dbmp->mutexp, 0, MUTEX_THREAD)) != 0)
|
||||
goto err;
|
||||
}
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
if (F_ISSET(dbenv, DB_ENV_THREAD) &&
|
||||
(ret = __db_mutex_setup(dbenv, dbmp->reginfo, &dbmp->mutexp,
|
||||
MUTEX_ALLOC | MUTEX_THREAD)) != 0)
|
||||
goto err;
|
||||
|
||||
dbenv->mp_handle = dbmp;
|
||||
return (0);
|
||||
|
|
@ -180,12 +188,11 @@ err: if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
|
|||
if (dbmp->reginfo[i].id != INVALID_REGION_ID)
|
||||
(void)__db_r_detach(
|
||||
dbenv, &dbmp->reginfo[i], 0);
|
||||
__os_free(dbmp->reginfo,
|
||||
dbmp->nreg * sizeof(*dbmp->reginfo));
|
||||
__os_free(dbenv, dbmp->reginfo);
|
||||
}
|
||||
if (dbmp->mutexp != NULL)
|
||||
__db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp);
|
||||
__os_free(dbmp, sizeof(*dbmp));
|
||||
__os_free(dbenv, dbmp);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
|
|
@ -199,13 +206,13 @@ __mpool_init(dbenv, dbmp, reginfo_off, htab_buckets)
|
|||
DB_MPOOL *dbmp;
|
||||
int reginfo_off, htab_buckets;
|
||||
{
|
||||
DB_HASHTAB *htab;
|
||||
DB_MPOOL_HASH *htab;
|
||||
MPOOL *mp;
|
||||
REGINFO *reginfo;
|
||||
#ifdef MUTEX_SYSTEM_RESOURCES
|
||||
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
|
||||
size_t maint_size;
|
||||
#endif
|
||||
int ret;
|
||||
int i, ret;
|
||||
void *p;
|
||||
|
||||
mp = NULL;
|
||||
|
|
@ -218,7 +225,7 @@ __mpool_init(dbenv, dbmp, reginfo_off, htab_buckets)
|
|||
mp = reginfo->primary;
|
||||
memset(mp, 0, sizeof(*mp));
|
||||
|
||||
#ifdef MUTEX_SYSTEM_RESOURCES
|
||||
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
|
||||
maint_size = __mpool_region_maint(reginfo);
|
||||
/* Allocate room for the maintenance info and initialize it. */
|
||||
if ((ret = __db_shalloc(reginfo->addr,
|
||||
|
|
@ -231,14 +238,7 @@ __mpool_init(dbenv, dbmp, reginfo_off, htab_buckets)
|
|||
if (reginfo_off == 0) {
|
||||
SH_TAILQ_INIT(&mp->mpfq);
|
||||
|
||||
if ((ret = __db_shmutex_init(dbenv, &mp->sync_mutex,
|
||||
R_OFFSET(dbmp->reginfo, &mp->sync_mutex) +
|
||||
DB_FCNTL_OFF_MPOOL, 0, dbmp->reginfo,
|
||||
(REGMAINT *)R_ADDR(dbmp->reginfo, mp->maint_off))) != 0)
|
||||
goto err;
|
||||
|
||||
ZERO_LSN(mp->lsn);
|
||||
mp->lsn_cnt = 0;
|
||||
|
||||
mp->nreg = dbmp->nreg;
|
||||
if ((ret = __db_shalloc(dbmp->reginfo[0].addr,
|
||||
|
|
@ -247,32 +247,41 @@ __mpool_init(dbenv, dbmp, reginfo_off, htab_buckets)
|
|||
mp->regids = R_OFFSET(dbmp->reginfo, p);
|
||||
}
|
||||
|
||||
SH_TAILQ_INIT(&mp->bhq);
|
||||
|
||||
/* Allocate hash table space and initialize it. */
|
||||
if ((ret = __db_shalloc(reginfo->addr,
|
||||
htab_buckets * sizeof(DB_HASHTAB), 0, &htab)) != 0)
|
||||
htab_buckets * sizeof(DB_MPOOL_HASH), 0, &htab)) != 0)
|
||||
goto mem_err;
|
||||
__db_hashinit(htab, htab_buckets);
|
||||
mp->htab = R_OFFSET(reginfo, htab);
|
||||
mp->htab_buckets = htab_buckets;
|
||||
for (i = 0; i < htab_buckets; i++) {
|
||||
if ((ret = __db_mutex_setup(dbenv,
|
||||
reginfo, &htab[i].hash_mutex,
|
||||
MUTEX_NO_RLOCK)) != 0)
|
||||
return (ret);
|
||||
SH_TAILQ_INIT(&htab[i].hash_bucket);
|
||||
htab[i].hash_page_dirty = htab[i].hash_priority = 0;
|
||||
}
|
||||
mp->htab_buckets = mp->stat.st_hash_buckets = htab_buckets;
|
||||
|
||||
/*
|
||||
* Only the environment creator knows the total cache size, fill in
|
||||
* those statistics now.
|
||||
*/
|
||||
mp->stat.st_gbytes = dbenv->mp_gbytes;
|
||||
mp->stat.st_bytes = dbenv->mp_bytes;
|
||||
return (0);
|
||||
|
||||
mem_err:__db_err(dbenv, "Unable to allocate memory for mpool region");
|
||||
err: if (reginfo->primary != NULL)
|
||||
__db_shalloc_free(reginfo->addr, reginfo->primary);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_close --
|
||||
* Internal version of memp_close: only called from DB_ENV->close.
|
||||
* __memp_dbenv_refresh --
|
||||
* Clean up after the mpool system on a close or failed open.
|
||||
*
|
||||
* PUBLIC: int __memp_close __P((DB_ENV *));
|
||||
* PUBLIC: int __memp_dbenv_refresh __P((DB_ENV *));
|
||||
*/
|
||||
int
|
||||
__memp_close(dbenv)
|
||||
__memp_dbenv_refresh(dbenv)
|
||||
DB_ENV *dbenv;
|
||||
{
|
||||
DB_MPOOL *dbmp;
|
||||
|
|
@ -287,12 +296,12 @@ __memp_close(dbenv)
|
|||
/* Discard DB_MPREGs. */
|
||||
while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) {
|
||||
LIST_REMOVE(mpreg, q);
|
||||
__os_free(mpreg, sizeof(DB_MPREG));
|
||||
__os_free(dbenv, mpreg);
|
||||
}
|
||||
|
||||
/* Discard DB_MPOOLFILEs. */
|
||||
while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL)
|
||||
if ((t_ret = memp_fclose(dbmfp)) != 0 && ret == 0)
|
||||
if ((t_ret = __memp_fclose_int(dbmfp, 0)) != 0 && ret == 0)
|
||||
ret = t_ret;
|
||||
|
||||
/* Discard the thread mutex. */
|
||||
|
|
@ -305,14 +314,14 @@ __memp_close(dbenv)
|
|||
dbenv, &dbmp->reginfo[i], 0)) != 0 && ret == 0)
|
||||
ret = t_ret;
|
||||
|
||||
__os_free(dbmp->reginfo, dbmp->nreg * sizeof(*dbmp->reginfo));
|
||||
__os_free(dbmp, sizeof(*dbmp));
|
||||
__os_free(dbenv, dbmp->reginfo);
|
||||
__os_free(dbenv, dbmp);
|
||||
|
||||
dbenv->mp_handle = NULL;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
#ifdef MUTEX_SYSTEM_RESOURCES
|
||||
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
|
||||
/*
|
||||
* __mpool_region_maint --
|
||||
* Return the amount of space needed for region maintenance info.
|
||||
|
|
@ -328,9 +337,11 @@ __mpool_region_maint(infop)
|
|||
/*
|
||||
* For mutex maintenance we need one mutex per possible page.
|
||||
* Compute the maximum number of pages this cache can have.
|
||||
* Also add in an mpool mutex.
|
||||
* Also add in an mpool mutex and mutexes for all dbenv and db
|
||||
* handles.
|
||||
*/
|
||||
numlocks = ((infop->rp->size / DB_MIN_PGSIZE) + 1);
|
||||
numlocks += DB_MAX_HANDLES;
|
||||
s = sizeof(roff_t) * numlocks;
|
||||
return (s);
|
||||
}
|
||||
|
|
@ -347,11 +358,109 @@ __mpool_region_destroy(dbenv, infop)
|
|||
DB_ENV *dbenv;
|
||||
REGINFO *infop;
|
||||
{
|
||||
MPOOL *mp;
|
||||
__db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop,
|
||||
((MPOOL *)R_ADDR(infop, infop->rp->primary))->maint_off));
|
||||
|
||||
COMPQUIET(dbenv, NULL);
|
||||
mp = R_ADDR(infop, infop->rp->primary);
|
||||
|
||||
__db_shlocks_destroy(infop, (REGMAINT *)R_ADDR(infop, mp->maint_off));
|
||||
return;
|
||||
COMPQUIET(infop, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_nameop
|
||||
* Remove or rename a file in the pool.
|
||||
*
|
||||
* PUBLIC: int __memp_nameop __P((DB_ENV *,
|
||||
* PUBLIC: u_int8_t *, const char *, const char *, const char *));
|
||||
*
|
||||
* XXX
|
||||
* Undocumented interface: DB private.
|
||||
*/
|
||||
int
|
||||
__memp_nameop(dbenv, fileid, newname, fullold, fullnew)
|
||||
DB_ENV *dbenv;
|
||||
u_int8_t *fileid;
|
||||
const char *newname, *fullold, *fullnew;
|
||||
{
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOL *mp;
|
||||
MPOOLFILE *mfp;
|
||||
roff_t newname_off;
|
||||
int locked, ret;
|
||||
void *p;
|
||||
|
||||
locked = 0;
|
||||
dbmp = NULL;
|
||||
|
||||
if (!MPOOL_ON(dbenv))
|
||||
goto fsop;
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
||||
/*
|
||||
* Remove or rename a file that the mpool might know about. We assume
|
||||
* that the fop layer has the file locked for exclusive access, so we
|
||||
* don't worry about locking except for the mpool mutexes. Checkpoint
|
||||
* can happen at any time, independent of file locking, so we have to
|
||||
* do the actual unlink or rename system call to avoid any race.
|
||||
*
|
||||
* If this is a rename, allocate first, because we can't recursively
|
||||
* grab the region lock.
|
||||
*/
|
||||
if (newname == NULL)
|
||||
p = NULL;
|
||||
else {
|
||||
if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
|
||||
NULL, strlen(newname) + 1, &newname_off, &p)) != 0)
|
||||
return (ret);
|
||||
memcpy(p, newname, strlen(newname) + 1);
|
||||
}
|
||||
|
||||
locked = 1;
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/*
|
||||
* Find the file -- if mpool doesn't know about this file, that's not
|
||||
* an error-- we may not have it open.
|
||||
*/
|
||||
for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
|
||||
/* Ignore non-active files. */
|
||||
if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP))
|
||||
continue;
|
||||
|
||||
/* Ignore non-matching files. */
|
||||
if (memcmp(fileid, R_ADDR(
|
||||
dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN) != 0)
|
||||
continue;
|
||||
|
||||
/* If newname is NULL, we're removing the file. */
|
||||
if (newname == NULL) {
|
||||
MUTEX_LOCK(dbenv, &mfp->mutex);
|
||||
MPOOLFILE_IGNORE(mfp);
|
||||
MUTEX_UNLOCK(dbenv, &mfp->mutex);
|
||||
} else {
|
||||
/*
|
||||
* Else, it's a rename. We've allocated memory
|
||||
* for the new name. Swap it with the old one.
|
||||
*/
|
||||
p = R_ADDR(dbmp->reginfo, mfp->path_off);
|
||||
mfp->path_off = newname_off;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Delete the memory we no longer need. */
|
||||
if (p != NULL)
|
||||
__db_shalloc_free(dbmp->reginfo[0].addr, p);
|
||||
|
||||
fsop: if (newname == NULL)
|
||||
(void)__os_unlink(dbenv, fullold);
|
||||
else
|
||||
(void)__os_rename(dbenv, fullold, fullnew, 1);
|
||||
|
||||
if (locked)
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,38 +1,33 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_register.c,v 11.12 2000/11/15 19:25:39 sue Exp $";
|
||||
static const char revid[] = "$Id: mp_register.c,v 11.21 2002/03/27 04:32:27 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#endif
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
/*
|
||||
* memp_register --
|
||||
* Register a file type's pgin, pgout routines.
|
||||
*
|
||||
* PUBLIC: int __memp_register __P((DB_ENV *, int,
|
||||
* PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *),
|
||||
* PUBLIC: int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
|
||||
*/
|
||||
int
|
||||
memp_register(dbenv, ftype, pgin, pgout)
|
||||
__memp_register(dbenv, ftype, pgin, pgout)
|
||||
DB_ENV *dbenv;
|
||||
int ftype;
|
||||
int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *));
|
||||
|
|
@ -42,13 +37,9 @@ memp_register(dbenv, ftype, pgin, pgout)
|
|||
DB_MPREG *mpreg;
|
||||
int ret;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_register(dbenv, ftype, pgin, pgout));
|
||||
#endif
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
ENV_REQUIRES_CONFIG(dbenv, dbenv->mp_handle, DB_INIT_MPOOL);
|
||||
ENV_REQUIRES_CONFIG(dbenv,
|
||||
dbenv->mp_handle, "DB_ENV->memp_register", DB_INIT_MPOOL);
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
|
||||
|
|
@ -70,7 +61,7 @@ memp_register(dbenv, ftype, pgin, pgout)
|
|||
return (0);
|
||||
|
||||
/* New entry. */
|
||||
if ((ret = __os_malloc(dbenv, sizeof(DB_MPREG), NULL, &mpreg)) != 0)
|
||||
if ((ret = __os_malloc(dbenv, sizeof(DB_MPREG), &mpreg)) != 0)
|
||||
return (ret);
|
||||
|
||||
mpreg->ftype = ftype;
|
||||
|
|
|
|||
327
bdb/mp/mp_stat.c
327
bdb/mp/mp_stat.c
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_stat.c,v 11.21 2001/01/09 16:59:30 bostic Exp $";
|
||||
static const char revid[] = "$Id: mp_stat.c,v 11.51 2002/08/06 06:13:47 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -18,123 +18,150 @@ static const char revid[] = "$Id: mp_stat.c,v 11.21 2001/01/09 16:59:30 bostic E
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_page.h"
|
||||
#include "db_shash.h"
|
||||
#include "db_am.h"
|
||||
#include "mp.h"
|
||||
#include "dbinc/db_page.h"
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/db_am.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#endif
|
||||
|
||||
static void __memp_dumpcache
|
||||
__P((DB_MPOOL *, REGINFO *, size_t *, FILE *, u_int32_t));
|
||||
static void __memp_dumpcache __P((DB_ENV *,
|
||||
DB_MPOOL *, REGINFO *, size_t *, FILE *, u_int32_t));
|
||||
static void __memp_pbh __P((DB_MPOOL *, BH *, size_t *, FILE *));
|
||||
static void __memp_stat_wait __P((REGINFO *, MPOOL *, DB_MPOOL_STAT *, int));
|
||||
|
||||
/*
|
||||
* memp_stat --
|
||||
* __memp_stat --
|
||||
* Display MPOOL statistics.
|
||||
*
|
||||
* PUBLIC: int __memp_stat
|
||||
* PUBLIC: __P((DB_ENV *, DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t));
|
||||
*/
|
||||
int
|
||||
memp_stat(dbenv, gspp, fspp, db_malloc)
|
||||
__memp_stat(dbenv, gspp, fspp, flags)
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL_STAT **gspp;
|
||||
DB_MPOOL_FSTAT ***fspp;
|
||||
void *(*db_malloc) __P((size_t));
|
||||
u_int32_t flags;
|
||||
{
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPOOL_FSTAT **tfsp, *tstruct;
|
||||
DB_MPOOL_STAT *sp;
|
||||
MPOOL *c_mp, *mp;
|
||||
MPOOLFILE *mfp;
|
||||
char *tname;
|
||||
size_t len, nlen;
|
||||
u_int32_t i;
|
||||
size_t len, nlen, pagesize;
|
||||
u_int32_t pages, i;
|
||||
int ret;
|
||||
char *name;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_stat(dbenv, gspp, fspp, db_malloc));
|
||||
#endif
|
||||
char *name, *tname;
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
ENV_REQUIRES_CONFIG(dbenv, dbenv->mp_handle, DB_INIT_MPOOL);
|
||||
ENV_REQUIRES_CONFIG(dbenv,
|
||||
dbenv->mp_handle, "memp_stat", DB_INIT_MPOOL);
|
||||
|
||||
if ((ret = __db_fchk(dbenv,
|
||||
"DB_ENV->memp_stat", flags, DB_STAT_CLEAR)) != 0)
|
||||
return (ret);
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
sp = NULL;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
||||
/* Global statistics. */
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
if (gspp != NULL) {
|
||||
*gspp = NULL;
|
||||
|
||||
if ((ret = __os_calloc(dbenv, 1, sizeof(**gspp), gspp)) != 0)
|
||||
if ((ret = __os_umalloc(dbenv, sizeof(**gspp), gspp)) != 0)
|
||||
return (ret);
|
||||
memset(*gspp, 0, sizeof(**gspp));
|
||||
sp = *gspp;
|
||||
|
||||
/*
|
||||
* Initialization and information that is not maintained on
|
||||
* a per-cache basis.
|
||||
*/
|
||||
sp->st_hash_longest = 0;
|
||||
sp->st_region_wait = dbmp->reginfo[0].rp->mutex.mutex_set_wait;
|
||||
sp->st_region_nowait =
|
||||
dbmp->reginfo[0].rp->mutex.mutex_set_nowait;
|
||||
sp->st_gbytes = dbenv->mp_gbytes;
|
||||
sp->st_bytes = dbenv->mp_bytes;
|
||||
c_mp = dbmp->reginfo[0].primary;
|
||||
sp->st_gbytes = c_mp->stat.st_gbytes;
|
||||
sp->st_bytes = c_mp->stat.st_bytes;
|
||||
sp->st_ncache = dbmp->nreg;
|
||||
sp->st_regsize = dbmp->reginfo[0].rp->size;
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Walk the cache list and accumulate the global information. */
|
||||
for (i = 0; i < mp->nreg; ++i) {
|
||||
c_mp = dbmp->reginfo[i].primary;
|
||||
|
||||
sp->st_map += c_mp->stat.st_map;
|
||||
sp->st_cache_hit += c_mp->stat.st_cache_hit;
|
||||
sp->st_cache_miss += c_mp->stat.st_cache_miss;
|
||||
sp->st_map += c_mp->stat.st_map;
|
||||
sp->st_page_create += c_mp->stat.st_page_create;
|
||||
sp->st_page_in += c_mp->stat.st_page_in;
|
||||
sp->st_page_out += c_mp->stat.st_page_out;
|
||||
sp->st_ro_evict += c_mp->stat.st_ro_evict;
|
||||
sp->st_rw_evict += c_mp->stat.st_rw_evict;
|
||||
sp->st_page_trickle += c_mp->stat.st_page_trickle;
|
||||
sp->st_pages += c_mp->stat.st_pages;
|
||||
/*
|
||||
* st_page_dirty calculated by __memp_stat_hash
|
||||
* st_page_clean calculated here
|
||||
*/
|
||||
__memp_stat_hash(
|
||||
&dbmp->reginfo[i], c_mp, &sp->st_page_dirty);
|
||||
sp->st_page_clean = sp->st_pages - sp->st_page_dirty;
|
||||
sp->st_hash_buckets += c_mp->stat.st_hash_buckets;
|
||||
sp->st_hash_searches += c_mp->stat.st_hash_searches;
|
||||
if (c_mp->stat.st_hash_longest > sp->st_hash_longest)
|
||||
sp->st_hash_longest =
|
||||
c_mp->stat.st_hash_longest;
|
||||
sp->st_hash_longest += c_mp->stat.st_hash_longest;
|
||||
sp->st_hash_examined += c_mp->stat.st_hash_examined;
|
||||
sp->st_page_clean += c_mp->stat.st_page_clean;
|
||||
sp->st_page_dirty += c_mp->stat.st_page_dirty;
|
||||
sp->st_page_trickle += c_mp->stat.st_page_trickle;
|
||||
sp->st_region_wait += c_mp->stat.st_region_wait;
|
||||
sp->st_region_nowait += c_mp->stat.st_region_nowait;
|
||||
/*
|
||||
* st_hash_nowait calculated by __memp_stat_wait
|
||||
* st_hash_wait
|
||||
*/
|
||||
__memp_stat_wait(&dbmp->reginfo[i], c_mp, sp, flags);
|
||||
sp->st_region_nowait +=
|
||||
dbmp->reginfo[i].rp->mutex.mutex_set_nowait;
|
||||
sp->st_region_wait +=
|
||||
dbmp->reginfo[i].rp->mutex.mutex_set_wait;
|
||||
sp->st_alloc += c_mp->stat.st_alloc;
|
||||
sp->st_alloc_buckets += c_mp->stat.st_alloc_buckets;
|
||||
if (sp->st_alloc_max_buckets <
|
||||
c_mp->stat.st_alloc_max_buckets)
|
||||
sp->st_alloc_max_buckets =
|
||||
c_mp->stat.st_alloc_max_buckets;
|
||||
sp->st_alloc_pages += c_mp->stat.st_alloc_pages;
|
||||
if (sp->st_alloc_max_pages <
|
||||
c_mp->stat.st_alloc_max_pages)
|
||||
sp->st_alloc_max_pages =
|
||||
c_mp->stat.st_alloc_max_pages;
|
||||
|
||||
if (LF_ISSET(DB_STAT_CLEAR)) {
|
||||
dbmp->reginfo[i].rp->mutex.mutex_set_wait = 0;
|
||||
dbmp->reginfo[i].rp->mutex.mutex_set_nowait = 0;
|
||||
pages = c_mp->stat.st_pages;
|
||||
memset(&c_mp->stat, 0, sizeof(c_mp->stat));
|
||||
c_mp->stat.st_hash_buckets = c_mp->htab_buckets;
|
||||
c_mp->stat.st_pages = pages;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We have duplicate statistics fields in the cache and
|
||||
* per-file structures. The counters are only incremented
|
||||
* in the per-file structures, though. The intent is that
|
||||
* if we ever flush files from the pool we can save their
|
||||
* last known totals in the cache structure.
|
||||
* We have duplicate statistics fields in per-file structures
|
||||
* and the cache. The counters are only incremented in the
|
||||
* per-file structures, except if a file is flushed from the
|
||||
* mpool, at which time we copy its information into the cache
|
||||
* statistics. We added the cache information above, now we
|
||||
* add the per-file information.
|
||||
*/
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
|
||||
sp->st_map += mfp->stat.st_map;
|
||||
sp->st_cache_hit += mfp->stat.st_cache_hit;
|
||||
sp->st_cache_miss += mfp->stat.st_cache_miss;
|
||||
sp->st_map += mfp->stat.st_map;
|
||||
sp->st_page_create += mfp->stat.st_page_create;
|
||||
sp->st_page_in += mfp->stat.st_page_in;
|
||||
sp->st_page_out += mfp->stat.st_page_out;
|
||||
if (fspp == NULL && LF_ISSET(DB_STAT_CLEAR)) {
|
||||
pagesize = mfp->stat.st_pagesize;
|
||||
memset(&mfp->stat, 0, sizeof(mfp->stat));
|
||||
mfp->stat.st_pagesize = pagesize;
|
||||
}
|
||||
}
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
}
|
||||
|
||||
|
|
@ -142,9 +169,8 @@ memp_stat(dbenv, gspp, fspp, db_malloc)
|
|||
if (fspp != NULL) {
|
||||
*fspp = NULL;
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Count the MPOOLFILE structures. */
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
for (i = 0, len = 0,
|
||||
mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
mfp != NULL;
|
||||
|
|
@ -153,18 +179,15 @@ memp_stat(dbenv, gspp, fspp, db_malloc)
|
|||
sizeof(DB_MPOOL_FSTAT) +
|
||||
strlen(__memp_fns(dbmp, mfp)) + 1;
|
||||
len += sizeof(DB_MPOOL_FSTAT *); /* Trailing NULL */
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
if (len == 0)
|
||||
if (i == 0)
|
||||
return (0);
|
||||
|
||||
/* Allocate space */
|
||||
if ((ret = __os_malloc(dbenv, len, db_malloc, fspp)) != 0)
|
||||
if ((ret = __os_umalloc(dbenv, len, fspp)) != 0)
|
||||
return (ret);
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/*
|
||||
* Build each individual entry. We assume that an array of
|
||||
* pointers are aligned correctly to be followed by an array
|
||||
|
|
@ -179,20 +202,30 @@ memp_stat(dbenv, gspp, fspp, db_malloc)
|
|||
tstruct = (DB_MPOOL_FSTAT *)(tfsp + i + 1);
|
||||
tname = (char *)(tstruct + i);
|
||||
|
||||
/*
|
||||
* Files may have been opened since we counted, don't walk
|
||||
* off the end of the allocated space.
|
||||
*/
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
mfp != NULL;
|
||||
mfp != NULL && i-- > 0;
|
||||
++tfsp, ++tstruct, tname += nlen,
|
||||
mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
|
||||
name = __memp_fns(dbmp, mfp);
|
||||
nlen = strlen(name) + 1;
|
||||
*tfsp = tstruct;
|
||||
*tstruct = mfp->stat;
|
||||
if (LF_ISSET(DB_STAT_CLEAR)) {
|
||||
pagesize = mfp->stat.st_pagesize;
|
||||
memset(&mfp->stat, 0, sizeof(mfp->stat));
|
||||
mfp->stat.st_pagesize = pagesize;
|
||||
}
|
||||
tstruct->file_name = tname;
|
||||
memcpy(tname, name, nlen);
|
||||
}
|
||||
*tfsp = NULL;
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
*tfsp = NULL;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
|
@ -200,7 +233,6 @@ memp_stat(dbenv, gspp, fspp, db_malloc)
|
|||
#define FMAP_ENTRIES 200 /* Files we map. */
|
||||
|
||||
#define MPOOL_DUMP_HASH 0x01 /* Debug hash chains. */
|
||||
#define MPOOL_DUMP_LRU 0x02 /* Debug LRU chains. */
|
||||
#define MPOOL_DUMP_MEM 0x04 /* Debug region memory. */
|
||||
#define MPOOL_DUMP_ALL 0x07 /* Debug all. */
|
||||
|
||||
|
|
@ -208,14 +240,23 @@ memp_stat(dbenv, gspp, fspp, db_malloc)
|
|||
* __memp_dump_region --
|
||||
* Display MPOOL structures.
|
||||
*
|
||||
* PUBLIC: void __memp_dump_region __P((DB_ENV *, char *, FILE *));
|
||||
* PUBLIC: int __memp_dump_region __P((DB_ENV *, char *, FILE *));
|
||||
*/
|
||||
void
|
||||
int
|
||||
__memp_dump_region(dbenv, area, fp)
|
||||
DB_ENV *dbenv;
|
||||
char *area;
|
||||
FILE *fp;
|
||||
{
|
||||
static const FN fn[] = {
|
||||
{ MP_CAN_MMAP, "mmapped" },
|
||||
{ MP_DEADFILE, "dead" },
|
||||
{ MP_DIRECT, "no buffer" },
|
||||
{ MP_EXTENT, "extent" },
|
||||
{ MP_TEMP, "temporary" },
|
||||
{ MP_UNLINK, "unlink" },
|
||||
{ 0, NULL }
|
||||
};
|
||||
DB_MPOOL *dbmp;
|
||||
DB_MPOOLFILE *dbmfp;
|
||||
MPOOL *mp;
|
||||
|
|
@ -225,6 +266,10 @@ __memp_dump_region(dbenv, area, fp)
|
|||
int cnt;
|
||||
u_int8_t *p;
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
ENV_REQUIRES_CONFIG(dbenv,
|
||||
dbenv->mp_handle, "memp_dump_region", DB_INIT_MPOOL);
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
|
||||
/* Make it easy to call from the debugger. */
|
||||
|
|
@ -239,40 +284,42 @@ __memp_dump_region(dbenv, area, fp)
|
|||
case 'h':
|
||||
LF_SET(MPOOL_DUMP_HASH);
|
||||
break;
|
||||
case 'l':
|
||||
LF_SET(MPOOL_DUMP_LRU);
|
||||
break;
|
||||
case 'm':
|
||||
LF_SET(MPOOL_DUMP_MEM);
|
||||
break;
|
||||
}
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
||||
/* Display MPOOL structures. */
|
||||
(void)fprintf(fp, "%s\nPool (region addr 0x%lx)\n",
|
||||
DB_LINE, (u_long)dbmp->reginfo[0].addr);
|
||||
DB_LINE, P_TO_ULONG(dbmp->reginfo[0].addr));
|
||||
|
||||
/* Display the MPOOLFILE structures. */
|
||||
cnt = 0;
|
||||
for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
for (cnt = 0, mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile);
|
||||
mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile), ++cnt) {
|
||||
(void)fprintf(fp, "File #%d: %s: type %ld, %s\n\t [UID: ",
|
||||
cnt + 1, __memp_fns(dbmp, mfp), (long)mfp->ftype,
|
||||
F_ISSET(mfp, MP_CAN_MMAP) ? "mmap" : "read/write");
|
||||
(void)fprintf(fp, "File #%d: %s: pagesize %lu\n", cnt + 1,
|
||||
__memp_fns(dbmp, mfp), (u_long)mfp->stat.st_pagesize);
|
||||
(void)fprintf(fp, "\t type %ld; ref %lu; blocks %lu; last %lu;",
|
||||
(long)mfp->ftype, (u_long)mfp->mpf_cnt,
|
||||
(u_long)mfp->block_cnt, (u_long)mfp->last_pgno);
|
||||
__db_prflags(mfp->flags, fn, fp);
|
||||
|
||||
(void)fprintf(fp, "\n\t UID: ");
|
||||
p = R_ADDR(dbmp->reginfo, mfp->fileid_off);
|
||||
for (i = 0; i < DB_FILE_ID_LEN; ++i) {
|
||||
(void)fprintf(fp, "%x", *p++);
|
||||
for (i = 0; i < DB_FILE_ID_LEN; ++i, ++p) {
|
||||
(void)fprintf(fp, "%x", (u_int)*p);
|
||||
if (i < DB_FILE_ID_LEN - 1)
|
||||
(void)fprintf(fp, " ");
|
||||
}
|
||||
(void)fprintf(fp, "]\n");
|
||||
(void)fprintf(fp, "\n");
|
||||
if (cnt < FMAP_ENTRIES)
|
||||
fmap[cnt] = R_OFFSET(dbmp->reginfo, mfp);
|
||||
}
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp);
|
||||
for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
|
||||
dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q), ++cnt) {
|
||||
(void)fprintf(fp, "File #%d: %s: per-process, %s\n",
|
||||
|
|
@ -281,6 +328,7 @@ __memp_dump_region(dbenv, area, fp)
|
|||
if (cnt < FMAP_ENTRIES)
|
||||
fmap[cnt] = R_OFFSET(dbmp->reginfo, mfp);
|
||||
}
|
||||
MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp);
|
||||
if (cnt < FMAP_ENTRIES)
|
||||
fmap[cnt] = INVALID_ROFF;
|
||||
else
|
||||
|
|
@ -289,13 +337,14 @@ __memp_dump_region(dbenv, area, fp)
|
|||
/* Dump the memory pools. */
|
||||
for (i = 0; i < mp->nreg; ++i) {
|
||||
(void)fprintf(fp, "%s\nCache #%d:\n", DB_LINE, i + 1);
|
||||
__memp_dumpcache(dbmp, &dbmp->reginfo[i], fmap, fp, flags);
|
||||
__memp_dumpcache(
|
||||
dbenv, dbmp, &dbmp->reginfo[i], fmap, fp, flags);
|
||||
}
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Flush in case we're debugging. */
|
||||
(void)fflush(fp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -303,7 +352,8 @@ __memp_dump_region(dbenv, area, fp)
|
|||
* Display statistics for a cache.
|
||||
*/
|
||||
static void
|
||||
__memp_dumpcache(dbmp, reginfo, fmap, fp, flags)
|
||||
__memp_dumpcache(dbenv, dbmp, reginfo, fmap, fp, flags)
|
||||
DB_ENV *dbenv;
|
||||
DB_MPOOL *dbmp;
|
||||
REGINFO *reginfo;
|
||||
size_t *fmap;
|
||||
|
|
@ -311,7 +361,7 @@ __memp_dumpcache(dbmp, reginfo, fmap, fp, flags)
|
|||
u_int32_t flags;
|
||||
{
|
||||
BH *bhp;
|
||||
DB_HASHTAB *dbht;
|
||||
DB_MPOOL_HASH *hp;
|
||||
MPOOL *c_mp;
|
||||
int bucket;
|
||||
|
||||
|
|
@ -320,25 +370,22 @@ __memp_dumpcache(dbmp, reginfo, fmap, fp, flags)
|
|||
/* Display the hash table list of BH's. */
|
||||
if (LF_ISSET(MPOOL_DUMP_HASH)) {
|
||||
(void)fprintf(fp,
|
||||
"%s\nBH hash table (%lu hash slots)\npageno, file, ref, address\n",
|
||||
"%s\nBH hash table (%lu hash slots)\nbucket (priority):\n",
|
||||
DB_LINE, (u_long)c_mp->htab_buckets);
|
||||
for (dbht = R_ADDR(reginfo, c_mp->htab),
|
||||
bucket = 0; bucket < c_mp->htab_buckets; ++dbht, ++bucket) {
|
||||
if (SH_TAILQ_FIRST(dbht, __bh) != NULL)
|
||||
(void)fprintf(fp, "%lu:\n", (u_long)bucket);
|
||||
for (bhp = SH_TAILQ_FIRST(dbht, __bh);
|
||||
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
||||
__memp_pbh(dbmp, bhp, fmap, fp);
|
||||
}
|
||||
}
|
||||
(void)fprintf(fp,
|
||||
"\tpageno, file, ref, address [LSN] priority\n");
|
||||
|
||||
/* Display the LRU list of BH's. */
|
||||
if (LF_ISSET(MPOOL_DUMP_LRU)) {
|
||||
(void)fprintf(fp, "%s\nBH LRU list\n", DB_LINE);
|
||||
(void)fprintf(fp, "pageno, file, ref, address\n");
|
||||
for (bhp = SH_TAILQ_FIRST(&c_mp->bhq, __bh);
|
||||
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
|
||||
__memp_pbh(dbmp, bhp, fmap, fp);
|
||||
for (hp = R_ADDR(reginfo, c_mp->htab),
|
||||
bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
|
||||
MUTEX_LOCK(dbenv, &hp->hash_mutex);
|
||||
if ((bhp =
|
||||
SH_TAILQ_FIRST(&hp->hash_bucket, __bh)) != NULL)
|
||||
(void)fprintf(fp, "%lu (%u):\n",
|
||||
(u_long)bucket, hp->hash_priority);
|
||||
for (; bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
|
||||
__memp_pbh(dbmp, bhp, fmap, fp);
|
||||
MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump the memory pool. */
|
||||
|
|
@ -360,10 +407,9 @@ __memp_pbh(dbmp, bhp, fmap, fp)
|
|||
static const FN fn[] = {
|
||||
{ BH_CALLPGIN, "callpgin" },
|
||||
{ BH_DIRTY, "dirty" },
|
||||
{ BH_DIRTY_CREATE, "created" },
|
||||
{ BH_DISCARD, "discard" },
|
||||
{ BH_LOCKED, "locked" },
|
||||
{ BH_SYNC, "sync" },
|
||||
{ BH_SYNC_LOGFLSH, "sync:logflush" },
|
||||
{ BH_TRASH, "trash" },
|
||||
{ 0, NULL }
|
||||
};
|
||||
|
|
@ -374,15 +420,72 @@ __memp_pbh(dbmp, bhp, fmap, fp)
|
|||
break;
|
||||
|
||||
if (fmap[i] == INVALID_ROFF)
|
||||
(void)fprintf(fp, " %4lu, %lu, %2lu, %lu",
|
||||
(void)fprintf(fp, "\t%5lu, %lu, %2lu, %8lu [%lu,%lu] %lu",
|
||||
(u_long)bhp->pgno, (u_long)bhp->mf_offset,
|
||||
(u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp));
|
||||
(u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp),
|
||||
(u_long)LSN(bhp->buf).file, (u_long)LSN(bhp->buf).offset,
|
||||
(u_long)bhp->priority);
|
||||
else
|
||||
(void)fprintf(fp, " %4lu, #%d, %2lu, %lu",
|
||||
(void)fprintf(fp, "\t%5lu, #%d, %2lu, %8lu [%lu,%lu] %lu",
|
||||
(u_long)bhp->pgno, i + 1,
|
||||
(u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp));
|
||||
(u_long)bhp->ref, (u_long)R_OFFSET(dbmp->reginfo, bhp),
|
||||
(u_long)LSN(bhp->buf).file, (u_long)LSN(bhp->buf).offset,
|
||||
(u_long)bhp->priority);
|
||||
|
||||
__db_prflags(bhp->flags, fn, fp);
|
||||
|
||||
(void)fprintf(fp, "\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_stat_hash --
|
||||
* Total hash bucket stats (other than mutex wait) into the region.
|
||||
*
|
||||
* PUBLIC: void __memp_stat_hash __P((REGINFO *, MPOOL *, u_int32_t *));
|
||||
*/
|
||||
void
|
||||
__memp_stat_hash(reginfo, mp, dirtyp)
|
||||
REGINFO *reginfo;
|
||||
MPOOL *mp;
|
||||
u_int32_t *dirtyp;
|
||||
{
|
||||
DB_MPOOL_HASH *hp;
|
||||
u_int32_t dirty;
|
||||
int i;
|
||||
|
||||
hp = R_ADDR(reginfo, mp->htab);
|
||||
for (i = 0, dirty = 0; i < mp->htab_buckets; i++, hp++)
|
||||
dirty += hp->hash_page_dirty;
|
||||
*dirtyp = dirty;
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_stat_wait --
|
||||
* Total hash bucket wait stats into the region.
|
||||
*/
|
||||
static void
|
||||
__memp_stat_wait(reginfo, mp, mstat, flags)
|
||||
REGINFO *reginfo;
|
||||
MPOOL *mp;
|
||||
DB_MPOOL_STAT *mstat;
|
||||
int flags;
|
||||
{
|
||||
DB_MPOOL_HASH *hp;
|
||||
DB_MUTEX *mutexp;
|
||||
int i;
|
||||
|
||||
mstat->st_hash_max_wait = 0;
|
||||
hp = R_ADDR(reginfo, mp->htab);
|
||||
for (i = 0; i < mp->htab_buckets; i++, hp++) {
|
||||
mutexp = &hp->hash_mutex;
|
||||
mstat->st_hash_nowait += mutexp->mutex_set_nowait;
|
||||
mstat->st_hash_wait += mutexp->mutex_set_wait;
|
||||
if (mutexp->mutex_set_wait > mstat->st_hash_max_wait)
|
||||
mstat->st_hash_max_wait = mutexp->mutex_set_wait;
|
||||
|
||||
if (LF_ISSET(DB_STAT_CLEAR)) {
|
||||
mutexp->mutex_set_wait = 0;
|
||||
mutexp->mutex_set_nowait = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
957
bdb/mp/mp_sync.c
957
bdb/mp/mp_sync.c
File diff suppressed because it is too large
Load diff
|
|
@ -1,13 +1,13 @@
|
|||
/*-
|
||||
* See the file LICENSE for redistribution information.
|
||||
*
|
||||
* Copyright (c) 1996, 1997, 1998, 1999, 2000
|
||||
* Copyright (c) 1996-2002
|
||||
* Sleepycat Software. All rights reserved.
|
||||
*/
|
||||
#include "db_config.h"
|
||||
|
||||
#ifndef lint
|
||||
static const char revid[] = "$Id: mp_trickle.c,v 11.12 2000/11/30 00:58:41 ubell Exp $";
|
||||
static const char revid[] = "$Id: mp_trickle.c,v 11.24 2002/08/06 06:13:53 bostic Exp $";
|
||||
#endif /* not lint */
|
||||
|
||||
#ifndef NO_SYSTEM_INCLUDES
|
||||
|
|
@ -16,42 +16,29 @@ static const char revid[] = "$Id: mp_trickle.c,v 11.12 2000/11/30 00:58:41 ubell
|
|||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "db_server.h"
|
||||
#endif
|
||||
|
||||
#include "db_int.h"
|
||||
#include "db_shash.h"
|
||||
#include "mp.h"
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
#include "gen_client_ext.h"
|
||||
#include "rpc_client_ext.h"
|
||||
#endif
|
||||
|
||||
static int __memp_trick __P((DB_ENV *, int, int, int *));
|
||||
#include "dbinc/db_shash.h"
|
||||
#include "dbinc/mp.h"
|
||||
|
||||
/*
|
||||
* memp_trickle --
|
||||
* __memp_trickle --
|
||||
* Keep a specified percentage of the buffers clean.
|
||||
*
|
||||
* PUBLIC: int __memp_trickle __P((DB_ENV *, int, int *));
|
||||
*/
|
||||
int
|
||||
memp_trickle(dbenv, pct, nwrotep)
|
||||
__memp_trickle(dbenv, pct, nwrotep)
|
||||
DB_ENV *dbenv;
|
||||
int pct, *nwrotep;
|
||||
{
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOL *mp;
|
||||
u_int32_t i;
|
||||
int ret;
|
||||
|
||||
#ifdef HAVE_RPC
|
||||
if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
|
||||
return (__dbcl_memp_trickle(dbenv, pct, nwrotep));
|
||||
#endif
|
||||
MPOOL *c_mp, *mp;
|
||||
u_int32_t clean, dirty, i, total, dtmp;
|
||||
int ret, wrote;
|
||||
|
||||
PANIC_CHECK(dbenv);
|
||||
ENV_REQUIRES_CONFIG(dbenv, dbenv->mp_handle, DB_INIT_MPOOL);
|
||||
ENV_REQUIRES_CONFIG(dbenv,
|
||||
dbenv->mp_handle, "memp_trickle", DB_INIT_MPOOL);
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
mp = dbmp->reginfo[0].primary;
|
||||
|
|
@ -62,88 +49,35 @@ memp_trickle(dbenv, pct, nwrotep)
|
|||
if (pct < 1 || pct > 100)
|
||||
return (EINVAL);
|
||||
|
||||
R_LOCK(dbenv, dbmp->reginfo);
|
||||
|
||||
/* Loop through the caches... */
|
||||
for (ret = 0, i = 0; i < mp->nreg; ++i)
|
||||
if ((ret = __memp_trick(dbenv, i, pct, nwrotep)) != 0)
|
||||
break;
|
||||
|
||||
R_UNLOCK(dbenv, dbmp->reginfo);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* __memp_trick --
|
||||
* Trickle a single cache.
|
||||
*/
|
||||
static int
|
||||
__memp_trick(dbenv, ncache, pct, nwrotep)
|
||||
DB_ENV *dbenv;
|
||||
int ncache, pct, *nwrotep;
|
||||
{
|
||||
BH *bhp;
|
||||
DB_MPOOL *dbmp;
|
||||
MPOOL *c_mp;
|
||||
MPOOLFILE *mfp;
|
||||
db_pgno_t pgno;
|
||||
u_long total;
|
||||
int ret, wrote;
|
||||
|
||||
dbmp = dbenv->mp_handle;
|
||||
c_mp = dbmp->reginfo[ncache].primary;
|
||||
|
||||
/*
|
||||
* If there are sufficient clean buffers, or no buffers or no dirty
|
||||
* If there are sufficient clean buffers, no buffers or no dirty
|
||||
* buffers, we're done.
|
||||
*
|
||||
* XXX
|
||||
* Using st_page_clean and st_page_dirty is our only choice at the
|
||||
* moment, but it's not as correct as we might like in the presence
|
||||
* of pools with more than one buffer size, as a free 512-byte buffer
|
||||
* isn't the same as a free 8K buffer.
|
||||
* Using hash_page_dirty is our only choice at the moment, but it's not
|
||||
* as correct as we might like in the presence of pools having more
|
||||
* than one page size, as a free 512B buffer isn't the same as a free
|
||||
* 8KB buffer.
|
||||
*
|
||||
* Loop through the caches counting total/dirty buffers.
|
||||
*/
|
||||
loop: total = c_mp->stat.st_page_clean + c_mp->stat.st_page_dirty;
|
||||
if (total == 0 || c_mp->stat.st_page_dirty == 0 ||
|
||||
(c_mp->stat.st_page_clean * 100) / total >= (u_long)pct)
|
||||
return (0);
|
||||
|
||||
/* Loop until we write a buffer. */
|
||||
for (bhp = SH_TAILQ_FIRST(&c_mp->bhq, __bh);
|
||||
bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
|
||||
if (bhp->ref != 0 ||
|
||||
!F_ISSET(bhp, BH_DIRTY) || F_ISSET(bhp, BH_LOCKED))
|
||||
continue;
|
||||
|
||||
mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
|
||||
|
||||
/*
|
||||
* We can't write to temporary files -- see the comment in
|
||||
* mp_bh.c:__memp_bhwrite().
|
||||
*/
|
||||
if (F_ISSET(mfp, MP_TEMP))
|
||||
continue;
|
||||
|
||||
pgno = bhp->pgno;
|
||||
if ((ret = __memp_bhwrite(dbmp, mfp, bhp, NULL, &wrote)) != 0)
|
||||
return (ret);
|
||||
|
||||
/*
|
||||
* Any process syncing the shared memory buffer pool had better
|
||||
* be able to write to any underlying file. Be understanding,
|
||||
* but firm, on this point.
|
||||
*/
|
||||
if (!wrote) {
|
||||
__db_err(dbenv, "%s: unable to flush page: %lu",
|
||||
__memp_fns(dbmp, mfp), (u_long)pgno);
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
++c_mp->stat.st_page_trickle;
|
||||
if (nwrotep != NULL)
|
||||
++*nwrotep;
|
||||
goto loop;
|
||||
for (ret = 0, i = dirty = total = 0; i < mp->nreg; ++i) {
|
||||
c_mp = dbmp->reginfo[i].primary;
|
||||
total += c_mp->stat.st_pages;
|
||||
__memp_stat_hash(&dbmp->reginfo[i], c_mp, &dtmp);
|
||||
dirty += dtmp;
|
||||
}
|
||||
|
||||
return (0);
|
||||
clean = total - dirty;
|
||||
if (clean == total || (clean * 100) / total >= (u_long)pct)
|
||||
return (0);
|
||||
|
||||
if (nwrotep == NULL)
|
||||
nwrotep = &wrote;
|
||||
ret = __memp_sync_int(dbenv, NULL,
|
||||
((total * pct) / 100) - clean, DB_SYNC_TRICKLE, nwrotep);
|
||||
|
||||
mp->stat.st_page_trickle += *nwrotep;
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue