mariadb/bdb/btree/bt_split.c
unknown 155e78f014 BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f:
  Delete: bdb/build_vxworks/ex_access/ex_access.wpj
BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6:
  Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj
BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd:
  Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj
BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74:
  Delete: bdb/build_vxworks/ex_env/ex_env.wpj
BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d:
  Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj
BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41:
  Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj
BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682:
  Delete: bdb/build_win32/db_buildall.dsp
BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed:
  Delete: bdb/cxx/cxx_app.cpp
BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952:
  Delete: bdb/cxx/cxx_log.cpp
BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556:
  Delete: bdb/cxx/cxx_table.cpp
BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd:
  Delete: bdb/cxx/namemap.txt
BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373:
  Delete: bdb/db/Design.fileop
BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef:
  Delete: bdb/db185/db185_int.h
BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5:
  Delete: bdb/dist/acconfig.h
BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1:
  Delete: bdb/dist/aclocal/mutex.m4
BitKeeper/deleted/.del-options.m4~b9d0ca637213750a:
  Delete: bdb/dist/aclocal/options.m4
BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30:
  Delete: bdb/dist/aclocal/programs.m4
BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db:
  Delete: bdb/dist/aclocal/tcl.m4
BitKeeper/deleted/.del-types.m4~59cae158c9a32cff:
  Delete: bdb/dist/aclocal/types.m4
BitKeeper/deleted/.del-script~d38f6d3a4f159cb4:
  Delete: bdb/dist/build/script
BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c:
  Delete: bdb/dist/configure.in
BitKeeper/deleted/.del-ltconfig~66bbd007d8024af:
  Delete: bdb/dist/ltconfig
BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a:
  Delete: bdb/dist/rec_ctemp
BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f:
  Delete: bdb/dist/s_tcl
BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15:
  Delete: bdb/dist/template/.IGNORE_ME
BitKeeper/deleted/.del-btree.h~179f2aefec1753d:
  Delete: bdb/include/btree.h
BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8:
  Delete: bdb/include/cxx_int.h
BitKeeper/deleted/.del-db.src~6b433ae615b16a8d:
  Delete: bdb/include/db.src
BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c:
  Delete: bdb/include/db_185.h
BitKeeper/deleted/.del-db_am.h~a714912b6b75932f:
  Delete: bdb/include/db_am.h
BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9:
  Delete: bdb/include/db_cxx.h
BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904:
  Delete: bdb/include/db_dispatch.h
BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7:
  Delete: bdb/include/db_int.src
BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a:
  Delete: bdb/include/db_join.h
BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc:
  Delete: bdb/include/db_page.h
BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab:
  Delete: bdb/include/db_server_int.h
BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3:
  Delete: bdb/include/db_shash.h
BitKeeper/deleted/.del-db_swap.h~1e60887550864a59:
  Delete: bdb/include/db_swap.h
BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d:
  Delete: bdb/include/db_upgrade.h
BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e:
  Delete: bdb/include/db_verify.h
BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc:
  Delete: bdb/include/debug.h
BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb:
  Delete: bdb/include/hash.h
BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f:
  Delete: bdb/include/lock.h
BitKeeper/deleted/.del-log.h~ff20184238e35e4d:
  Delete: bdb/include/log.h
BitKeeper/deleted/.del-mp.h~7e317597622f3411:
  Delete: bdb/include/mp.h
BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137:
  Delete: bdb/include/mutex.h
BitKeeper/deleted/.del-os.h~91867cc8757cd0e3:
  Delete: bdb/include/os.h
BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be:
  Delete: bdb/include/os_jump.h
BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597:
  Delete: bdb/include/qam.h
BitKeeper/deleted/.del-queue.h~4c72c0826c123d5:
  Delete: bdb/include/queue.h
BitKeeper/deleted/.del-region.h~513fe04d977ca0fc:
  Delete: bdb/include/region.h
BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36:
  Delete: bdb/include/shqueue.h
BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f:
  Delete: bdb/include/tcl_db.h
BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4:
  Delete: bdb/include/txn.h
BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a:
  Delete: bdb/include/xa.h
BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023:
  Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java
BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d:
  Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java
BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b:
  Delete: bdb/lock/lock_conflict.c
BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2:
  Delete: bdb/log/log.src
BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f:
  Delete: bdb/log/log_findckp.c
BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297:
  Delete: bdb/log/log_rec.c
BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15:
  Delete: bdb/log/log_register.c
BitKeeper/deleted/.del-Design~b3d0f179f2767b:
  Delete: bdb/mp/Design
BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c:
  Delete: bdb/os/os_finit.c
BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924:
  Delete: bdb/os_vxworks/os_abs.c
BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122:
  Delete: bdb/os_vxworks/os_finit.c
BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3:
  Delete: bdb/os_vxworks/os_map.c
BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad:
  Delete: bdb/os_win32/os_finit.c
BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d:
  Delete: bdb/patches/log-corruption.patch
BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm
BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod
BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm
BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P
BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs
BitKeeper/deleted/.del-Changes~729c1891efa60de9:
  Delete: bdb/perl.BerkeleyDB/Changes
BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0:
  Delete: bdb/perl.BerkeleyDB/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a:
  Delete: bdb/perl.BerkeleyDB/Makefile.PL
BitKeeper/deleted/.del-README~5f2f579b1a241407:
  Delete: bdb/perl.BerkeleyDB/README
BitKeeper/deleted/.del-Todo~dca3c66c193adda9:
  Delete: bdb/perl.BerkeleyDB/Todo
BitKeeper/deleted/.del-config.in~ae81681e450e0999:
  Delete: bdb/perl.BerkeleyDB/config.in
BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e:
  Delete: bdb/perl.BerkeleyDB/dbinfo
BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e:
  Delete: bdb/perl.BerkeleyDB/mkconsts
BitKeeper/deleted/.del-mkpod~182c0ca54e439afb:
  Delete: bdb/perl.BerkeleyDB/mkpod
BitKeeper/deleted/.del-5.004~e008cb5a48805543:
  Delete: bdb/perl.BerkeleyDB/patches/5.004
BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8:
  Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl
BitKeeper/deleted/.del-solaris.pl~6771e7182394e152:
  Delete: bdb/perl.BerkeleyDB/hints/solaris.pl
BitKeeper/deleted/.del-typemap~783b8f5295b05f3d:
  Delete: bdb/perl.BerkeleyDB/typemap
BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_02
BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~e326cb51af09d154:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_05
BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b:
  Delete: bdb/perl.BerkeleyDB/patches/5.005
BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22:
  Delete: bdb/perl.BerkeleyDB/patches/5.6.0
BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406:
  Delete: bdb/perl.BerkeleyDB/t/btree.t
BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2:
  Delete: bdb/perl.BerkeleyDB/t/db-3.0.t
BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018:
  Delete: bdb/perl.BerkeleyDB/t/db-3.1.t
BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf:
  Delete: bdb/perl.BerkeleyDB/t/db-3.2.t
BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd:
  Delete: bdb/perl.BerkeleyDB/t/destroy.t
BitKeeper/deleted/.del-env.t~a8604a4499c4bd07:
  Delete: bdb/perl.BerkeleyDB/t/env.t
BitKeeper/deleted/.del-examples.t~2571b77c3cc75574:
  Delete: bdb/perl.BerkeleyDB/t/examples.t
BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88:
  Delete: bdb/perl.BerkeleyDB/t/examples.t.T
BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d:
  Delete: bdb/perl.BerkeleyDB/t/examples3.t.T
BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83:
  Delete: bdb/perl.BerkeleyDB/t/examples3.t
BitKeeper/deleted/.del-filter.t~f87b045c1b708637:
  Delete: bdb/perl.BerkeleyDB/t/filter.t
BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3:
  Delete: bdb/perl.BerkeleyDB/t/hash.t
BitKeeper/deleted/.del-join.t~29fc39f74a83ca22:
  Delete: bdb/perl.BerkeleyDB/t/join.t
BitKeeper/deleted/.del-mldbm.t~31f5015341eea040:
  Delete: bdb/perl.BerkeleyDB/t/mldbm.t
BitKeeper/deleted/.del-queue.t~8f338034ce44a641:
  Delete: bdb/perl.BerkeleyDB/t/queue.t
BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e:
  Delete: bdb/perl.BerkeleyDB/t/recno.t
BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d:
  Delete: bdb/perl.BerkeleyDB/t/strict.t
BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603:
  Delete: bdb/perl.BerkeleyDB/t/subdb.t
BitKeeper/deleted/.del-txn.t~65033b8558ae1216:
  Delete: bdb/perl.BerkeleyDB/t/txn.t
BitKeeper/deleted/.del-unknown.t~f3710458682665e1:
  Delete: bdb/perl.BerkeleyDB/t/unknown.t
BitKeeper/deleted/.del-Changes~436f74a5c414c65b:
  Delete: bdb/perl.DB_File/Changes
BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82:
  Delete: bdb/perl.DB_File/DB_File.pm
BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8:
  Delete: bdb/perl.DB_File/DB_File.xs
BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069:
  Delete: bdb/perl.DB_File/DB_File_BS
BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac:
  Delete: bdb/perl.DB_File/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38:
  Delete: bdb/perl.DB_File/Makefile.PL
BitKeeper/deleted/.del-README~77e924a5a9bae6b3:
  Delete: bdb/perl.DB_File/README
BitKeeper/deleted/.del-config.in~ab4c2792b86a810b:
  Delete: bdb/perl.DB_File/config.in
BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb:
  Delete: bdb/perl.DB_File/dbinfo
BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9:
  Delete: bdb/perl.DB_File/hints/dynixptx.pl
BitKeeper/deleted/.del-typemap~55cffb3288a9e587:
  Delete: bdb/perl.DB_File/typemap
BitKeeper/deleted/.del-version.c~a4df0e646f8b3975:
  Delete: bdb/perl.DB_File/version.c
BitKeeper/deleted/.del-5.004_01~d6830d0082702af7:
  Delete: bdb/perl.DB_File/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~78b082dc80c91031:
  Delete: bdb/perl.DB_File/patches/5.004_02
BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b:
  Delete: bdb/perl.DB_File/patches/5.004
BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe:
  Delete: bdb/perl.DB_File/hints/sco.pl
BitKeeper/deleted/.del-5.004_03~33f274648b160d95:
  Delete: bdb/perl.DB_File/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a:
  Delete: bdb/perl.DB_File/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142:
  Delete: bdb/perl.DB_File/patches/5.004_05
BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951:
  Delete: bdb/perl.DB_File/patches/5.005
BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa:
  Delete: bdb/perl.DB_File/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522:
  Delete: bdb/perl.DB_File/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea:
  Delete: bdb/perl.DB_File/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85:
  Delete: bdb/perl.DB_File/patches/5.6.0
BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3:
  Delete: bdb/perl.DB_File/t/db-btree.t
BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2:
  Delete: bdb/perl.DB_File/t/db-hash.t
BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50:
  Delete: bdb/perl.DB_File/t/db-recno.t
BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2:
  Delete: bdb/rpc_server/db_server.sed
BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4:
  Delete: bdb/rpc_server/db_server_proc.c
BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607:
  Delete: bdb/rpc_server/db_server_svc.sed
BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda:
  Delete: bdb/rpc_server/db_server_util.c
BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7:
  Delete: bdb/test/log.tcl
BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c:
  Delete: bdb/test/mpool.tcl
BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565:
  Delete: bdb/test/mutex.tcl
BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e:
  Delete: bdb/test/txn.tcl
BitKeeper/deleted/.del-README~e800a12a5392010a:
  Delete: bdb/test/upgrade/README
BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98:
  Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl
BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b:
  Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
2002-10-30 15:57:05 +04:00


/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996-2002
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "db_config.h"
#ifndef lint
static const char revid[] = "$Id: bt_split.c,v 11.58 2002/07/03 19:03:50 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <limits.h>
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_shash.h"
#include "dbinc/lock.h"
#include "dbinc/btree.h"
static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *));
static int __bam_page __P((DBC *, EPG *, EPG *));
static int __bam_pinsert __P((DBC *, EPG *, PAGE *, PAGE *, int));
static int __bam_psplit __P((DBC *, EPG *, PAGE *, PAGE *, db_indx_t *));
static int __bam_root __P((DBC *, EPG *));
static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *));
/*
* __bam_split --
* Split a page.
*
* PUBLIC: int __bam_split __P((DBC *, void *, db_pgno_t *));
*/
int
__bam_split(dbc, arg, root_pgnop)
DBC *dbc;
void *arg;
db_pgno_t *root_pgnop;
{
BTREE_CURSOR *cp;
enum { UP, DOWN } dir;
db_pgno_t root_pgno;
int exact, level, ret;
cp = (BTREE_CURSOR *)dbc->internal;
root_pgno = cp->root;
/*
* The locking protocol we use to avoid deadlock is to acquire locks by
* walking down the tree, but we do it as lazily as possible, locking
* the root only as a last resort. We expect all stack pages to have
* been discarded before we're called; we discard all short-term locks.
*
* When __bam_split is first called, we know that a leaf page was too
* full for an insert. We don't know what leaf page it was, but we
* have the key/recno that caused the problem. We call XX_search to
* reacquire the leaf page, but this time get both the leaf page and
* its parent, locked. We then split the leaf page and see if the new
* internal key will fit into the parent page. If it will, we're done.
*
* If it won't, we discard our current locks and repeat the process,
* only this time acquiring the parent page and its parent, locked.
* This process repeats until we succeed in the split, splitting the
* root page as the final resort. The entire process then repeats,
* as necessary, until we split a leaf page.
*
* XXX
* A traditional method of speeding this up is to maintain a stack of
* the pages traversed in the original search. You can detect if the
* stack is correct by storing the page's LSN when it was searched and
* comparing that LSN with the current one when it's locked during the
* split. This would be an easy change for this code, but I have no
* numbers that indicate it's worthwhile.
*/
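/*
 * Not part of the original source: a minimal sketch of the LSN-based
 * stack check described in the XXX note above. The type and helper
 * names (SPLIT_STK_ENTRY, __split_stk_current) are hypothetical and
 * shown only to make the idea concrete; this file does not implement
 * the optimization, so the sketch is guarded out of compilation.
 */
#if 0
typedef struct __split_stk_entry {
db_pgno_t pgno; /* Page visited by the original search. */
DB_LSN lsn; /* LSN observed when it was visited. */
} SPLIT_STK_ENTRY;
/*
 * __split_stk_current --
 * Return non-zero if the remembered page is unchanged, i.e., its
 * on-page LSN still matches the saved one, so the cached stack entry
 * could be reused instead of repeating the search.
 */
static int
__split_stk_current(entry, h)
SPLIT_STK_ENTRY *entry;
PAGE *h;
{
return (PGNO(h) == entry->pgno && log_compare(&LSN(h), &entry->lsn) == 0);
}
#endif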
for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) {
/*
* Acquire a page and its parent, locked.
*/
if ((ret = (dbc->dbtype == DB_BTREE ?
__bam_search(dbc, PGNO_INVALID,
arg, S_WRPAIR, level, NULL, &exact) :
__bam_rsearch(dbc,
(db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0)
return (ret);
if (root_pgnop != NULL)
*root_pgnop = cp->csp[0].page->pgno == root_pgno ?
root_pgno : cp->csp[-1].page->pgno;
/*
* Split the page if it still needs it (it's possible another
* thread of control has already split the page). If we are
* guaranteed that two items will fit on the page, the split
* is no longer necessary.
*/
if (2 * B_MAXSIZEONPAGE(cp->ovflsize)
<= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) {
__bam_stkrel(dbc, STK_NOLOCK);
return (0);
}
ret = cp->csp[0].page->pgno == root_pgno ?
__bam_root(dbc, &cp->csp[0]) :
__bam_page(dbc, &cp->csp[-1], &cp->csp[0]);
BT_STK_CLR(cp);
switch (ret) {
case 0:
/* Once we've split the leaf page, we're done. */
if (level == LEAFLEVEL)
return (0);
/* Switch directions. */
if (dir == UP)
dir = DOWN;
break;
case DB_NEEDSPLIT:
/*
* It's possible to fail to split repeatedly, as other
* threads may be modifying the tree, or the page usage
* is sufficiently bad that we don't get enough space
* the first time.
*/
if (dir == DOWN)
dir = UP;
break;
default:
return (ret);
}
}
/* NOTREACHED */
}
/*
* __bam_root --
* Split the root page of a btree.
*/
static int
__bam_root(dbc, cp)
DBC *dbc;
EPG *cp;
{
DB *dbp;
DBT log_dbt;
DB_LSN log_lsn;
DB_MPOOLFILE *mpf;
PAGE *lp, *rp;
db_indx_t split;
u_int32_t opflags;
int ret;
dbp = dbc->dbp;
mpf = dbp->mpf;
/* Yeah, right. */
if (cp->page->level >= MAXBTREELEVEL) {
__db_err(dbp->dbenv,
"Too many btree levels: %d", cp->page->level);
ret = ENOSPC;
goto err;
}
/* Create new left and right pages for the split. */
lp = rp = NULL;
if ((ret = __db_new(dbc, TYPE(cp->page), &lp)) != 0 ||
(ret = __db_new(dbc, TYPE(cp->page), &rp)) != 0)
goto err;
P_INIT(lp, dbp->pgsize, lp->pgno,
PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
cp->page->level, TYPE(cp->page));
P_INIT(rp, dbp->pgsize, rp->pgno,
ISINTERNAL(cp->page) ? PGNO_INVALID : lp->pgno, PGNO_INVALID,
cp->page->level, TYPE(cp->page));
/* Split the page. */
if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
goto err;
/* Log the change. */
if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
ZERO_LSN(log_lsn);
opflags = F_ISSET(
(BTREE_CURSOR *)dbc->internal, C_RECNUM) ? SPL_NRECS : 0;
if ((ret = __bam_split_log(dbp,
dbc->txn, &LSN(cp->page), 0, PGNO(lp), &LSN(lp), PGNO(rp),
&LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn,
dbc->internal->root, &log_dbt, opflags)) != 0)
goto err;
} else
LSN_NOT_LOGGED(LSN(cp->page));
LSN(lp) = LSN(cp->page);
LSN(rp) = LSN(cp->page);
/* Clean up the new root page. */
if ((ret = (dbc->dbtype == DB_RECNO ?
__ram_root(dbc, cp->page, lp, rp) :
__bam_broot(dbc, cp->page, lp, rp))) != 0)
goto err;
/* Adjust any cursors. */
if ((ret = __bam_ca_split(dbc,
cp->page->pgno, lp->pgno, rp->pgno, split, 1)) != 0)
goto err;
/* Success -- write the real pages back to the store. */
(void)mpf->put(mpf, cp->page, DB_MPOOL_DIRTY);
(void)__TLPUT(dbc, cp->lock);
(void)mpf->put(mpf, lp, DB_MPOOL_DIRTY);
(void)mpf->put(mpf, rp, DB_MPOOL_DIRTY);
return (0);
err: if (lp != NULL)
(void)mpf->put(mpf, lp, 0);
if (rp != NULL)
(void)mpf->put(mpf, rp, 0);
(void)mpf->put(mpf, cp->page, 0);
(void)__TLPUT(dbc, cp->lock);
return (ret);
}
/*
* __bam_page --
* Split the non-root page of a btree.
*/
static int
__bam_page(dbc, pp, cp)
DBC *dbc;
EPG *pp, *cp;
{
BTREE_CURSOR *bc;
DBT log_dbt;
DB_LSN log_lsn;
DB *dbp;
DB_LOCK rplock, tplock;
DB_MPOOLFILE *mpf;
DB_LSN save_lsn;
PAGE *lp, *rp, *alloc_rp, *tp;
db_indx_t split;
u_int32_t opflags;
int ret, t_ret;
dbp = dbc->dbp;
mpf = dbp->mpf;
alloc_rp = lp = rp = tp = NULL;
LOCK_INIT(rplock);
LOCK_INIT(tplock);
ret = -1;
/*
* Create a new right page for the split, and fill in everything
* except its LSN and page number.
*
* We malloc space for both the left and right pages, so we don't get
* a new page from the underlying buffer pool until we know the split
* is going to succeed. The reason is that we can't release locks
* acquired during the get-a-new-page process because metadata page
* locks can't be discarded on failure since we may have modified the
* free list. So, if you assume that we're holding a write lock on the
* leaf page which ran out of space and started this split (e.g., we
* have already written records to the page, or we retrieved a record
* from it with the DB_RMW flag set), failing in a split with both a
* leaf page locked and the metadata page locked can potentially lock
* up the tree badly, because we've violated the rule of always locking
* down the tree, and never up.
*/
if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &rp)) != 0)
goto err;
P_INIT(rp, dbp->pgsize, 0,
ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : NEXT_PGNO(cp->page),
cp->page->level, TYPE(cp->page));
/*
* Create a new left page for the split, and fill in everything
* except its LSN and next-page page number.
*/
if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &lp)) != 0)
goto err;
P_INIT(lp, dbp->pgsize, PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : PREV_PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : 0,
cp->page->level, TYPE(cp->page));
/*
* Split right.
*
* Only the indices are sorted on the page, i.e., the key/data pairs
* aren't, so it's simpler to copy the data from the split page onto
* two new pages instead of copying half the data to a new right page
* and compacting the left page in place. Since the left page can't
* change, we swap the original and the allocated left page after the
* split.
*/
if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
goto err;
/*
* Test to see if we are going to be able to insert the new pages into
* the parent page. The interesting failure here is that the parent
* page can't hold the new keys, and has to be split in turn, in which
* case we want to release all the locks we can.
*/
if ((ret = __bam_pinsert(dbc, pp, lp, rp, 1)) != 0)
goto err;
/*
* Fix up the previous pointer of any leaf page following the split
* page.
*
* There are interesting deadlock situations here as we try to write-lock
* a page that's not in our direct ancestry. Consider a cursor walking
* backward through the leaf pages that has our following page locked and
* is waiting on a lock for the page we're splitting. In that case we're
* going to deadlock here. It's probably OK: stepping backward through
* the tree isn't a common operation.
*/
if (ISLEAF(cp->page) && NEXT_PGNO(cp->page) != PGNO_INVALID) {
if ((ret = __db_lget(dbc,
0, NEXT_PGNO(cp->page), DB_LOCK_WRITE, 0, &tplock)) != 0)
goto err;
if ((ret = mpf->get(mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0)
goto err;
}
/*
* We've got everything locked down we need, and we know the split
* is going to succeed. Go and get the additional page we'll need.
*/
if ((ret = __db_new(dbc, TYPE(cp->page), &alloc_rp)) != 0)
goto err;
/*
* Lock the new page. We need to do this because someone
* could get here through bt_lpgno if this page was recently
* deallocated. They can't look at it before we commit.
*/
if ((ret = __db_lget(dbc,
0, PGNO(alloc_rp), DB_LOCK_WRITE, 0, &rplock)) != 0)
goto err;
/*
* Fix up the page numbers we didn't have before. We have to do this
* before calling __bam_pinsert because it may copy a page number onto
* the parent page and it takes the page number from its page argument.
*/
PGNO(rp) = NEXT_PGNO(lp) = PGNO(alloc_rp);
/* Actually update the parent page. */
if ((ret = __bam_pinsert(dbc, pp, lp, rp, 0)) != 0)
goto err;
bc = (BTREE_CURSOR *)dbc->internal;
/* Log the change. */
if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
if (tp == NULL)
ZERO_LSN(log_lsn);
opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0;
if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0,
PGNO(cp->page), &LSN(cp->page), PGNO(alloc_rp),
&LSN(alloc_rp), (u_int32_t)NUM_ENT(lp),
tp == NULL ? 0 : PGNO(tp),
tp == NULL ? &log_lsn : &LSN(tp),
PGNO_INVALID, &log_dbt, opflags)) != 0)
goto err;
} else
LSN_NOT_LOGGED(LSN(cp->page));
/* Update the LSNs for all involved pages. */
LSN(alloc_rp) = LSN(cp->page);
LSN(lp) = LSN(cp->page);
LSN(rp) = LSN(cp->page);
if (tp != NULL)
LSN(tp) = LSN(cp->page);
/*
* Copy the left and right pages into place. There are two paths
* through here: either we are logging, in which case the LSNs were set
* in the logging path above, or we are not logging, in which case lp
* and rp do not have valid LSNs. The correct LSNs to use are the ones
* on the page we got from __db_new or the one that was originally on
* cp->page. In either case, we save the LSN from the real database
* page (not a malloc'd one) and reapply it after we do the copy.
*/
save_lsn = alloc_rp->lsn;
memcpy(alloc_rp, rp, LOFFSET(dbp, rp));
memcpy((u_int8_t *)alloc_rp + HOFFSET(rp),
(u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp));
alloc_rp->lsn = save_lsn;
save_lsn = cp->page->lsn;
memcpy(cp->page, lp, LOFFSET(dbp, lp));
memcpy((u_int8_t *)cp->page + HOFFSET(lp),
(u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
cp->page->lsn = save_lsn;
/* Fix up the next-page link. */
if (tp != NULL)
PREV_PGNO(tp) = PGNO(rp);
/* Adjust any cursors. */
if ((ret = __bam_ca_split(dbc,
PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0)
goto err;
__os_free(dbp->dbenv, lp);
__os_free(dbp->dbenv, rp);
/*
* Success -- write the real pages back to the store. As we never
* acquired any sort of lock on the new page, we release it before
* releasing locks on the pages that reference it. We're finished
* modifying the page so it's not really necessary, but it's neater.
*/
if ((t_ret = mpf->put(mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, rplock);
if ((t_ret = mpf->put(mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, pp->lock);
if ((t_ret = mpf->put(mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, cp->lock);
if (tp != NULL) {
if ((t_ret =
mpf->put(mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, tplock);
}
return (ret);
err: if (lp != NULL)
__os_free(dbp->dbenv, lp);
if (rp != NULL)
__os_free(dbp->dbenv, rp);
if (alloc_rp != NULL)
(void)mpf->put(mpf, alloc_rp, 0);
if (tp != NULL)
(void)mpf->put(mpf, tp, 0);
/* We never updated the new or next pages, so we can release them. */
(void)__LPUT(dbc, rplock);
(void)__LPUT(dbc, tplock);
(void)mpf->put(mpf, pp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, pp->lock);
else
(void)__TLPUT(dbc, pp->lock);
(void)mpf->put(mpf, cp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, cp->lock);
else
(void)__TLPUT(dbc, cp->lock);
return (ret);
}
/*
* __bam_broot --
* Fix up the btree root page after it has been split.
*/
static int
__bam_broot(dbc, rootp, lp, rp)
DBC *dbc;
PAGE *rootp, *lp, *rp;
{
BINTERNAL bi, *child_bi;
BKEYDATA *child_bk;
BTREE_CURSOR *cp;
DB *dbp;
DBT hdr, data;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
/*
* If the root page was a leaf page, change it into an internal page.
* We copy the key we split on (but not the key's data, in the case of
* a leaf page) to the new root page.
*/
root_pgno = cp->root;
P_INIT(rootp, dbp->pgsize,
root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
memset(&data, 0, sizeof(data));
memset(&hdr, 0, sizeof(hdr));
/*
* The btree comparison code guarantees that the left-most key on any
* internal btree page is never used, so it doesn't need to be filled
* in. Set the record count if necessary.
*/
memset(&bi, 0, sizeof(bi));
bi.len = 0;
B_TSET(bi.type, B_KEYDATA, 0);
bi.pgno = lp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
bi.nrecs = __bam_total(dbp, lp);
RE_NREC_SET(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
if ((ret =
__db_pitem(dbc, rootp, 0, BINTERNAL_SIZE(0), &hdr, NULL)) != 0)
return (ret);
switch (TYPE(rp)) {
case P_IBTREE:
/* Copy the first key of the child page onto the root page. */
child_bi = GET_BINTERNAL(dbp, rp, 0);
bi.len = child_bi->len;
B_TSET(bi.type, child_bi->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bi->data;
data.size = child_bi->len;
if ((ret = __db_pitem(dbc, rootp, 1,
BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (B_TYPE(child_bi->type) == B_OVERFLOW)
if ((ret = __db_ovref(dbc,
((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
return (ret);
break;
case P_LDUP:
case P_LBTREE:
/* Copy the first key of the child page onto the root page. */
child_bk = GET_BKEYDATA(dbp, rp, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
bi.len = child_bk->len;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bk->data;
data.size = child_bk->len;
if ((ret = __db_pitem(dbc, rootp, 1,
BINTERNAL_SIZE(child_bk->len), &hdr, &data)) != 0)
return (ret);
break;
case B_DUPLICATE:
case B_OVERFLOW:
bi.len = BOVERFLOW_SIZE;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
data.data = child_bk;
data.size = BOVERFLOW_SIZE;
if ((ret = __db_pitem(dbc, rootp, 1,
BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (B_TYPE(child_bk->type) == B_OVERFLOW)
if ((ret = __db_ovref(dbc,
((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
break;
default:
return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
return (0);
}
/*
* __ram_root --
* Fix up the recno root page after it has been split.
*/
static int
__ram_root(dbc, rootp, lp, rp)
DBC *dbc;
PAGE *rootp, *lp, *rp;
{
DB *dbp;
DBT hdr;
RINTERNAL ri;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
root_pgno = dbc->internal->root;
/* Initialize the page. */
P_INIT(rootp, dbp->pgsize,
root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO);
/* Initialize the header. */
memset(&hdr, 0, sizeof(hdr));
hdr.data = &ri;
hdr.size = RINTERNAL_SIZE;
/* Insert the left and right keys, set the header information. */
ri.pgno = lp->pgno;
ri.nrecs = __bam_total(dbp, lp);
if ((ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_SET(rootp, ri.nrecs);
ri.pgno = rp->pgno;
ri.nrecs = __bam_total(dbp, rp);
if ((ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_ADJ(rootp, ri.nrecs);
return (0);
}
/*
* __bam_pinsert --
* Insert a new key into a parent page, completing the split.
*/
static int
__bam_pinsert(dbc, parent, lchild, rchild, space_check)
DBC *dbc;
EPG *parent;
PAGE *lchild, *rchild;
int space_check;
{
BINTERNAL bi, *child_bi;
BKEYDATA *child_bk, *tmp_bk;
BTREE *t;
BTREE_CURSOR *cp;
DB *dbp;
DBT a, b, hdr, data;
PAGE *ppage;
RINTERNAL ri;
db_indx_t off;
db_recno_t nrecs;
size_t (*func) __P((DB *, const DBT *, const DBT *));
u_int32_t n, nbytes, nksize;
int ret;
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
ppage = parent->page;
/* If handling record numbers, count records split to the right page. */
nrecs = F_ISSET(cp, C_RECNUM) &&
!space_check ? __bam_total(dbp, rchild) : 0;
/*
* Now we insert the new page's first key into the parent page, which
* completes the split. The parent points to a PAGE and a page index
* offset, where the new key goes ONE AFTER the index, because we split
* to the right.
*
* XXX
* Some btree algorithms replace the key for the old page as well as
* the new page. We don't, as there's no reason to believe that the
* first key on the old page is any better than the key we have, and,
* in the case of a key being placed at index 0 causing the split, the
* key is unavailable.
*/
off = parent->indx + O_INDX;
/*
* Calculate the space needed on the parent page.
*
* Prefix trees: space hack used when inserting into BINTERNAL pages.
* Retain only what's needed to distinguish between the new entry and
* the LAST entry on the page to its left. If the keys compare equal,
* retain the entire key. We ignore overflow keys, and the entire key
* must be retained for the next-to-leftmost key on the leftmost page
* of each level, or the search will fail. Applicable ONLY to internal
* pages that have leaf pages as children. Further reduction of the
* key between pairs of internal pages loses too much information.
*/
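/*
 * Not part of the original source: the prefix hack above relies on a
 * callback of type size_t (*)(DB *, const DBT *, const DBT *) that
 * returns how many leading bytes of the second key are needed to sort
 * it after the first. The helper below is one plausible such callback,
 * sketched for illustration only (the name example_prefix is made up);
 * applications normally rely on the tree's default or supply their own
 * prefix-compression callback. Guarded out of compilation.
 */
#if 0
static size_t
example_prefix(dbp, a, b)
DB *dbp; /* Unused in this sketch. */
const DBT *a, *b; /* a: last key on the left page; b: first key on the right. */
{
size_t cnt, len;
u_int8_t *pa, *pb;
/* Scan byte by byte; the first mismatch is enough to distinguish. */
cnt = 1;
len = a->size > b->size ? b->size : a->size;
for (pa = a->data, pb = b->data; len-- > 0; ++pa, ++pb, ++cnt)
if (*pa != *pb)
return (cnt);
/*
 * One key is a prefix of the other (or they're equal): keep one byte
 * past the end of the first key, but never more than all of the second.
 */
return (a->size < b->size ? a->size + 1 : b->size);
}
#endif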
switch (TYPE(rchild)) {
case P_IBTREE:
child_bi = GET_BINTERNAL(dbp, rchild, 0);
nbytes = BINTERNAL_PSIZE(child_bi->len);
if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
/* Add a new record for the right page. */
memset(&bi, 0, sizeof(bi));
bi.len = child_bi->len;
B_TSET(bi.type, child_bi->type, 0);
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bi->data;
data.size = child_bi->len;
if ((ret = __db_pitem(dbc, ppage, off,
BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (B_TYPE(child_bi->type) == B_OVERFLOW)
if ((ret = __db_ovref(dbc,
((BOVERFLOW *)(child_bi->data))->pgno, 1)) != 0)
return (ret);
break;
case P_LDUP:
case P_LBTREE:
child_bk = GET_BKEYDATA(dbp, rchild, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
/*
* We set t->bt_prefix to NULL if we have a comparison
* callback but no prefix compression callback. But,
* if we're splitting in an off-page duplicates tree,
* we still have to do some checking. If using the
* default off-page duplicates comparison routine we
* can use the default prefix compression callback. If
* not using the default off-page duplicates comparison
* routine, we can't do any kind of prefix compression
* as there's no way for an application to specify a
* prefix compression callback that corresponds to its
* comparison callback.
*/
if (F_ISSET(dbc, DBC_OPD)) {
if (dbp->dup_compare == __bam_defcmp)
func = __bam_defpfx;
else
func = NULL;
} else
func = t->bt_prefix;
nbytes = BINTERNAL_PSIZE(child_bk->len);
nksize = child_bk->len;
if (func == NULL)
goto noprefix;
if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
goto noprefix;
tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) -
(TYPE(lchild) == P_LDUP ? O_INDX : P_INDX));
if (B_TYPE(tmp_bk->type) != B_KEYDATA)
goto noprefix;
memset(&a, 0, sizeof(a));
a.size = tmp_bk->len;
a.data = tmp_bk->data;
memset(&b, 0, sizeof(b));
b.size = child_bk->len;
b.data = child_bk->data;
nksize = (u_int32_t)func(dbp, &a, &b);
if ((n = BINTERNAL_PSIZE(nksize)) < nbytes)
nbytes = n;
else
noprefix: nksize = child_bk->len;
if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
memset(&bi, 0, sizeof(bi));
bi.len = nksize;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bk->data;
data.size = nksize;
if ((ret = __db_pitem(dbc, ppage, off,
BINTERNAL_SIZE(nksize), &hdr, &data)) != 0)
return (ret);
break;
case B_DUPLICATE:
case B_OVERFLOW:
nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
memset(&bi, 0, sizeof(bi));
bi.len = BOVERFLOW_SIZE;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rchild->pgno;
bi.nrecs = nrecs;
memset(&hdr, 0, sizeof(hdr));
hdr.data = &bi;
hdr.size = SSZA(BINTERNAL, data);
memset(&data, 0, sizeof(data));
data.data = child_bk;
data.size = BOVERFLOW_SIZE;
if ((ret = __db_pitem(dbc, ppage, off,
BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
return (ret);
/* Increment the overflow ref count. */
if (B_TYPE(child_bk->type) == B_OVERFLOW)
if ((ret = __db_ovref(dbc,
((BOVERFLOW *)child_bk)->pgno, 1)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
break;
case P_IRECNO:
case P_LRECNO:
nbytes = RINTERNAL_PSIZE;
if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
/* Add a new record for the right page. */
memset(&hdr, 0, sizeof(hdr));
hdr.data = &ri;
hdr.size = RINTERNAL_SIZE;
ri.pgno = rchild->pgno;
ri.nrecs = nrecs;
if ((ret = __db_pitem(dbc,
ppage, off, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
break;
default:
return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
/*
* If this is a Recno page, a Btree page with record numbers, or an
* off-page duplicates tree, adjust the parent page's left-page record
* count.
*/
if (F_ISSET(cp, C_RECNUM)) {
/* Log the change. */
if (DBC_LOGGING(dbc)) {
if ((ret = __bam_cadjust_log(dbp, dbc->txn,
&LSN(ppage), 0, PGNO(ppage),
&LSN(ppage), parent->indx, -(int32_t)nrecs, 0)) != 0)
return (ret);
} else
LSN_NOT_LOGGED(LSN(ppage));
/* Update the left page count. */
if (dbc->dbtype == DB_RECNO)
GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
else
GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
}
return (0);
}
/*
* __bam_psplit --
* Do the real work of splitting the page.
*/
static int
__bam_psplit(dbc, cp, lp, rp, splitret)
DBC *dbc;
EPG *cp;
PAGE *lp, *rp;
db_indx_t *splitret;
{
DB *dbp;
PAGE *pp;
db_indx_t half, *inp, nbytes, off, splitp, top;
int adjust, cnt, iflag, isbigkey, ret;
dbp = dbc->dbp;
pp = cp->page;
inp = P_INP(dbp, pp);
adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
/*
* If we're splitting the first (last) page on a level because we're
* inserting (appending) a key to it, it's likely that the data is
* sorted. Moving a single item to the new page is less work and can
* push the fill factor higher than normal. This is trivial when we
* are splitting a new page before the beginning of the tree, since all
* of the interesting tests are against values of 0.
*
* Catching appends to the tree is harder. In a simple append, we're
* inserting an item that sorts past the end of the tree; the cursor
* will point past the last element on the page. But, in trees with
* duplicates, the cursor may point to the last entry on the page --
* in this case, the entry will also be the last element of a duplicate
* set (the last because the search call specified the S_DUPLAST flag).
* The only way to differentiate between an insert immediately before
* the last item in a tree or an append after a duplicate set which is
* also the last item in the tree is to call the comparison function.
* When splitting internal pages during an append, the search code
* guarantees the cursor always points to the largest page item less
* than the new internal entry. To summarize, we want to catch three
* possible index values:
*
* NUM_ENT(page) Btree/Recno leaf insert past end-of-tree
* NUM_ENT(page) - O_INDX Btree or Recno internal insert past EOT
* NUM_ENT(page) - P_INDX Btree leaf insert past EOT after a set
* of duplicates
*
* two of which (NUM_ENT(page) - O_INDX or P_INDX) might be an insert
* near the end of the tree, and not after the end of the tree at all.
* We do a simple test that might be wrong, because calling the
* comparison functions is expensive. Regardless, it's not a big deal
* if we're wrong; we'll do the split the right way next time.
*/
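/*
 * Not part of the original comment, a concrete instance for clarity:
 * with the standard index constants (O_INDX == 1, P_INDX == 2), a
 * P_LBTREE leaf holding three key/data pairs has NUM_ENT(page) == 6,
 * so an insert past end-of-tree leaves the cursor index at 6, an
 * append after a trailing duplicate set leaves it at 4, and on an
 * internal page with 6 entries the boundary of interest is index 5.
 */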
off = 0;
if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust)
off = NUM_ENT(pp) - adjust;
else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
off = adjust;
if (off != 0)
goto sort;
/*
* Split the data to the left and right pages. Try not to split on
* an overflow key. (Overflow keys on internal pages will slow down
* searches.) Refuse to split in the middle of a set of duplicates.
*
* First, find the optimum place to split.
*
* It's possible to try and split past the last record on the page if
* there's a very large record at the end of the page. Make sure this
* doesn't happen by bounding the check at the next-to-last entry on
* the page.
*
* Note, we try and split half the data present on the page. This is
* because another process may have already split the page and left
* it half empty. We don't try and skip the split -- we don't know
* how much space we're going to need on the page, and we may need up
* to half the page for a big item, so there's no easy test to decide
* if we need to split or not. Besides, if two threads are inserting
* data into the same place in the database, we're probably going to
* need more space soon anyway.
*/
top = NUM_ENT(pp) - adjust;
half = (dbp->pgsize - HOFFSET(pp)) / 2;
for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
switch (TYPE(pp)) {
case P_IBTREE:
if (B_TYPE(
GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA)
nbytes += BINTERNAL_SIZE(
GET_BINTERNAL(dbp, pp, off)->len);
else
nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)
nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
++off;
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)
nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
break;
case P_IRECNO:
nbytes += RINTERNAL_SIZE;
break;
default:
return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
sort: splitp = off;
/*
* Splitp is either at or just past the optimum split point. If the
* tree type is such that we're going to promote a key to an internal
* page, and our current choice is an overflow key, look for something
* close by that's smaller.
*/
switch (TYPE(pp)) {
case P_IBTREE:
iflag = 1;
isbigkey =
B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA;
break;
case P_LBTREE:
case P_LDUP:
iflag = 0;
isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) !=
B_KEYDATA;
break;
default:
iflag = isbigkey = 0;
}
if (isbigkey)
for (cnt = 1; cnt <= 3; ++cnt) {
off = splitp + cnt * adjust;
if (off < (db_indx_t)NUM_ENT(pp) &&
((iflag && B_TYPE(
GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) ||
B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
if (iflag ? B_TYPE(
GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA :
B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA) {
splitp = off;
break;
}
}
/*
* We can't split in the middle of a set of duplicates. We know that
* no duplicate set can take up more than about 25% of the page,
* because that's the point where we push it off onto a duplicate
* page set. So, this loop can't be unbounded.
*/
if (TYPE(pp) == P_LBTREE &&
inp[splitp] == inp[splitp - adjust])
for (cnt = 1;; ++cnt) {
off = splitp + cnt * adjust;
if (off < NUM_ENT(pp) &&
inp[splitp] != inp[off]) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
if (inp[splitp] != inp[off]) {
splitp = off + adjust;
break;
}
}
/* We're going to split at splitp. */
if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0)
return (ret);
if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0)
return (ret);
*splitret = splitp;
return (0);
}
/*
* __bam_copy --
* Copy a set of records from one page to another.
*
* PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
*/
int
__bam_copy(dbp, pp, cp, nxt, stop)
DB *dbp;
PAGE *pp, *cp;
u_int32_t nxt, stop;
{
db_indx_t *cinp, nbytes, off, *pinp;
cinp = P_INP(dbp, cp);
pinp = P_INP(dbp, pp);
/*
* Nxt is the offset of the next record to be placed on the target page.
*/
for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
switch (TYPE(pp)) {
case P_IBTREE:
if (B_TYPE(
GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA)
nbytes = BINTERNAL_SIZE(
GET_BINTERNAL(dbp, pp, nxt)->len);
else
nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
/*
* If we're on a key and it's a duplicate, just copy
* the offset.
*/
if (off != 0 && (nxt % P_INDX) == 0 &&
pinp[nxt] == pinp[nxt - P_INDX]) {
cinp[off] = cinp[off - P_INDX];
continue;
}
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) ==
B_KEYDATA)
nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, nxt)->len);
else
nbytes = BOVERFLOW_SIZE;
break;
case P_IRECNO:
nbytes = RINTERNAL_SIZE;
break;
default:
return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
cinp[off] = HOFFSET(cp) -= nbytes;
memcpy(P_ENTRY(dbp, cp, off), P_ENTRY(dbp, pp, nxt), nbytes);
}
return (0);
}