mariadb/bdb/db/db_join.c
unknown 155e78f014 BDB 4.1.24
BitKeeper/deleted/.del-ex_access.wpj~3df6ae8c99bf7c5f:
  Delete: bdb/build_vxworks/ex_access/ex_access.wpj
BitKeeper/deleted/.del-ex_btrec.wpj~a7622f1c6f432dc6:
  Delete: bdb/build_vxworks/ex_btrec/ex_btrec.wpj
BitKeeper/deleted/.del-ex_dbclient.wpj~7345440f3b204cdd:
  Delete: bdb/build_vxworks/ex_dbclient/ex_dbclient.wpj
BitKeeper/deleted/.del-ex_env.wpj~fbe1ab10b04e8b74:
  Delete: bdb/build_vxworks/ex_env/ex_env.wpj
BitKeeper/deleted/.del-ex_mpool.wpj~4479cfd5c45f327d:
  Delete: bdb/build_vxworks/ex_mpool/ex_mpool.wpj
BitKeeper/deleted/.del-ex_tpcb.wpj~f78093006e14bf41:
  Delete: bdb/build_vxworks/ex_tpcb/ex_tpcb.wpj
BitKeeper/deleted/.del-db_buildall.dsp~bd749ff6da11682:
  Delete: bdb/build_win32/db_buildall.dsp
BitKeeper/deleted/.del-cxx_app.cpp~ad8df8e0791011ed:
  Delete: bdb/cxx/cxx_app.cpp
BitKeeper/deleted/.del-cxx_log.cpp~a50ff3118fe06952:
  Delete: bdb/cxx/cxx_log.cpp
BitKeeper/deleted/.del-cxx_table.cpp~ecd751e79b055556:
  Delete: bdb/cxx/cxx_table.cpp
BitKeeper/deleted/.del-namemap.txt~796a3acd3885d8fd:
  Delete: bdb/cxx/namemap.txt
BitKeeper/deleted/.del-Design.fileop~3ca4da68f1727373:
  Delete: bdb/db/Design.fileop
BitKeeper/deleted/.del-db185_int.h~61bee3736e7959ef:
  Delete: bdb/db185/db185_int.h
BitKeeper/deleted/.del-acconfig.h~411e8854d67ad8b5:
  Delete: bdb/dist/acconfig.h
BitKeeper/deleted/.del-mutex.m4~a13383cde18a64e1:
  Delete: bdb/dist/aclocal/mutex.m4
BitKeeper/deleted/.del-options.m4~b9d0ca637213750a:
  Delete: bdb/dist/aclocal/options.m4
BitKeeper/deleted/.del-programs.m4~3ce7890b47732b30:
  Delete: bdb/dist/aclocal/programs.m4
BitKeeper/deleted/.del-tcl.m4~f944e2db93c3b6db:
  Delete: bdb/dist/aclocal/tcl.m4
BitKeeper/deleted/.del-types.m4~59cae158c9a32cff:
  Delete: bdb/dist/aclocal/types.m4
BitKeeper/deleted/.del-script~d38f6d3a4f159cb4:
  Delete: bdb/dist/build/script
BitKeeper/deleted/.del-configure.in~ac795a92c8fe049c:
  Delete: bdb/dist/configure.in
BitKeeper/deleted/.del-ltconfig~66bbd007d8024af:
  Delete: bdb/dist/ltconfig
BitKeeper/deleted/.del-rec_ctemp~a28554362534f00a:
  Delete: bdb/dist/rec_ctemp
BitKeeper/deleted/.del-s_tcl~2ffe4326459fcd9f:
  Delete: bdb/dist/s_tcl
BitKeeper/deleted/.del-.IGNORE_ME~d8148b08fa7d5d15:
  Delete: bdb/dist/template/.IGNORE_ME
BitKeeper/deleted/.del-btree.h~179f2aefec1753d:
  Delete: bdb/include/btree.h
BitKeeper/deleted/.del-cxx_int.h~6b649c04766508f8:
  Delete: bdb/include/cxx_int.h
BitKeeper/deleted/.del-db.src~6b433ae615b16a8d:
  Delete: bdb/include/db.src
BitKeeper/deleted/.del-db_185.h~ad8b373d9391d35c:
  Delete: bdb/include/db_185.h
BitKeeper/deleted/.del-db_am.h~a714912b6b75932f:
  Delete: bdb/include/db_am.h
BitKeeper/deleted/.del-db_cxx.h~fcafadf45f5d19e9:
  Delete: bdb/include/db_cxx.h
BitKeeper/deleted/.del-db_dispatch.h~6844f20f7eb46904:
  Delete: bdb/include/db_dispatch.h
BitKeeper/deleted/.del-db_int.src~419a3f48b6a01da7:
  Delete: bdb/include/db_int.src
BitKeeper/deleted/.del-db_join.h~76f9747a42c3399a:
  Delete: bdb/include/db_join.h
BitKeeper/deleted/.del-db_page.h~e302ca3a4db3abdc:
  Delete: bdb/include/db_page.h
BitKeeper/deleted/.del-db_server_int.h~e1d20b6ba3bca1ab:
  Delete: bdb/include/db_server_int.h
BitKeeper/deleted/.del-db_shash.h~5fbf2d696fac90f3:
  Delete: bdb/include/db_shash.h
BitKeeper/deleted/.del-db_swap.h~1e60887550864a59:
  Delete: bdb/include/db_swap.h
BitKeeper/deleted/.del-db_upgrade.h~c644eee73701fc8d:
  Delete: bdb/include/db_upgrade.h
BitKeeper/deleted/.del-db_verify.h~b8d6c297c61f342e:
  Delete: bdb/include/db_verify.h
BitKeeper/deleted/.del-debug.h~dc2b4f2cf27ccebc:
  Delete: bdb/include/debug.h
BitKeeper/deleted/.del-hash.h~2aaa548b28882dfb:
  Delete: bdb/include/hash.h
BitKeeper/deleted/.del-lock.h~a761c1b7de57b77f:
  Delete: bdb/include/lock.h
BitKeeper/deleted/.del-log.h~ff20184238e35e4d:
  Delete: bdb/include/log.h
BitKeeper/deleted/.del-mp.h~7e317597622f3411:
  Delete: bdb/include/mp.h
BitKeeper/deleted/.del-mutex.h~d3ae7a2977a68137:
  Delete: bdb/include/mutex.h
BitKeeper/deleted/.del-os.h~91867cc8757cd0e3:
  Delete: bdb/include/os.h
BitKeeper/deleted/.del-os_jump.h~e1b939fa5151d4be:
  Delete: bdb/include/os_jump.h
BitKeeper/deleted/.del-qam.h~6fad0c1b5723d597:
  Delete: bdb/include/qam.h
BitKeeper/deleted/.del-queue.h~4c72c0826c123d5:
  Delete: bdb/include/queue.h
BitKeeper/deleted/.del-region.h~513fe04d977ca0fc:
  Delete: bdb/include/region.h
BitKeeper/deleted/.del-shqueue.h~525fc3e6c2025c36:
  Delete: bdb/include/shqueue.h
BitKeeper/deleted/.del-tcl_db.h~c536fd61a844f23f:
  Delete: bdb/include/tcl_db.h
BitKeeper/deleted/.del-txn.h~c8d94b221ec147e4:
  Delete: bdb/include/txn.h
BitKeeper/deleted/.del-xa.h~ecc466493aae9d9a:
  Delete: bdb/include/xa.h
BitKeeper/deleted/.del-DbRecoveryInit.java~756b52601a0b9023:
  Delete: bdb/java/src/com/sleepycat/db/DbRecoveryInit.java
BitKeeper/deleted/.del-DbTxnRecover.java~74607cba7ab89d6d:
  Delete: bdb/java/src/com/sleepycat/db/DbTxnRecover.java
BitKeeper/deleted/.del-lock_conflict.c~fc5e0f14cf597a2b:
  Delete: bdb/lock/lock_conflict.c
BitKeeper/deleted/.del-log.src~53ac9e7b5cb023f2:
  Delete: bdb/log/log.src
BitKeeper/deleted/.del-log_findckp.c~24287f008916e81f:
  Delete: bdb/log/log_findckp.c
BitKeeper/deleted/.del-log_rec.c~d51711f2cac09297:
  Delete: bdb/log/log_rec.c
BitKeeper/deleted/.del-log_register.c~b40bb4efac75ca15:
  Delete: bdb/log/log_register.c
BitKeeper/deleted/.del-Design~b3d0f179f2767b:
  Delete: bdb/mp/Design
BitKeeper/deleted/.del-os_finit.c~95dbefc6fe79b26c:
  Delete: bdb/os/os_finit.c
BitKeeper/deleted/.del-os_abs.c~df95d1e7db81924:
  Delete: bdb/os_vxworks/os_abs.c
BitKeeper/deleted/.del-os_finit.c~803b484bdb9d0122:
  Delete: bdb/os_vxworks/os_finit.c
BitKeeper/deleted/.del-os_map.c~3a6d7926398b76d3:
  Delete: bdb/os_vxworks/os_map.c
BitKeeper/deleted/.del-os_finit.c~19a227c6d3c78ad:
  Delete: bdb/os_win32/os_finit.c
BitKeeper/deleted/.del-log-corruption.patch~1cf2ecc7c6408d5d:
  Delete: bdb/patches/log-corruption.patch
BitKeeper/deleted/.del-Btree.pm~af6d0c5eaed4a98e:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Btree.pm
BitKeeper/deleted/.del-BerkeleyDB.pm~7244036d4482643:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pm
BitKeeper/deleted/.del-BerkeleyDB.pod~e7b18fd6132448e3:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod
BitKeeper/deleted/.del-Hash.pm~10292a26c06a5c95:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB/Hash.pm
BitKeeper/deleted/.del-BerkeleyDB.pod.P~79f76a1495eda203:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.pod.P
BitKeeper/deleted/.del-BerkeleyDB.xs~80c99afbd98e392c:
  Delete: bdb/perl.BerkeleyDB/BerkeleyDB.xs
BitKeeper/deleted/.del-Changes~729c1891efa60de9:
  Delete: bdb/perl.BerkeleyDB/Changes
BitKeeper/deleted/.del-MANIFEST~63a1e34aecf157a0:
  Delete: bdb/perl.BerkeleyDB/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~c68797707d8df87a:
  Delete: bdb/perl.BerkeleyDB/Makefile.PL
BitKeeper/deleted/.del-README~5f2f579b1a241407:
  Delete: bdb/perl.BerkeleyDB/README
BitKeeper/deleted/.del-Todo~dca3c66c193adda9:
  Delete: bdb/perl.BerkeleyDB/Todo
BitKeeper/deleted/.del-config.in~ae81681e450e0999:
  Delete: bdb/perl.BerkeleyDB/config.in
BitKeeper/deleted/.del-dbinfo~28ad67d83be4f68e:
  Delete: bdb/perl.BerkeleyDB/dbinfo
BitKeeper/deleted/.del-mkconsts~543ab60669c7a04e:
  Delete: bdb/perl.BerkeleyDB/mkconsts
BitKeeper/deleted/.del-mkpod~182c0ca54e439afb:
  Delete: bdb/perl.BerkeleyDB/mkpod
BitKeeper/deleted/.del-5.004~e008cb5a48805543:
  Delete: bdb/perl.BerkeleyDB/patches/5.004
BitKeeper/deleted/.del-irix_6_5.pl~61662bb08afcdec8:
  Delete: bdb/perl.BerkeleyDB/hints/irix_6_5.pl
BitKeeper/deleted/.del-solaris.pl~6771e7182394e152:
  Delete: bdb/perl.BerkeleyDB/hints/solaris.pl
BitKeeper/deleted/.del-typemap~783b8f5295b05f3d:
  Delete: bdb/perl.BerkeleyDB/typemap
BitKeeper/deleted/.del-5.004_01~6081ce2fff7b0bc:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~87214eac35ad9e6:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_02
BitKeeper/deleted/.del-5.004_03~9a672becec7cb40f:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~e326cb51af09d154:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~7ab457a1e41a92fe:
  Delete: bdb/perl.BerkeleyDB/patches/5.004_05
BitKeeper/deleted/.del-5.005~f9e2d59b5964cd4b:
  Delete: bdb/perl.BerkeleyDB/patches/5.005
BitKeeper/deleted/.del-5.005_01~3eb9fb7b5842ea8e:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~67477ce0bef717cb:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~c4c29a1fb21e290a:
  Delete: bdb/perl.BerkeleyDB/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~e1fb9897d124ee22:
  Delete: bdb/perl.BerkeleyDB/patches/5.6.0
BitKeeper/deleted/.del-btree.t~e4a1a3c675ddc406:
  Delete: bdb/perl.BerkeleyDB/t/btree.t
BitKeeper/deleted/.del-db-3.0.t~d2c60991d84558f2:
  Delete: bdb/perl.BerkeleyDB/t/db-3.0.t
BitKeeper/deleted/.del-db-3.1.t~6ee88cd13f55e018:
  Delete: bdb/perl.BerkeleyDB/t/db-3.1.t
BitKeeper/deleted/.del-db-3.2.t~f73b6461f98fd1cf:
  Delete: bdb/perl.BerkeleyDB/t/db-3.2.t
BitKeeper/deleted/.del-destroy.t~cc6a2ae1980a2ecd:
  Delete: bdb/perl.BerkeleyDB/t/destroy.t
BitKeeper/deleted/.del-env.t~a8604a4499c4bd07:
  Delete: bdb/perl.BerkeleyDB/t/env.t
BitKeeper/deleted/.del-examples.t~2571b77c3cc75574:
  Delete: bdb/perl.BerkeleyDB/t/examples.t
BitKeeper/deleted/.del-examples.t.T~8228bdd75ac78b88:
  Delete: bdb/perl.BerkeleyDB/t/examples.t.T
BitKeeper/deleted/.del-examples3.t.T~66a186897a87026d:
  Delete: bdb/perl.BerkeleyDB/t/examples3.t.T
BitKeeper/deleted/.del-examples3.t~fe3822ba2f2d7f83:
  Delete: bdb/perl.BerkeleyDB/t/examples3.t
BitKeeper/deleted/.del-filter.t~f87b045c1b708637:
  Delete: bdb/perl.BerkeleyDB/t/filter.t
BitKeeper/deleted/.del-hash.t~616bfb4d644de3a3:
  Delete: bdb/perl.BerkeleyDB/t/hash.t
BitKeeper/deleted/.del-join.t~29fc39f74a83ca22:
  Delete: bdb/perl.BerkeleyDB/t/join.t
BitKeeper/deleted/.del-mldbm.t~31f5015341eea040:
  Delete: bdb/perl.BerkeleyDB/t/mldbm.t
BitKeeper/deleted/.del-queue.t~8f338034ce44a641:
  Delete: bdb/perl.BerkeleyDB/t/queue.t
BitKeeper/deleted/.del-recno.t~d4ddbd3743add63e:
  Delete: bdb/perl.BerkeleyDB/t/recno.t
BitKeeper/deleted/.del-strict.t~6885cdd2ea71ca2d:
  Delete: bdb/perl.BerkeleyDB/t/strict.t
BitKeeper/deleted/.del-subdb.t~aab62a5d5864c603:
  Delete: bdb/perl.BerkeleyDB/t/subdb.t
BitKeeper/deleted/.del-txn.t~65033b8558ae1216:
  Delete: bdb/perl.BerkeleyDB/t/txn.t
BitKeeper/deleted/.del-unknown.t~f3710458682665e1:
  Delete: bdb/perl.BerkeleyDB/t/unknown.t
BitKeeper/deleted/.del-Changes~436f74a5c414c65b:
  Delete: bdb/perl.DB_File/Changes
BitKeeper/deleted/.del-DB_File.pm~ae0951c6c7665a82:
  Delete: bdb/perl.DB_File/DB_File.pm
BitKeeper/deleted/.del-DB_File.xs~89e49a0b5556f1d8:
  Delete: bdb/perl.DB_File/DB_File.xs
BitKeeper/deleted/.del-DB_File_BS~290fad5dbbb87069:
  Delete: bdb/perl.DB_File/DB_File_BS
BitKeeper/deleted/.del-MANIFEST~90ee581572bdd4ac:
  Delete: bdb/perl.DB_File/MANIFEST
BitKeeper/deleted/.del-Makefile.PL~ac0567bb5a377e38:
  Delete: bdb/perl.DB_File/Makefile.PL
BitKeeper/deleted/.del-README~77e924a5a9bae6b3:
  Delete: bdb/perl.DB_File/README
BitKeeper/deleted/.del-config.in~ab4c2792b86a810b:
  Delete: bdb/perl.DB_File/config.in
BitKeeper/deleted/.del-dbinfo~461c43b30fab2cb:
  Delete: bdb/perl.DB_File/dbinfo
BitKeeper/deleted/.del-dynixptx.pl~50dcddfae25d17e9:
  Delete: bdb/perl.DB_File/hints/dynixptx.pl
BitKeeper/deleted/.del-typemap~55cffb3288a9e587:
  Delete: bdb/perl.DB_File/typemap
BitKeeper/deleted/.del-version.c~a4df0e646f8b3975:
  Delete: bdb/perl.DB_File/version.c
BitKeeper/deleted/.del-5.004_01~d6830d0082702af7:
  Delete: bdb/perl.DB_File/patches/5.004_01
BitKeeper/deleted/.del-5.004_02~78b082dc80c91031:
  Delete: bdb/perl.DB_File/patches/5.004_02
BitKeeper/deleted/.del-5.004~4411ec2e3c9e008b:
  Delete: bdb/perl.DB_File/patches/5.004
BitKeeper/deleted/.del-sco.pl~1e795fe14fe4dcfe:
  Delete: bdb/perl.DB_File/hints/sco.pl
BitKeeper/deleted/.del-5.004_03~33f274648b160d95:
  Delete: bdb/perl.DB_File/patches/5.004_03
BitKeeper/deleted/.del-5.004_04~8f3d1b3cf18bb20a:
  Delete: bdb/perl.DB_File/patches/5.004_04
BitKeeper/deleted/.del-5.004_05~9c0f02e7331e142:
  Delete: bdb/perl.DB_File/patches/5.004_05
BitKeeper/deleted/.del-5.005~c2108cb2e3c8d951:
  Delete: bdb/perl.DB_File/patches/5.005
BitKeeper/deleted/.del-5.005_01~3b45e9673afc4cfa:
  Delete: bdb/perl.DB_File/patches/5.005_01
BitKeeper/deleted/.del-5.005_02~9fe5766bb02a4522:
  Delete: bdb/perl.DB_File/patches/5.005_02
BitKeeper/deleted/.del-5.005_03~ffa1c38c19ae72ea:
  Delete: bdb/perl.DB_File/patches/5.005_03
BitKeeper/deleted/.del-5.6.0~373be3a5ce47be85:
  Delete: bdb/perl.DB_File/patches/5.6.0
BitKeeper/deleted/.del-db-btree.t~3231595a1c241eb3:
  Delete: bdb/perl.DB_File/t/db-btree.t
BitKeeper/deleted/.del-db-hash.t~7c4ad0c795c7fad2:
  Delete: bdb/perl.DB_File/t/db-hash.t
BitKeeper/deleted/.del-db-recno.t~6c2d3d80b9ba4a50:
  Delete: bdb/perl.DB_File/t/db-recno.t
BitKeeper/deleted/.del-db_server.sed~cdb00ebcd48a64e2:
  Delete: bdb/rpc_server/db_server.sed
BitKeeper/deleted/.del-db_server_proc.c~d46c8f409c3747f4:
  Delete: bdb/rpc_server/db_server_proc.c
BitKeeper/deleted/.del-db_server_svc.sed~3f5e59f334fa4607:
  Delete: bdb/rpc_server/db_server_svc.sed
BitKeeper/deleted/.del-db_server_util.c~a809f3a4629acda:
  Delete: bdb/rpc_server/db_server_util.c
BitKeeper/deleted/.del-log.tcl~ff1b41f1355b97d7:
  Delete: bdb/test/log.tcl
BitKeeper/deleted/.del-mpool.tcl~b0df4dc1b04db26c:
  Delete: bdb/test/mpool.tcl
BitKeeper/deleted/.del-mutex.tcl~52fd5c73a150565:
  Delete: bdb/test/mutex.tcl
BitKeeper/deleted/.del-txn.tcl~c4ff071550b5446e:
  Delete: bdb/test/txn.tcl
BitKeeper/deleted/.del-README~e800a12a5392010a:
  Delete: bdb/test/upgrade/README
BitKeeper/deleted/.del-pack-2.6.6.pl~89d5076d758d3e98:
  Delete: bdb/test/upgrade/generate-2.X/pack-2.6.6.pl
BitKeeper/deleted/.del-test-2.6.patch~4a52dc83d447547b:
  Delete: bdb/test/upgrade/generate-2.X/test-2.6.patch
2002-10-30 15:57:05 +04:00

822 lines
23 KiB
C

/*
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1998-2002
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
static const char revid[] = "$Id: db_join.c,v 11.55 2002/08/08 03:57:47 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/db_join.h"
#include "dbinc/btree.h"
/*
 * Forward declarations of the file-local (static) join cursor routines.
 * __db_join installs the close/del/get/put routines as the methods of the
 * join cursor it creates.
 */
static int __db_join_close __P((DBC *));
static int __db_join_cmp __P((const void *, const void *));
static int __db_join_del __P((DBC *, u_int32_t));
static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t));
static int __db_join_getnext __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t));
static int __db_join_primget __P((DB *,
DB_TXN *, u_int32_t, DBT *, DBT *, u_int32_t));
static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t));
/*
 * Check to see if the Nth secondary cursor of join cursor jc is pointing
 * to a sorted duplicate set.
 *
 * "Sorted" is decided solely by whether the database handle behind that
 * cursor has a duplicate comparison function (dup_compare) installed.
 * Each macro argument is evaluated exactly once.
 */
#define SORTED_SET(jc, n) ((jc)->j_curslist[(n)]->dbp->dup_compare != NULL)
/*
* This is the duplicate-assisted join functionality. Right now we're
* going to write it such that we return one item at a time, although
* I think we may need to optimize it to return them all at once.
* It should be easier to get it working this way, and I believe that
* changing it should be fairly straightforward.
*
* We optimize the join by sorting cursors from smallest to largest
* cardinality. In most cases, this is indeed optimal. However, if
* a cursor with large cardinality has very few data in common with the
* first cursor, it is possible that the join will be made faster by
* putting it earlier in the cursor list. Since we have no way to detect
* cases like this, we simply provide a flag, DB_JOIN_NOSORT, which retains
* the sort order specified by the caller, who may know more about the
* structure of the data.
*
* The first cursor moves sequentially through the duplicate set while
* the others search explicitly for the duplicate in question.
*
*/
/*
* __db_join --
* This is the interface to the duplicate-assisted join functionality.
* In the same way that cursors mark a position in a database, a cursor
* can mark a position in a join. While most cursors are created by the
* cursor method of a DB, join cursors are created through an explicit
* call to DB->join.
*
* The curslist is an array of existing, initialized cursors and primary
* is the DB of the primary file. The data item that joins all the
* cursors in the curslist is used as the key into the primary and that
* key and data are returned. When no more items are left in the join
* set, the c_next operation off the join cursor will return DB_NOTFOUND.
*
* PUBLIC: int __db_join __P((DB *, DBC **, DBC **, u_int32_t));
*/
int
__db_join(primary, curslist, dbcp, flags)
DB *primary;
DBC **curslist, **dbcp;
u_int32_t flags;
{
DB_ENV *dbenv;
DBC *dbc;
JOIN_CURSOR *jc;
int ret;
u_int32_t i;
size_t ncurs, nslots;
COMPQUIET(nslots, 0);
PANIC_CHECK(primary->dbenv);
if ((ret = __db_joinchk(primary, curslist, flags)) != 0)
return (ret);
dbc = NULL;
jc = NULL;
dbenv = primary->dbenv;
if ((ret = __os_calloc(dbenv, 1, sizeof(DBC), &dbc)) != 0)
goto err;
if ((ret = __os_calloc(dbenv,
1, sizeof(JOIN_CURSOR), &jc)) != 0)
goto err;
if ((ret = __os_malloc(dbenv, 256, &jc->j_key.data)) != 0)
goto err;
jc->j_key.ulen = 256;
F_SET(&jc->j_key, DB_DBT_USERMEM);
F_SET(&jc->j_rdata, DB_DBT_REALLOC);
for (jc->j_curslist = curslist;
*jc->j_curslist != NULL; jc->j_curslist++)
;
/*
* The number of cursor slots we allocate is one greater than
* the number of cursors involved in the join, because the
* list is NULL-terminated.
*/
ncurs = jc->j_curslist - curslist;
nslots = ncurs + 1;
/*
* !!! -- A note on the various lists hanging off jc.
*
* j_curslist is the initial NULL-terminated list of cursors passed
* into __db_join. The original cursors are not modified; pristine
* copies are required because, in databases with unsorted dups, we
* must reset all of the secondary cursors after the first each
* time the first one is incremented, or else we will lose data
* which happen to be sorted differently in two different cursors.
*
* j_workcurs is where we put those copies that we're planning to
* work with. They're lazily c_dup'ed from j_curslist as we need
* them, and closed when the join cursor is closed or when we need
* to reset them to their original values (in which case we just
* c_dup afresh).
*
* j_fdupcurs is an array of cursors which point to the first
* duplicate in the duplicate set that contains the data value
* we're currently interested in. We need this to make
* __db_join_get correctly return duplicate duplicates; i.e., if a
* given data value occurs twice in the set belonging to cursor #2,
* and thrice in the set belonging to cursor #3, and once in all
* the other cursors, successive calls to __db_join_get need to
* return that data item six times. To make this happen, each time
* cursor N is allowed to advance to a new datum, all cursors M
* such that M > N have to be reset to the first duplicate with
* that datum, so __db_join_get will return all the dup-dups again.
* We could just reset them to the original cursor from j_curslist,
* but that would be a bit slower in the unsorted case and a LOT
* slower in the sorted one.
*
* j_exhausted is a list of boolean values which represent
* whether or not their corresponding cursors are "exhausted",
* i.e. whether the datum under the corresponding cursor has
* been found not to exist in any unreturned combinations of
* later secondary cursors, in which case they are ready to be
* incremented.
*/
/* We don't want to free regions whose callocs have failed. */
jc->j_curslist = NULL;
jc->j_workcurs = NULL;
jc->j_fdupcurs = NULL;
jc->j_exhausted = NULL;
if ((ret = __os_calloc(dbenv, nslots, sizeof(DBC *),
&jc->j_curslist)) != 0)
goto err;
if ((ret = __os_calloc(dbenv, nslots, sizeof(DBC *),
&jc->j_workcurs)) != 0)
goto err;
if ((ret = __os_calloc(dbenv, nslots, sizeof(DBC *),
&jc->j_fdupcurs)) != 0)
goto err;
if ((ret = __os_calloc(dbenv, nslots, sizeof(u_int8_t),
&jc->j_exhausted)) != 0)
goto err;
for (i = 0; curslist[i] != NULL; i++) {
jc->j_curslist[i] = curslist[i];
jc->j_workcurs[i] = NULL;
jc->j_fdupcurs[i] = NULL;
jc->j_exhausted[i] = 0;
}
jc->j_ncurs = (u_int32_t)ncurs;
/*
* If DB_JOIN_NOSORT is not set, optimize secondary cursors by
* sorting in order of increasing cardinality.
*/
if (!LF_ISSET(DB_JOIN_NOSORT))
qsort(jc->j_curslist, ncurs, sizeof(DBC *), __db_join_cmp);
/*
* We never need to reset the 0th cursor, so there's no
* solid reason to use workcurs[0] rather than curslist[0] in
* join_get. Nonetheless, it feels cleaner to do it for symmetry,
* and this is the most logical place to copy it.
*
* !!!
* There's no need to close the new cursor if we goto err only
* because this is the last thing that can fail. Modifier of this
* function beware!
*/
if ((ret = jc->j_curslist[0]->c_dup(jc->j_curslist[0], jc->j_workcurs,
DB_POSITIONI)) != 0)
goto err;
dbc->c_close = __db_join_close;
dbc->c_del = __db_join_del;
dbc->c_get = __db_join_get;
dbc->c_put = __db_join_put;
dbc->internal = (DBC_INTERNAL *) jc;
dbc->dbp = primary;
jc->j_primary = primary;
*dbcp = dbc;
MUTEX_THREAD_LOCK(dbenv, primary->mutexp);
TAILQ_INSERT_TAIL(&primary->join_queue, dbc, links);
MUTEX_THREAD_UNLOCK(dbenv, primary->mutexp);
return (0);
err: if (jc != NULL) {
if (jc->j_curslist != NULL)
__os_free(dbenv, jc->j_curslist);
if (jc->j_workcurs != NULL) {
if (jc->j_workcurs[0] != NULL)
__os_free(dbenv, jc->j_workcurs[0]);
__os_free(dbenv, jc->j_workcurs);
}
if (jc->j_fdupcurs != NULL)
__os_free(dbenv, jc->j_fdupcurs);
if (jc->j_exhausted != NULL)
__os_free(dbenv, jc->j_exhausted);
__os_free(dbenv, jc);
}
if (dbc != NULL)
__os_free(dbenv, dbc);
return (ret);
}
/*
 * __db_join_put --
 *	The c_put method of a join cursor.  Storing through a join cursor
 *	is not supported: apart from whatever PANIC_CHECK does (the macro is
 *	defined elsewhere and presumably returns early on a panicked
 *	environment), the call always fails with EINVAL.
 */
static int
__db_join_put(dbc, key, data, flags)
	DBC *dbc;
	DBT *key;
	DBT *data;
	u_int32_t flags;
{
	/* The key, data and flags arguments are never consulted. */
	COMPQUIET(flags, 0);
	COMPQUIET(data, NULL);
	COMPQUIET(key, NULL);

	PANIC_CHECK(dbc->dbp->dbenv);

	return (EINVAL);
}
/*
 * __db_join_del --
 *	The c_del method of a join cursor.  Deleting through a join cursor
 *	is not supported: apart from whatever PANIC_CHECK does (the macro is
 *	defined elsewhere and presumably returns early on a panicked
 *	environment), the call always fails with EINVAL.
 */
static int
__db_join_del(dbc, flags)
	DBC *dbc;
	u_int32_t flags;
{
	/* The flags argument is never consulted. */
	COMPQUIET(flags, 0);

	PANIC_CHECK(dbc->dbp->dbenv);

	return (EINVAL);
}
static int
__db_join_get(dbc, key_arg, data_arg, flags)
DBC *dbc;
DBT *key_arg, *data_arg;
u_int32_t flags;
{
DBT *key_n, key_n_mem;
DB *dbp;
DBC *cp;
JOIN_CURSOR *jc;
int db_manage_data, ret;
u_int32_t i, j, operation, opmods;
dbp = dbc->dbp;
jc = (JOIN_CURSOR *)dbc->internal;
PANIC_CHECK(dbp->dbenv);
operation = LF_ISSET(DB_OPFLAGS_MASK);
/* !!!
* If the set of flags here changes, check that __db_join_primget
* is updated to handle them properly.
*/
opmods = LF_ISSET(DB_RMW | DB_DIRTY_READ);
if ((ret = __db_joingetchk(dbp, key_arg, flags)) != 0)
return (ret);
/*
* Since we are fetching the key as a datum in the secondary indices,
* we must be careful of caller-specified DB_DBT_* memory
* management flags. If necessary, use a stack-allocated DBT;
* we'll appropriately copy and/or allocate the data later.
*/
if (F_ISSET(key_arg, DB_DBT_USERMEM) ||
F_ISSET(key_arg, DB_DBT_MALLOC)) {
/* We just use the default buffer; no need to go malloc. */
key_n = &key_n_mem;
memset(key_n, 0, sizeof(DBT));
} else {
/*
* Either DB_DBT_REALLOC or the default buffer will work
* fine if we have to reuse it, as we do.
*/
key_n = key_arg;
}
/*
* If our last attempt to do a get on the primary key failed,
* short-circuit the join and try again with the same key.
*/
if (F_ISSET(jc, JOIN_RETRY))
goto samekey;
F_CLR(jc, JOIN_RETRY);
retry: ret = jc->j_workcurs[0]->c_real_get(jc->j_workcurs[0],
&jc->j_key, key_n,
opmods | (jc->j_exhausted[0] ? DB_NEXT_DUP : DB_CURRENT));
if (ret == ENOMEM) {
jc->j_key.ulen <<= 1;
if ((ret = __os_realloc(dbp->dbenv,
jc->j_key.ulen, &jc->j_key.data)) != 0)
goto mem_err;
goto retry;
}
/*
* If ret == DB_NOTFOUND, we're out of elements of the first
* secondary cursor. This is how we finally finish the join
* if all goes well.
*/
if (ret != 0)
goto err;
/*
* If jc->j_exhausted[0] == 1, we've just advanced the first cursor,
* and we're going to want to advance all the cursors that point to
* the first member of a duplicate duplicate set (j_fdupcurs[1..N]).
* Close all the cursors in j_fdupcurs; we'll reopen them the
* first time through the upcoming loop.
*/
for (i = 1; i < jc->j_ncurs; i++) {
if (jc->j_fdupcurs[i] != NULL &&
(ret = jc->j_fdupcurs[i]->c_close(jc->j_fdupcurs[i])) != 0)
goto err;
jc->j_fdupcurs[i] = NULL;
}
/*
* If jc->j_curslist[1] == NULL, we have only one cursor in the join.
* Thus, we can safely increment that one cursor on each call
* to __db_join_get, and we signal this by setting jc->j_exhausted[0]
* right away.
*
* Otherwise, reset jc->j_exhausted[0] to 0, so that we don't
* increment it until we know we're ready to.
*/
if (jc->j_curslist[1] == NULL)
jc->j_exhausted[0] = 1;
else
jc->j_exhausted[0] = 0;
/* We have the first element; now look for it in the other cursors. */
for (i = 1; i < jc->j_ncurs; i++) {
DB_ASSERT(jc->j_curslist[i] != NULL);
if (jc->j_workcurs[i] == NULL)
/* If this is NULL, we need to dup curslist into it. */
if ((ret = jc->j_curslist[i]->c_dup(
jc->j_curslist[i], jc->j_workcurs + i,
DB_POSITIONI)) != 0)
goto err;
retry2: cp = jc->j_workcurs[i];
if ((ret = __db_join_getnext(cp, &jc->j_key, key_n,
jc->j_exhausted[i], opmods)) == DB_NOTFOUND) {
/*
* jc->j_workcurs[i] has no more of the datum we're
* interested in. Go back one cursor and get
* a new dup. We can't just move to a new
* element of the outer relation, because that way
* we might miss duplicate duplicates in cursor i-1.
*
* If this takes us back to the first cursor,
* -then- we can move to a new element of the outer
* relation.
*/
--i;
jc->j_exhausted[i] = 1;
if (i == 0) {
for (j = 1; jc->j_workcurs[j] != NULL; j++) {
/*
* We're moving to a new element of
* the first secondary cursor. If
* that cursor is sorted, then any
* other sorted cursors can be safely
* reset to the first duplicate
* duplicate in the current set if we
* have a pointer to it (we can't just
* leave them be, or we'll miss
* duplicate duplicates in the outer
* relation).
*
* If the first cursor is unsorted, or
* if cursor j is unsorted, we can
* make no assumptions about what
* we're looking for next or where it
* will be, so we reset to the very
* beginning (setting workcurs NULL
* will achieve this next go-round).
*
* XXX: This is likely to break
* horribly if any two cursors are
* both sorted, but have different
* specified sort functions. For,
* now, we dismiss this as pathology
* and let strange things happen--we
* can't make rope childproof.
*/
if ((ret = jc->j_workcurs[j]->c_close(
jc->j_workcurs[j])) != 0)
goto err;
if (!SORTED_SET(jc, 0) ||
!SORTED_SET(jc, j) ||
jc->j_fdupcurs[j] == NULL)
/*
* Unsafe conditions;
* reset fully.
*/
jc->j_workcurs[j] = NULL;
else
/* Partial reset suffices. */
if ((jc->j_fdupcurs[j]->c_dup(
jc->j_fdupcurs[j],
&jc->j_workcurs[j],
DB_POSITIONI)) != 0)
goto err;
jc->j_exhausted[j] = 0;
}
goto retry;
/* NOTREACHED */
}
/*
* We're about to advance the cursor and need to
* reset all of the workcurs[j] where j>i, so that
* we don't miss any duplicate duplicates.
*/
for (j = i + 1;
jc->j_workcurs[j] != NULL;
j++) {
if ((ret = jc->j_workcurs[j]->c_close(
jc->j_workcurs[j])) != 0)
goto err;
jc->j_exhausted[j] = 0;
if (jc->j_fdupcurs[j] != NULL &&
(ret = jc->j_fdupcurs[j]->c_dup(
jc->j_fdupcurs[j], &jc->j_workcurs[j],
DB_POSITIONI)) != 0)
goto err;
else
jc->j_workcurs[j] = NULL;
}
goto retry2;
/* NOTREACHED */
}
if (ret == ENOMEM) {
jc->j_key.ulen <<= 1;
if ((ret = __os_realloc(dbp->dbenv, jc->j_key.ulen,
&jc->j_key.data)) != 0) {
mem_err: __db_err(dbp->dbenv,
"Allocation failed for join key, len = %lu",
(u_long)jc->j_key.ulen);
goto err;
}
goto retry2;
}
if (ret != 0)
goto err;
/*
* If we made it this far, we've found a matching
* datum in cursor i. Mark the current cursor
* unexhausted, so we don't miss any duplicate
* duplicates the next go-round--unless this is the
* very last cursor, in which case there are none to
* miss, and we'll need that exhausted flag to finally
* get a DB_NOTFOUND and move on to the next datum in
* the outermost cursor.
*/
if (i + 1 != jc->j_ncurs)
jc->j_exhausted[i] = 0;
else
jc->j_exhausted[i] = 1;
/*
* If jc->j_fdupcurs[i] is NULL and the ith cursor's dups are
* sorted, then we're here for the first time since advancing
* cursor 0, and we have a new datum of interest.
* jc->j_workcurs[i] points to the beginning of a set of
* duplicate duplicates; store this into jc->j_fdupcurs[i].
*/
if (SORTED_SET(jc, i) && jc->j_fdupcurs[i] == NULL && (ret =
cp->c_dup(cp, &jc->j_fdupcurs[i], DB_POSITIONI)) != 0)
goto err;
}
err: if (ret != 0)
return (ret);
if (0) {
samekey: /*
* Get the key we tried and failed to return last time;
* it should be the current datum of all the secondary cursors.
*/
if ((ret = jc->j_workcurs[0]->c_real_get(jc->j_workcurs[0],
&jc->j_key, key_n, DB_CURRENT | opmods)) != 0)
return (ret);
F_CLR(jc, JOIN_RETRY);
}
/*
* ret == 0; we have a key to return.
*
* If DB_DBT_USERMEM or DB_DBT_MALLOC is set, we need to copy the key
* back into the dbt we were given for the key; call __db_retcopy.
* Otherwise, assert that we do not need to copy anything and proceed.
*/
DB_ASSERT(F_ISSET(
key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC) || key_n == key_arg);
if (F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC) &&
(ret = __db_retcopy(dbp->dbenv,
key_arg, key_n->data, key_n->size, NULL, NULL)) != 0) {
/*
* The retcopy failed, most commonly because we have a user
* buffer for the key which is too small. Set things up to
* retry next time, and return.
*/
F_SET(jc, JOIN_RETRY);
return (ret);
}
/*
* If DB_JOIN_ITEM is set, we return it; otherwise we do the lookup
* in the primary and then return.
*
* Note that we use key_arg here; it is safe (and appropriate)
* to do so.
*/
if (operation == DB_JOIN_ITEM)
return (0);
/*
* If data_arg->flags == 0--that is, if DB is managing the
* data DBT's memory--it's not safe to just pass the DBT
* through to the primary get call, since we don't want that
* memory to belong to the primary DB handle (and if the primary
* is free-threaded, it can't anyway).
*
* Instead, use memory that is managed by the join cursor, in
* jc->j_rdata.
*/
if (!F_ISSET(data_arg, DB_DBT_MALLOC | DB_DBT_REALLOC | DB_DBT_USERMEM))
db_manage_data = 1;
else
db_manage_data = 0;
if ((ret = __db_join_primget(jc->j_primary,
jc->j_curslist[0]->txn, jc->j_curslist[0]->locker, key_arg,
db_manage_data ? &jc->j_rdata : data_arg, opmods)) != 0) {
if (ret == DB_NOTFOUND)
/*
* If ret == DB_NOTFOUND, the primary and secondary
* are out of sync; every item in each secondary
* should correspond to something in the primary,
* or we shouldn't have done the join this way.
* Wail.
*/
ret = __db_secondary_corrupt(jc->j_primary);
else
/*
* The get on the primary failed for some other
* reason, most commonly because we're using a user
* buffer that's not big enough. Flag our failure
* so we can return the same key next time.
*/
F_SET(jc, JOIN_RETRY);
}
if (db_manage_data && ret == 0) {
data_arg->data = jc->j_rdata.data;
data_arg->size = jc->j_rdata.size;
}
return (ret);
}
static int
__db_join_close(dbc)
DBC *dbc;
{
DB *dbp;
DB_ENV *dbenv;
JOIN_CURSOR *jc;
int ret, t_ret;
u_int32_t i;
jc = (JOIN_CURSOR *)dbc->internal;
dbp = dbc->dbp;
dbenv = dbp->dbenv;
ret = t_ret = 0;
/*
* Remove from active list of join cursors. Note that this
* must happen before any action that can fail and return, or else
* __db_close may loop indefinitely.
*/
MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
TAILQ_REMOVE(&dbp->join_queue, dbc, links);
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
PANIC_CHECK(dbenv);
/*
* Close any open scratch cursors. In each case, there may
* not be as many outstanding as there are cursors in
* curslist, but we want to close whatever's there.
*
* If any close fails, there's no reason not to close everything else;
* we'll just return the error code of the last one to fail. There's
* not much the caller can do anyway, since these cursors only exist
* hanging off a db-internal data structure that they shouldn't be
* mucking with.
*/
for (i = 0; i < jc->j_ncurs; i++) {
if (jc->j_workcurs[i] != NULL && (t_ret =
jc->j_workcurs[i]->c_close(jc->j_workcurs[i])) != 0)
ret = t_ret;
if (jc->j_fdupcurs[i] != NULL && (t_ret =
jc->j_fdupcurs[i]->c_close(jc->j_fdupcurs[i])) != 0)
ret = t_ret;
}
__os_free(dbenv, jc->j_exhausted);
__os_free(dbenv, jc->j_curslist);
__os_free(dbenv, jc->j_workcurs);
__os_free(dbenv, jc->j_fdupcurs);
__os_free(dbenv, jc->j_key.data);
if (jc->j_rdata.data != NULL)
__os_ufree(dbenv, jc->j_rdata.data);
__os_free(dbenv, jc);
__os_free(dbenv, dbc);
return (ret);
}
/*
 * __db_join_getnext --
 *	This function replaces the DBC_CONTINUE and DBC_KEYSET
 *	functionality inside the various cursor get routines.
 *
 *	If exhausted == 0, we're not done with the current datum;
 *	return it if it compares equal to the value in "data" (using the
 *	database's dup_compare function, or __bam_defcmp if none is set),
 *	otherwise search using DB_GET_BOTHC (which is faster than
 *	iteratively doing DB_NEXT_DUP) forward until we find one that does.
 *
 *	If exhausted == 1, we are done with the current datum, so just
 *	leap forward to searching NEXT_DUPs.
 *
 *	Any other value of exhausted is rejected with EINVAL.
 *
 *	If no matching datum exists, returns DB_NOTFOUND, else 0; errors
 *	from the underlying cursor get or from the copy into "data" are
 *	passed back unchanged.
 */
static int
__db_join_getnext(dbc, key, data, exhausted, opmods)
	DBC *dbc;
	DBT *key, *data;
	u_int32_t exhausted, opmods;
{
	int ret, cmp;
	DB *dbp;
	DBT ldata;
	int (*func) __P((DB *, const DBT *, const DBT *));

	dbp = dbc->dbp;
	func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;

	switch (exhausted) {
	case 0:
		/*
		 * We don't want to step on data->data; use a new
		 * DBT and malloc so we don't step on dbc's rdata memory.
		 */
		memset(&ldata, 0, sizeof(DBT));
		F_SET(&ldata, DB_DBT_MALLOC);
		if ((ret = dbc->c_real_get(dbc,
		    key, &ldata, opmods | DB_CURRENT)) != 0)
			break;
		cmp = func(dbp, data, &ldata);
		if (cmp == 0) {
			/*
			 * We have to return the real data value.  Copy
			 * it into data, then free the buffer we malloc'ed
			 * above.  The buffer is ours whether or not the
			 * copy succeeds, so free it on the error path too
			 * rather than leaking it.
			 */
			ret = __db_retcopy(dbp->dbenv, data, ldata.data,
			    ldata.size, &data->data, &data->size);
			__os_ufree(dbp->dbenv, ldata.data);
			return (ret);
		}

		/*
		 * Didn't match--we want to fall through and search future
		 * dups.  We just forget about ldata and free
		 * its buffer--data contains the value we're searching for.
		 */
		__os_ufree(dbp->dbenv, ldata.data);
		/* FALLTHROUGH */
	case 1:
		ret = dbc->c_real_get(dbc, key, data, opmods | DB_GET_BOTHC);
		break;
	default:
		ret = EINVAL;
		break;
	}

	return (ret);
}
/*
 * __db_join_cmp --
 *	Comparison function for sorting DBCs in cardinality order.
 *
 *	a and b each point at a DBC pointer.  Returns a negative value,
 *	zero, or a positive value as the count reported by the first
 *	cursor's c_count is less than, equal to, or greater than the
 *	second's.  If either c_count call fails, the cursors are treated
 *	as equal.
 */
static int
__db_join_cmp(a, b)
	const void *a, *b;
{
	DBC *dbca, *dbcb;
	db_recno_t counta, countb;

	/* In case c_count fails, pretend cursors are equal. */
	counta = countb = 0;

	dbca = *((DBC * const *)a);
	dbcb = *((DBC * const *)b);

	if (dbca->c_count(dbca, &counta, 0) != 0 ||
	    dbcb->c_count(dbcb, &countb, 0) != 0)
		return (0);

	/*
	 * Compare explicitly rather than returning counta - countb: the
	 * difference of two record counts need not fit in an int, and a
	 * wrapped or truncated result would report the wrong ordering.
	 */
	if (counta < countb)
		return (-1);
	if (counta > countb)
		return (1);
	return (0);
}
/*
 * __db_join_primget --
 *	Fetch key's record from the primary via a short-lived internal
 *	cursor opened with the caller's transaction and locker ID, so that
 *	no new locker ID is used if we're doing CDB locking.
 *
 *	flags may contain only DB_RMW and/or DB_DIRTY_READ.  Returns the
 *	cursor-open error, else the get error, else the result of closing
 *	the cursor.
 */
static int
__db_join_primget(dbp, txn, lockerid, key, data, flags)
	DB *dbp;
	DB_TXN *txn;
	u_int32_t lockerid;
	DBT *key, *data;
	u_int32_t flags;
{
	DBC *pcursor;
	int dirty_read, ret, rmw_op, t_ret;

	/*
	 * __db_join_get copies just two flags into its "opmods":
	 * DB_RMW, which modifies the c_get call, and DB_DIRTY_READ, which
	 * is a property of the cursor.  Peel both off; anything left over
	 * is a DB bug.
	 */
	rmw_op = LF_ISSET(DB_RMW);
	dirty_read = LF_ISSET(DB_DIRTY_READ);
	LF_CLR(DB_RMW | DB_DIRTY_READ);
	DB_ASSERT(flags == 0);

	ret = __db_icursor(dbp,
	    txn, dbp->type, PGNO_INVALID, 0, lockerid, &pcursor);
	if (ret != 0)
		return (ret);

	/* A dirty-read transaction implies a dirty-read cursor as well. */
	if (!dirty_read && txn != NULL && F_ISSET(txn, TXN_DIRTY_READ))
		dirty_read = 1;
	if (dirty_read)
		F_SET(pcursor, DBC_DIRTY_READ);
	F_SET(pcursor, DBC_TRANSIENT);

	/*
	 * Join cursors swap in their own DB_DBT_REALLOC'ed buffers, so
	 * this ought to be redundant; it is here purely to mirror what
	 * __db_get does.
	 */
	SET_RET_MEM(pcursor, dbp);

	ret = pcursor->c_get(pcursor, key, data, DB_SET | rmw_op);

	/* Always close the cursor; a get error outranks a close error. */
	t_ret = __db_c_close(pcursor);
	return (ret != 0 ? ret : t_ret);
}