mariadb/newbrt/cachetable.c
Bradley C. Kuszmaul 8511ea7372 Log db close so that recovery will work right if the same db is opened and closed repeatedly.
This also allows file numbers to be reused.
Don't pass the BRT into the flush commands, since the BRT may no longer be present.
Put a counter in to see how many rollback records are present.  (Addresses #698.)
Increment the file version to 4.
Fixes #545, #703.

Note: All the tests pass except
 * Many cxx tests are getting valgrind errors.  (Addresses #716.  Possibly causes #716.)
 * {{{test_log9.recover}}} fails with "Binary files ... differ".  These will presumably be fixed by #711 or #714.  (Addresses #711, #714.)
 * {{{test_log10.recover}}} fails.   There are two failures:
  1. A valgrind problem (see #718.)  (Addresses #718.  Possibly causes #718.)
  1. The "Binary files ... differ" issue.


git-svn-id: file:///svn/tokudb@3486 c7de825b-a66e-492c-adef-691d508d4ae1
2008-04-17 03:11:55 +00:00

/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
#include "cachetable.h"
#include "hashfun.h"
#include "memory.h"
#include "primes.h"
#include "toku_assert.h"
#include "yerror.h"
#include "brt-internal.h"
#include "log_header.h"
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
//#define TRACE_CACHETABLE
#ifdef TRACE_CACHETABLE
#define WHEN_TRACE_CT(x) x
#else
#define WHEN_TRACE_CT(x) ((void)0)
#endif
typedef struct ctpair *PAIR;
struct ctpair {
    enum typ_tag tag;
    long long pinned;
    long size;
    char dirty;
    CACHEKEY key;
    void *value;
    PAIR next,prev;        // In LRU list.
    PAIR hash_chain;
    CACHEFILE cachefile;
    CACHETABLE_FLUSH_FUNC_T flush_callback;
    CACHETABLE_FETCH_FUNC_T fetch_callback;
    void *extraargs;
    int verify_flag;       /* Used in verify_cachetable() */
    LSN modified_lsn;      // What was the LSN when modified (undefined if not dirty)
    LSN written_lsn;       // What was the LSN when written (we need to get this information when we fetch)
};
// The cachetable is as close to an ENV as we get.
struct cachetable {
    enum typ_tag tag;
    int n_in_table;
    int table_size;
    PAIR *table;
    PAIR head,tail;        // of LRU list. head is the most recently used. tail is least recently used.
    CACHEFILE cachefiles;
    long size_current, size_limit;
    int primeidx;
    LSN lsn_of_checkpoint; // the most recent checkpoint in the log.
    TOKULOGGER logger;
};
struct fileid {
    dev_t st_dev; /* device and inode are enough to uniquely identify a file in unix. */
    ino_t st_ino;
};
struct cachefile {
    CACHEFILE next;
    u_int64_t refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it.
                         * The reference count is one for every open DB.
                         * Plus one for every commit/rollback record. (It would be harder to keep a count for every open transaction,
                         * because then we'd have to figure out if the transaction was already counted. If we simply use a count for
                         * every record in the transaction, we'll be ok. Hence we use a 64-bit counter to make sure we don't run out.
                         */
    int fd;       /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
    CACHETABLE cachetable;
    struct fileid fileid;
    FILENUM filenum;
    char *fname;
};
int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER logger) {
    TAGMALLOC(CACHETABLE, t);
    int i;
    t->n_in_table = 0;
    t->primeidx = 0;
    t->table_size = toku_get_prime(t->primeidx);
    MALLOC_N(t->table_size, t->table);
    assert(t->table);
    t->head = t->tail = 0;
    for (i=0; i<t->table_size; i++) {
        t->table[i]=0;
    }
    t->cachefiles = 0;
    t->size_current = 0;
    t->size_limit = size_limit;
    t->lsn_of_checkpoint = initial_lsn;
    t->logger = logger;
    *result = t;
    return 0;
}
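/* A minimal usage sketch (not part of this file's API): create a cachetable,
 * then tear it down.  The size limit is made up for illustration; a real
 * caller would get its initial LSN from the logger/recovery code rather than
 * using ZERO_LSN. */
#if 0
static void example_create_and_close (void) {
    CACHETABLE ct;
    int r = toku_create_cachetable(&ct, 1<<20 /* 1MiB limit */, ZERO_LSN, 0 /* no logger */);
    assert(r == 0);
    // ... open cachefiles, put/get-and-pin/unpin objects ...
    r = toku_cachetable_close(&ct); // requires that everything is unpinned and flushable
    assert(r == 0);
}
#endif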
// What cachefile goes with a particular filenum?
int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf) {
    CACHEFILE extant;
    for (extant = t->cachefiles; extant; extant=extant->next) {
        if (extant->filenum.fileid==filenum.fileid) {
            *cf = extant;
            return 0;
        }
    }
    return ENOENT;
}
// If something goes wrong, close the fd. After this, the caller shouldn't close the fd, but instead should close the cachefile.
int toku_cachetable_openfd (CACHEFILE *cf, CACHETABLE t, int fd, const char *fname) {
    int r;
    CACHEFILE extant;
    FILENUM max_filenum_in_use={0};
    struct stat statbuf;
    struct fileid fileid;
    memset(&fileid, 0, sizeof(fileid));
    r=fstat(fd, &statbuf);
    if (r != 0) { r=errno; close(fd); return r; } // Don't proceed with a garbage statbuf.
    fileid.st_dev = statbuf.st_dev;
    fileid.st_ino = statbuf.st_ino;
    for (extant = t->cachefiles; extant; extant=extant->next) {
        if (max_filenum_in_use.fileid<extant->filenum.fileid) max_filenum_in_use=extant->filenum;
        if (memcmp(&extant->fileid, &fileid, sizeof(fileid))==0) {
            r = close(fd);
            assert(r == 0);
            extant->refcount++;
            *cf = extant;
            return 0;
        }
    }
    {
        CACHEFILE MALLOC(newcf);
        newcf->filenum.fileid = 1+max_filenum_in_use.fileid;
        newcf->next = t->cachefiles;
        newcf->refcount = 1;
        newcf->fd = fd;
        newcf->cachetable = t;
        newcf->fileid = fileid;
        newcf->fname = fname ? toku_strdup(fname) : 0;
        t->cachefiles = newcf;
        *cf = newcf;
        return 0;
    }
}
int toku_cachetable_openf (CACHEFILE *cf, CACHETABLE t, const char *fname, int flags, mode_t mode) {
    int fd = open(fname, flags, mode);
    if (fd<0) return errno;
    return toku_cachetable_openfd (cf, t, fd, fname);
}
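/* A minimal sketch of the cachefile refcounting described in struct cachefile:
 * opening the same underlying file twice yields the same CACHEFILE (matched by
 * dev/ino) with refcount 2, and the file is really closed only on the last
 * toku_cachefile_close.  The filename is made up for illustration. */
#if 0
static void example_shared_cachefile (CACHETABLE ct) {
    CACHEFILE cf1, cf2;
    int r = toku_cachetable_openf(&cf1, ct, "example.db", O_RDWR|O_CREAT, 0777);
    assert(r == 0);
    r = toku_cachetable_openf(&cf2, ct, "example.db", O_RDWR, 0777);
    assert(r == 0 && cf1 == cf2);                      // extant CACHEFILE is reused
    r = toku_cachefile_close(&cf2, 0); assert(r == 0); // refcount 2 -> 1, stays open
    r = toku_cachefile_close(&cf1, 0); assert(r == 0); // refcount 1 -> 0, flush and close
}
#endif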
static CACHEFILE remove_cf_from_list (CACHEFILE cf, CACHEFILE list) {
    if (list==0) return 0;
    else if (list==cf) {
        return list->next;
    } else {
        list->next = remove_cf_from_list(cf, list->next);
        return list;
    }
}
static int cachefile_flush_and_remove (CACHEFILE cf);
// Increment the reference count
void toku_cachefile_refup (CACHEFILE cf) {
    cf->refcount++;
}
int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger) {
    CACHEFILE cf = *cfp;
    assert(cf->refcount>0);
    cf->refcount--;
    if (cf->refcount==0) {
        int r;
        if ((r = cachefile_flush_and_remove(cf))) return r;
        r = close(cf->fd);
        assert(r == 0);
        cf->fd = -1;
        cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles);
        if (logger) {
            assert(cf->fname);
            BYTESTRING bs = {.len=strlen(cf->fname), .data=cf->fname};
            r = toku_log_cfclose(logger, 0, 0, bs, cf->filenum);
        }
        if (cf->fname)
            toku_free(cf->fname);
        toku_free(cf);
        *cfp=0;
        return r;
    } else {
        *cfp=0;
        return 0;
    }
}
int toku_cachetable_assert_all_unpinned (CACHETABLE t) {
    int i;
    int some_pinned=0;
    for (i=0; i<t->table_size; i++) {
        PAIR p;
        for (p=t->table[i]; p; p=p->hash_chain) {
            assert(p->pinned>=0);
            if (p->pinned) {
                printf("%s:%d pinned: %lld (%p)\n", __FILE__, __LINE__, p->key, p->value);
                some_pinned=1;
            }
        }
    }
    return some_pinned;
}
int toku_cachefile_count_pinned (CACHEFILE cf, int print_them) {
    int i;
    int n_pinned=0;
    CACHETABLE t = cf->cachetable;
    for (i=0; i<t->table_size; i++) {
        PAIR p;
        for (p=t->table[i]; p; p=p->hash_chain) {
            assert(p->pinned>=0);
            if (p->pinned && p->cachefile==cf) {
                if (print_them) printf("%s:%d pinned: %lld (%p)\n", __FILE__, __LINE__, p->key, p->value);
                n_pinned++;
            }
        }
    }
    return n_pinned;
}
#if 0
unsigned int ct_hash_longlong (unsigned long long l) {
    unsigned int r = hash_key((unsigned char*)&l, 8);
    printf("%lld --> %d --> %d\n", l, r, r%64);
    return r;
}
#endif
static unsigned int hashit (CACHETABLE t, CACHEKEY key) {
    return hash_key((unsigned char*)&key, sizeof(key))%t->table_size;
}
static void cachetable_rehash (CACHETABLE t, int primeindexdelta) {
    // printf("rehash %p %d %d %d\n", t, primeindexdelta, t->n_in_table, t->table_size);
    int newprimeindex = primeindexdelta+t->primeidx;
    if (newprimeindex < 0)
        return;
    int newtable_size = toku_get_prime(newprimeindex);
    PAIR *newtable = toku_calloc(newtable_size, sizeof(*t->table));
    int i;
    //printf("%s:%d newtable_size=%d\n", __FILE__, __LINE__, newtable_size);
    assert(newtable!=0);
    t->primeidx=newprimeindex;
    for (i=0; i<newtable_size; i++) newtable[i]=0;
    for (i=0; i<t->table_size; i++) {
        PAIR p;
        while ((p=t->table[i])!=0) {
            unsigned int h = hash_key((unsigned char *)&p->key, sizeof (p->key))%newtable_size;
            t->table[i] = p->hash_chain;
            p->hash_chain = newtable[h];
            newtable[h] = p;
        }
    }
    toku_free(t->table);
    // printf("Freed\n");
    t->table=newtable;
    t->table_size=newtable_size;
    //printf("Done growing or shrinking\n");
}
static void lru_remove (CACHETABLE t, PAIR p) {
    if (p->next) {
        p->next->prev = p->prev;
    } else {
        assert(t->tail==p);
        t->tail = p->prev;
    }
    if (p->prev) {
        p->prev->next = p->next;
    } else {
        assert(t->head==p);
        t->head = p->next;
    }
    p->prev = p->next = 0;
}
static void lru_add_to_list (CACHETABLE t, PAIR p) {
    // requires that p is not currently in the LRU list.
    assert(p->prev==0);
    p->prev = 0;
    p->next = t->head;
    if (t->head) {
        t->head->prev = p;
    } else {
        assert(!t->tail);
        t->tail = p;
    }
    t->head = p;
}
static void lru_touch (CACHETABLE t, PAIR p) {
    lru_remove(t,p);
    lru_add_to_list(t,p);
}
static PAIR remove_from_hash_chain (PAIR remove_me, PAIR list) {
    if (remove_me==list) return list->hash_chain;
    list->hash_chain = remove_from_hash_chain(remove_me, list->hash_chain);
    return list;
}
// Predicate to determine if a node must be renamed. Nodes are renamed the first time they are written
// after a checkpoint.
// Thus we need to rename it if it is dirty,
// if it has been modified within the current checkpoint regime (hence non-strict inequality),
// and the last time it was written was in a previous checkpoint regime (strict inequality).
static BOOL need_to_rename_p (CACHETABLE t, PAIR p) {
    return (p->dirty
            && p->modified_lsn.lsn>=t->lsn_of_checkpoint.lsn  // nonstrict
            && p->written_lsn.lsn < t->lsn_of_checkpoint.lsn); // strict
}
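/* Worked example of the predicate above (LSN values made up for illustration).
 * Suppose the most recent checkpoint is at LSN 100:
 *  - dirty pair, modified_lsn=120, written_lsn=90:  rename (first write since the checkpoint).
 *  - dirty pair, modified_lsn=120, written_lsn=110: no rename (already written, hence already renamed, in this regime).
 *  - dirty pair, modified_lsn=80,  written_lsn=70:  no rename (not modified within the current regime).
 *  - clean pair: never renamed. */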
static void flush_and_remove (CACHETABLE t, PAIR remove_me, int write_me) {
    lru_remove(t, remove_me);
    //printf("flush_callback(%lld,%p)\n", remove_me->key, remove_me->value);
    WHEN_TRACE_CT(printf("%s:%d CT flush_callback(%lld, %p, dirty=%d, 0)\n", __FILE__, __LINE__, remove_me->key, remove_me->value, remove_me->dirty && write_me));
    //printf("%s:%d TAG=%x p=%p\n", __FILE__, __LINE__, remove_me->tag, remove_me);
    //printf("%s:%d dirty=%d\n", __FILE__, __LINE__, remove_me->dirty);
    remove_me->flush_callback(remove_me->cachefile, remove_me->key, remove_me->value, remove_me->size, remove_me->dirty && write_me, 0,
                              t->lsn_of_checkpoint, need_to_rename_p(t, remove_me));
    t->n_in_table--;
    // Remove it from the hash chain.
    {
        unsigned int h = hashit(t, remove_me->key);
        t->table[h] = remove_from_hash_chain (remove_me, t->table[h]);
    }
    t->size_current -= remove_me->size;
    toku_free(remove_me);
}
static int maybe_flush_some (CACHETABLE t, long size) {
    int r = 0;
again:
    // if (t->n_in_table >= t->table_size) {
    if (size + t->size_current > t->size_limit) {
        /* Try to remove one. */
        PAIR remove_me;
        for (remove_me = t->tail; remove_me; remove_me = remove_me->prev) {
            if (!remove_me->pinned) {
                flush_and_remove(t, remove_me, 1);
                goto again;
            }
        }
        /* All were pinned. */
        //printf("All are pinned\n");
        return 0; // Don't indicate an error code. Instead let memory get overfull.
    }
    if (4 * t->n_in_table < t->table_size)
        cachetable_rehash(t, -1);
    return r;
}
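/* Example of the accounting above (numbers made up): with size_limit=1000,
 * size_current=950, and an incoming object of size=100, 950+100 > 1000, so
 * unpinned pairs are evicted starting from the LRU tail until the sum fits,
 * or until only pinned pairs remain (in which case the table is allowed to
 * go over its limit rather than fail). */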
static int cachetable_insert_at(CACHEFILE cachefile, int h, CACHEKEY key, void *value, long size,
                                cachetable_flush_func_t flush_callback,
                                cachetable_fetch_func_t fetch_callback,
                                void *extraargs, int dirty,
                                LSN written_lsn) {
    TAGMALLOC(PAIR, p);
    p->pinned = 1;
    p->dirty = dirty;
    p->size = size;
    //printf("%s:%d p=%p dirty=%d\n", __FILE__, __LINE__, p, p->dirty);
    p->key = key;
    p->value = value;
    p->next = p->prev = 0;
    p->cachefile = cachefile;
    p->flush_callback = flush_callback;
    p->fetch_callback = fetch_callback;
    p->extraargs = extraargs;
    p->modified_lsn.lsn = 0;
    p->written_lsn = written_lsn;
    CACHETABLE ct = cachefile->cachetable;
    lru_add_to_list(ct, p);
    p->hash_chain = ct->table[h];
    ct->table[h] = p;
    ct->n_in_table++;
    ct->size_current += size;
    if (ct->n_in_table > ct->table_size) {
        cachetable_rehash(ct, +1);
    }
    return 0;
}
int toku_cachetable_put(CACHEFILE cachefile, CACHEKEY key, void*value, long size,
                        cachetable_flush_func_t flush_callback, cachetable_fetch_func_t fetch_callback, void *extraargs) {
    WHEN_TRACE_CT(printf("%s:%d CT cachetable_put(%lld)=%p\n", __FILE__, __LINE__, key, value));
    {
        PAIR p;
        for (p=cachefile->cachetable->table[hashit(cachefile->cachetable, key)]; p; p=p->hash_chain) {
            if (p->key==key && p->cachefile==cachefile) {
                // Semantically, these two asserts are not strictly right. After all, when are two functions eq?
                // In practice, the functions had better be the same.
                assert(p->flush_callback==flush_callback);
                assert(p->fetch_callback==fetch_callback);
                return -1; /* Already present. */
            }
        }
    }
    int r;
    if ((r=maybe_flush_some(cachefile->cachetable, size))) return r;
    // Flushing could change the result of hashit(), so hash after the flush.
    r = cachetable_insert_at(cachefile, hashit(cachefile->cachetable, key), key, value, size, flush_callback, fetch_callback, extraargs, 1, ZERO_LSN);
    return r;
}
int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, void**value, long *sizep,
                                cachetable_flush_func_t flush_callback, cachetable_fetch_func_t fetch_callback, void *extraargs) {
    CACHETABLE t = cachefile->cachetable;
    int tsize __attribute__((__unused__)) = t->table_size;
    PAIR p;
    for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            *value = p->value;
            if (sizep) *sizep = p->size;
            p->pinned++;
            lru_touch(t,p);
            WHEN_TRACE_CT(printf("%s:%d cachetable_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value));
            return 0;
        }
    }
    int r;
    if ((r=maybe_flush_some(t, 1))) return r;
    // Note: hashit(t,key) may have changed as a result of flushing.
    {
        void *toku_value;
        long size = 1; // compat
        LSN written_lsn;
        WHEN_TRACE_CT(printf("%s:%d CT: fetch_callback(%lld...)\n", __FILE__, __LINE__, key));
        if ((r=fetch_callback(cachefile, key, &toku_value, &size, extraargs, &written_lsn))) {
            return r;
        }
        cachetable_insert_at(cachefile, hashit(t,key), key, toku_value, size, flush_callback, fetch_callback, extraargs, 0, written_lsn);
        *value = toku_value;
        if (sizep)
            *sizep = size;
        // maybe_flush_some(t, size);
    }
    WHEN_TRACE_CT(printf("%s:%d did fetch: cachetable_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value));
    return 0;
}
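/* A minimal sketch of the pin/unpin protocol.  The callback parameter lists
 * are inferred from the call sites in this file; the bodies and the key/size
 * values are made up for illustration.  Every successful get_and_pin must be
 * paired with an unpin, or eviction and close will never succeed. */
#if 0
static void example_flush (CACHEFILE cf, CACHEKEY key, void *value, long size,
                           int write_me, int keep_me, LSN lsn_of_checkpoint, BOOL rename_p) {
    if (write_me) { /* serialize value to cf's fd */ }
    if (!keep_me) toku_free(value); // the cachetable is dropping its reference
}
static int example_fetch (CACHEFILE cf, CACHEKEY key, void **value, long *sizep,
                          void *extraargs, LSN *written_lsn) {
    // Read the object for key from cf's fd; report its size and on-disk LSN.
    *value = toku_malloc(42);
    *sizep = 42;
    written_lsn->lsn = 0;
    return 0;
}
static int example_use (CACHEFILE cf, CACHEKEY key) {
    void *v; long size;
    int r = toku_cachetable_get_and_pin(cf, key, &v, &size, example_flush, example_fetch, 0);
    if (r) return r;
    // ... read or modify *v while it is pinned (eviction cannot touch it) ...
    return toku_cachetable_unpin(cf, key, 1 /* dirty */, size);
}
#endif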
int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, void**value) {
    CACHETABLE t = cachefile->cachetable;
    PAIR p;
    for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            *value = p->value;
            p->pinned++;
            lru_touch(t,p);
            //printf("%s:%d cachetable_maybe_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value);
            return 0;
        }
    }
    return -1;
}
int toku_cachetable_unpin(CACHEFILE cachefile, CACHEKEY key, int dirty, long size) {
    CACHETABLE t = cachefile->cachetable;
    PAIR p;
    WHEN_TRACE_CT(printf("%s:%d unpin(%lld)", __FILE__, __LINE__, key));
    //printf("%s:%d is dirty now=%d\n", __FILE__, __LINE__, dirty);
    for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            assert(p->pinned>0);
            p->pinned--;
            p->dirty |= dirty;
            if (size != 0) {
                t->size_current -= p->size;
                p->size = size;
                t->size_current += p->size;
            }
            WHEN_TRACE_CT(printf("[count=%lld]\n", p->pinned));
            return 0;
        }
    }
    return 0;
}
// effect: Move an object from one key to another key.
// requires: The object is pinned in the table.
int toku_cachetable_rename (CACHEFILE cachefile, CACHEKEY oldkey, CACHEKEY newkey) {
    CACHETABLE t = cachefile->cachetable;
    PAIR *ptr_to_p,p;
    for (ptr_to_p = &t->table[hashit(t, oldkey)], p = *ptr_to_p;
         p;
         ptr_to_p = &p->hash_chain, p = *ptr_to_p) {
        if (p->key==oldkey && p->cachefile==cachefile) {
            *ptr_to_p = p->hash_chain;
            p->key = newkey;
            int nh = hashit(t, newkey);
            p->hash_chain = t->table[nh];
            t->table[nh] = p;
            return 0;
        }
    }
    return -1;
}
static int cachetable_flush (CACHETABLE t) {
    int i;
    for (i=0; i<t->table_size; i++) {
        PAIR p;
        while ((p = t->table[i]))
            flush_and_remove(t, p, 1); // Must be careful, since flush_and_remove kills the linked list.
    }
    return 0;
}
void toku_cachefile_verify (CACHEFILE cf) {
    toku_cachetable_verify(cf->cachetable);
}
void toku_cachetable_verify (CACHETABLE t) {
    // First clear all the verify flags by going through the hash chains.
    {
        int i;
        for (i=0; i<t->table_size; i++) {
            PAIR p;
            for (p=t->table[i]; p; p=p->hash_chain) {
                p->verify_flag=0;
            }
        }
    }
    // Now go through the LRU chain, make sure everything in the LRU chain is hashed, and set the verify flag.
    {
        PAIR p;
        for (p=t->head; p; p=p->next) {
            assert(p->verify_flag==0);
            PAIR p2;
            for (p2=t->table[hashit(t,p->key)]; p2; p2=p2->hash_chain) {
                if (p2==p) {
                    /* found it */
                    goto next;
                }
            }
            fprintf(stderr, "Something in the LRU chain is not hashed\n");
            assert(0);
        next:
            p->verify_flag = 1;
        }
    }
    // Now make sure everything in the hash chains has the verify_flag set to 1.
    {
        int i;
        for (i=0; i<t->table_size; i++) {
            PAIR p;
            for (p=t->table[i]; p; p=p->hash_chain) {
                assert(p->verify_flag);
            }
        }
    }
}
static void assert_cachefile_is_flushed_and_removed (CACHEFILE cf) {
    CACHETABLE t = cf->cachetable;
    int i;
    // Check it two ways.
    // First way: Look through all the hash chains.
    for (i=0; i<t->table_size; i++) {
        PAIR p;
        for (p=t->table[i]; p; p=p->hash_chain) {
            assert(p->cachefile!=cf);
        }
    }
    // Second way: Look through the LRU list.
    {
        PAIR p;
        for (p=t->head; p; p=p->next) {
            assert(p->cachefile!=cf);
        }
    }
}
static int cachefile_flush_and_remove (CACHEFILE cf) {
    int i;
    CACHETABLE t = cf->cachetable;
    for (i=0; i<t->table_size; i++) {
        PAIR p;
    again:
        p = t->table[i];
        while (p) {
            if (p->cachefile==cf) {
                flush_and_remove(t, p, 1); // Must be careful, since flush_and_remove kills the linked list.
                goto again;
            } else {
                p=p->hash_chain;
            }
        }
    }
    assert_cachefile_is_flushed_and_removed(cf);
    if (4 * t->n_in_table < t->table_size)
        cachetable_rehash(t, -1);
    return 0;
}
/* Require that it all be flushed. */
int toku_cachetable_close (CACHETABLE *tp) {
    CACHETABLE t=*tp;
    int i;
    int r;
    if ((r=cachetable_flush(t))) return r;
    for (i=0; i<t->table_size; i++) {
        if (t->table[i]) return -1;
    }
    toku_free(t->table);
    toku_free(t);
    *tp = 0;
    return 0;
}
int toku_cachetable_remove (CACHEFILE cachefile, CACHEKEY key, int write_me) {
    /* Removing something that is not present is OK. */
    CACHETABLE t = cachefile->cachetable;
    PAIR p;
    for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            flush_and_remove(t, p, write_me);
            if (4 * t->n_in_table < t->table_size)
                cachetable_rehash(t, -1);
            goto done;
        }
    }
done:
    return 0;
}
#if 0
// Dead code: this flush_callback call predates the lsn_of_checkpoint/rename arguments that flush callbacks now take.
static void flush_and_keep (PAIR flush_me) {
    if (flush_me->dirty) {
        WHEN_TRACE_CT(printf("%s:%d CT flush_callback(%lld, %p, dirty=1, 0)\n", __FILE__, __LINE__, flush_me->key, flush_me->value));
        flush_me->flush_callback(flush_me->cachefile, flush_me->key, flush_me->value, flush_me->size, 1, 1);
        flush_me->dirty=0;
    }
}
static int cachetable_fsync_pairs (CACHETABLE t, PAIR p) {
    if (p) {
        int r = cachetable_fsync_pairs(t, p->hash_chain);
        if (r!=0) return r;
        flush_and_keep(p);
    }
    return 0;
}
int cachetable_fsync (CACHETABLE t) {
    int i;
    int r;
    for (i=0; i<t->table_size; i++) {
        r=cachetable_fsync_pairs(t, t->table[i]);
        if (r!=0) return r;
    }
    return 0;
}
#endif
#if 0
int cachefile_pwrite (CACHEFILE cf, const void *buf, size_t count, off_t offset) {
    ssize_t r = pwrite(cf->fd, buf, count, offset);
    if (r==-1) return errno;
    assert((size_t)r==count);
    return 0;
}
int cachefile_pread (CACHEFILE cf, void *buf, size_t count, off_t offset) {
    ssize_t r = pread(cf->fd, buf, count, offset);
    if (r==-1) return errno;
    if (r==0) return -1; /* No error for EOF ??? */
    assert((size_t)r==count);
    return 0;
}
#endif
int toku_cachefile_fd (CACHEFILE cf) {
    return cf->fd;
}
/* debug functions */
void toku_cachetable_print_state (CACHETABLE ct) {
    int i;
    for (i=0; i<ct->table_size; i++) {
        PAIR p = ct->table[i];
        if (p != 0) {
            printf("t[%d]=", i);
            for (p=ct->table[i]; p; p=p->hash_chain) {
                printf(" {%lld, %p, dirty=%d, pin=%lld, size=%ld}", p->key, p->cachefile, p->dirty, p->pinned, p->size);
            }
            printf("\n");
        }
    }
}
void toku_cachetable_get_state (CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr) {
    if (num_entries_ptr)
        *num_entries_ptr = ct->n_in_table;
    if (hash_size_ptr)
        *hash_size_ptr = ct->table_size;
    if (size_current_ptr)
        *size_current_ptr = ct->size_current;
    if (size_limit_ptr)
        *size_limit_ptr = ct->size_limit;
}
int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, void **value_ptr,
                                   int *dirty_ptr, long long *pin_ptr, long *size_ptr) {
    PAIR p;
    for (p = ct->table[hashit(ct, key)]; p; p = p->hash_chain) {
        if (p->key == key) {
            if (value_ptr)
                *value_ptr = p->value;
            if (dirty_ptr)
                *dirty_ptr = p->dirty;
            if (pin_ptr)
                *pin_ptr = p->pinned;
            if (size_ptr)
                *size_ptr = p->size;
            return 0;
        }
    }
    return 1;
}
int toku_cachetable_checkpoint (CACHETABLE ct) {
    // Single-threaded checkpoint.
    // In the future, for multithreaded checkpoint, we should not proceed if the previous checkpoint has not finished.
    // Requires: Everything is unpinned. (In the multithreaded version we have to wait for things to get unpinned and then
    //  grab them (or else the unpinner has to do something).)
    // Algorithm: Write a checkpoint record to the log, noting the LSN of that record.
    //  Note the LSN of the previous checkpoint (stored in lsn_of_checkpoint).
    //  For every (unpinned) dirty node in which the LSN is newer than the prev checkpoint LSN:
    //      flush the node (giving it a new nodeid, and fixing up the downpointer in the parent).
    //  Watch out, since evicting the node modifies the hash table.
    //?? This is a skeleton. It compiles, but doesn't do anything reasonable yet.
    //?? log_the_checkpoint();
    int n_saved=0;
    int n_in_table = ct->n_in_table;
    struct save_something {
        CACHEFILE cf;
        DISKOFF key;
        void *value;
        long size;
        LSN modified_lsn;
        CACHETABLE_FLUSH_FUNC_T flush_callback;
    } *MALLOC_N(n_in_table, info);
    {
        PAIR pair;
        for (pair=ct->head; pair; pair=pair->next) {
            assert(!pair->pinned);
            if (pair->dirty && pair->modified_lsn.lsn>ct->lsn_of_checkpoint.lsn) {
                //?? save_something_about_the_pair(); // This is read-only, so it doesn't modify the table.
                n_saved++;
            }
        }
    }
    {
        int i;
        for (i=0; i<n_saved; i++) {
            info[i].flush_callback(info[i].cf, info[i].key, info[i].value, info[i].size, 1, 1, info[i].modified_lsn, 0);
        }
    }
    toku_free(info);
    return 0;
}
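/* Illustration of the selection rule in the skeleton above (LSN values made
 * up): with lsn_of_checkpoint=100, a dirty pair with modified_lsn=150 would
 * be saved and written back; a dirty pair with modified_lsn=80 is not
 * selected, since its changes precede the current checkpoint regime; clean
 * pairs are skipped entirely. */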
TOKULOGGER toku_cachefile_logger (CACHEFILE cf) {
    return cf->cachetable->logger;
}
FILENUM toku_cachefile_filenum (CACHEFILE cf) {
    return cf->filenum;
}