/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.


Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.

----------------------------------------

    PerconaFT is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License, version 3,
    as published by the Free Software Foundation.

    PerconaFT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#pragma once

#include "cachetable/background_job_manager.h"
#include <portability/toku_random.h>
#include <util/frwlock.h>
#include <util/kibbutz.h>
#include <util/nb_mutex.h>
#include <util/partitioned_counter.h>
//////////////////////////////////////////////////////////////////////////////
//
// This file contains the classes and structs that make up the cachetable.
// The structs are:
//  - cachefile
//  - ctpair
//  - pair_list
//  - cachefile_list
//  - checkpointer
//  - evictor
//  - cleaner
//
// The rest of this comment assumes familiarity with the locks used in these
// classes/structs and what the locks protect. Nevertheless, here is
// a list of the locks that we have:
//  - pair_list->list_lock
//  - pair_list->pending_lock_expensive
//  - pair_list->pending_lock_cheap
//  - cachefile_list->lock
//  - PAIR->mutex
//  - PAIR->value_rwlock
//  - PAIR->disk_nb_mutex
//
// Here are the rules for how the locks interact:
//  - To grab any of the pair_list's locks, or the cachefile_list's lock,
//    the cachetable must be in existence.
//  - To grab the PAIR mutex, we must know the PAIR will not disappear:
//    - the PAIR must be pinned (value_rwlock or disk_nb_mutex is held)
//    - OR, the pair_list's list lock is held
//  - As a result, to remove a PAIR from the pair_list, we must hold
//    both the pair_list's list_lock and the PAIR's mutex.
//  - To grab PAIR->value_rwlock, we must hold the PAIR's mutex.
//  - To grab PAIR->disk_nb_mutex, we must hold the PAIR's mutex
//    and hold PAIR->value_rwlock.
//
// Now let's talk about ordering. Here is the order from outer to inner (top locks must be grabbed first):
//  - pair_list->pending_lock_expensive
//  - pair_list->list_lock
//  - cachefile_list->lock
//  - PAIR->mutex
//  - pair_list->pending_lock_cheap <-- after grabbing this lock,
//                                      NO other locks should be grabbed.
//  - when grabbing PAIR->value_rwlock or PAIR->disk_nb_mutex,
//    if the acquisition will not block, then it does not matter what other locks are held,
//    BUT if the acquisition will block, then NO other locks may be held besides
//    PAIR->mutex.
//
// HERE ARE TWO EXAMPLES:
// To pin a PAIR on a client thread, the following must be done (see the
// sketch after this comment block):
//  - first grab the list lock and find the PAIR
//  - with the list lock grabbed, grab PAIR->mutex
//  - with PAIR->mutex held:
//    - release the list lock
//    - pin the PAIR
//    - with the PAIR pinned, grab pending_lock_cheap,
//    - copy and clear PAIR->checkpoint_pending,
//    - resolve checkpointing if necessary,
//    - return to the user.
// The list lock may be held while pinning the PAIR only if
// the PAIR has no contention. Otherwise, we may deadlock with another
// thread that has the PAIR pinned, tries to pin some other PAIR, and in
// doing so, grabs the list lock.
//
// To unpin a PAIR on a client thread:
//  - because the PAIR is pinned, we don't need the pair_list's list_lock
//  - so, simply acquire PAIR->mutex
//  - unpin the PAIR
//  - return
//
//////////////////////////////////////////////////////////////////////////////
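
//
// Illustrative sketch only (not part of the API): roughly how a client thread
// pins a PAIR following the rules above. `pl` is the pair_list and
// `resolve_checkpointing` is a hypothetical helper standing in for whatever
// work resolves the pending checkpoint.
//
//   pl->read_list_lock();                       // find the PAIR under the list lock
//   PAIR p = pl->find_pair(cf, key, fullhash);
//   toku_mutex_lock(p->mutex);                  // safe: the list lock keeps p alive
//   pl->read_list_unlock();                     // drop the list lock before possibly blocking
//   p->value_rwlock.write_lock(true);           // pin the PAIR (may block; only p->mutex held)
//   toku_mutex_unlock(p->mutex);
//   pl->read_pending_cheap_lock();              // innermost lock: take no others after this
//   bool pending = p->checkpoint_pending;
//   p->checkpoint_pending = false;
//   pl->read_pending_cheap_unlock();
//   if (pending) resolve_checkpointing(p);      // hypothetical helper
//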

class evictor;
class pair_list;

///////////////////////////////////////////////////////////////////////////////
//
// Maps to a file on disk.
//
struct cachefile {
    // These next two fields are protected by the cachetable's list lock.
    // They are managed whenever we add or remove a pair from
    // the cachetable. As of Riddler, this linked list is only used to
    // make cachetable_flush_cachefile more efficient.
    PAIR cf_head; // doubly linked list that is NOT circular
    uint32_t num_pairs; // number of pairs in the cachetable that belong to this cachefile

    bool for_checkpoint; // true if part of the in-progress checkpoint

    // If set and the cachefile closes, the file will be removed.
    // Clients must not operate on the cachefile after setting this,
    // nor attempt to open any cachefile with the same fname (dname)
    // until this cachefile has been fully closed and unlinked.
    bool unlink_on_close;
    // If set, then the fclose will not be logged in the recovery log.
    bool skip_log_recover_on_close;
    int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
    CACHETABLE cachetable;
    struct fileid fileid;
    // the filenum is used as an identifier of the cachefile
    // for logging and recovery
    FILENUM filenum;
    // number used to generate hashes for blocks in the cachefile
    // used in toku_cachetable_hash
    // this used to be the filenum.fileid, but now it is separate
    uint32_t hash_id;
    char *fname_in_env; /* Used for logging */

    void *userdata;
    void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
    void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
    void (*free_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, call this function after close_userdata to free the userdata.
    void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
    void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
    void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function.
    void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
    void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // release the reference to the userdata taken by note_pin_by_checkpoint
    // (A sketch of the expected callback order during a checkpoint follows this struct.)
    BACKGROUND_JOB_MANAGER bjm;
};
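
//
// Illustrative sketch only (not part of the API): the order in which a
// checkpoint is expected to use the userdata callbacks above for one open
// cachefile, pieced together from the field comments. `checkpoint_lsn` and
// `fd` are placeholders.
//
//   cf->note_pin_by_checkpoint(cf, cf->userdata);             // keep the userdata alive
//   cf->log_fassociate_during_checkpoint(cf, cf->userdata);   // log the open file
//   cf->begin_checkpoint_userdata(checkpoint_lsn, cf->userdata);
//   /* ... this cachefile's PAIRs marked checkpoint_pending are written out ... */
//   cf->checkpoint_userdata(cf, fd, cf->userdata);
//   cf->end_checkpoint_userdata(cf, fd, cf->userdata);
//   cf->note_unpin_by_checkpoint(cf, cf->userdata);           // release the reference
//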

///////////////////////////////////////////////////////////////////////////////
//
// The pair represents the data stored in the cachetable.
//
struct ctpair {
    // These fields are essentially constants. They do not change.
    CACHEFILE cachefile;
    CACHEKEY key;
    uint32_t fullhash;
    CACHETABLE_FLUSH_CALLBACK flush_callback;
    CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback;
    CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback;
    CACHETABLE_CLEANER_CALLBACK cleaner_callback;
    CACHETABLE_CLONE_CALLBACK clone_callback;
    CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback;
    void *write_extraargs;

    // Access to these fields is protected by disk_nb_mutex.
    void* cloned_value_data; // cloned copy of value_data used for checkpointing
    long cloned_value_size;  // size of cloned_value_data, used for accounting of size_current
    void* disk_data;         // data used to fetch/flush value_data to and from disk

    // Access to these fields is protected by value_rwlock.
    void* value_data; // data used by client threads, FTNODEs and ROLLBACK_LOG_NODEs
    PAIR_ATTR attr;
    enum cachetable_dirty dirty;

    // protected by PAIR->mutex
    uint32_t count;    // clock count
    uint32_t refcount; // if > 0, then this PAIR is referenced by
                       // callers to the cachetable, and therefore cannot
                       // be evicted
    uint32_t num_waiting_on_refs; // number of threads waiting on refcount to go to zero
    toku_cond_t refcount_wait;    // cond used to wait for refcount to go to zero

    // locks
    toku::frwlock value_rwlock;
    struct nb_mutex disk_nb_mutex; // single writer, protects disk_data, is used for writing cloned nodes for checkpoint
    toku_mutex_t* mutex;           // gotten from the pair list

    // Access to checkpoint_pending is protected by two mechanisms:
    // the value_rwlock and the pair_list's pending locks (expensive and cheap).
    // checkpoint_pending may be true or false.
    // Here are the rules for reading/modifying this bit:
    //  - To transition this field from false to true during begin_checkpoint,
    //    we must be holding both of the pair_list's pending locks.
    //  - To transition this field from true to false during end_checkpoint,
    //    we must be holding the value_rwlock.
    //  - For a non-checkpoint thread to read the value, we must hold both the
    //    value_rwlock and one of the pair_list's pending locks.
    //  - For the checkpoint thread to read the value, we must
    //    hold the value_rwlock.
    // (A sketch of the begin_checkpoint transition follows this field.)
    //
    bool checkpoint_pending; // If this is on, then we have got to resolve checkpointing before modifying it.
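
    //
    // Illustrative sketch only (not the actual implementation): how
    // begin_checkpoint may perform the false -> true transition while
    // honoring the rules above. `pl` names the pair_list, and the real code
    // may apply additional conditions when choosing which PAIRs to mark.
    //
    //   pl->write_pending_exp_lock();
    //   pl->write_pending_cheap_lock();
    //   PAIR p = pl->m_checkpoint_head;       // walk the circular clock list
    //   do {
    //       p->checkpoint_pending = true;     // safe: both pending locks held
    //       p = p->clock_next;
    //   } while (p != pl->m_checkpoint_head);
    //   pl->write_pending_cheap_unlock();
    //   pl->write_pending_exp_unlock();
    //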

    // These are variables that are only used to transfer information to background threads.
    // We cache them here to avoid a malloc. In the future, we should investigate whether
    // this is worthwhile, as having these fields here is not technically necessary.
    long size_evicting_estimate;
    evictor* ev;
    pair_list* list;

    // A PAIR is stored in a pair_list (which happens to be PAIR->list).
    // These variables are protected by the list lock in the pair_list.
    //
    // clock_next, clock_prev represent a circular doubly-linked list.
    PAIR clock_next, clock_prev; // In clock.
    PAIR hash_chain;

    // pending_next, pending_prev represent a non-circular doubly-linked list.
    PAIR pending_next;
    PAIR pending_prev;

    // cf_next, cf_prev represent a non-circular doubly-linked list:
    // entries in the linked list of PAIRs in a cachefile. These are protected
    // by the list lock of the PAIR's pair_list. They are used to make
    // cachetable_flush_cachefile cheaper so that we don't need
    // to search the entire cachetable to find a particular cachefile's
    // PAIRs.
    PAIR cf_next;
    PAIR cf_prev;
};

//
// This initializes the fields and members of the pair.
//
void pair_init(PAIR p,
               CACHEFILE cachefile,
               CACHEKEY key,
               void *value,
               PAIR_ATTR attr,
               enum cachetable_dirty dirty,
               uint32_t fullhash,
               CACHETABLE_WRITE_CALLBACK write_callback,
               evictor *ev,
               pair_list *list);


///////////////////////////////////////////////////////////////////////////////
//
// The pair list maintains the set of PAIRs that make up
// the cachetable.
//
class pair_list {
public:
    //
    // The following fields are protected by the list lock.
    //
    uint32_t m_n_in_table;  // number of pairs in the hash table
    uint32_t m_table_size;  // number of buckets in the hash table
    uint32_t m_num_locks;
    PAIR *m_table;          // hash table
    toku_mutex_aligned_t *m_mutexes;
    //
    // The following fields are the heads of various linked lists.
    // They are also protected by the list lock, but their
    // usage is not as straightforward. For each of them,
    // only ONE thread is allowed to iterate over them with
    // a read lock on the list lock. All other threads
    // that want to modify elements in the lists or iterate over
    // the lists must hold the write list lock. Here is the
    // association between each list and the thread that may hold a read lock
    // on the list lock while iterating:
    //  - clock_head -> eviction thread (evictor)
    //  - cleaner_head -> cleaner thread (cleaner)
    //  - pending_head -> checkpoint thread (checkpointer)
    //
    PAIR m_clock_head;      // head of the clock; the head is the next thing to be up for decrement
    PAIR m_cleaner_head;    // for the cleaner thread; the head is the next thing to look at for possible cleaning
    PAIR m_checkpoint_head; // for begin_checkpoint to iterate over PAIRs and mark them as checkpoint_pending
    PAIR m_pending_head;    // list of pairs marked with checkpoint_pending

    // this field is public so we are still POD

    // usage of this lock is described above
    toku_pthread_rwlock_t m_list_lock;
    //
    // These locks are the "pending locks" referenced
    // in comments about PAIR->checkpoint_pending. There
    // are two of them, but both serve the same purpose, which
    // is to protect the transition of a PAIR's checkpoint_pending
    // value from false to true during begin_checkpoint.
    // We use two locks because threads that want to read the
    // checkpoint_pending value may hold a lock for varying periods of time.
    // Threads running eviction may need to protect checkpoint_pending
    // while writing a node to disk, which is an expensive operation,
    // so they use pending_lock_expensive. Client threads that
    // want to pin PAIRs only need to protect checkpoint_pending
    // long enough to read the value and wipe it out. This is
    // a cheap operation, and as a result, they use pending_lock_cheap.
    //
    // By having two locks, and making begin_checkpoint first
    // grab pending_lock_expensive and then pending_lock_cheap,
    // we ensure that threads that want to pin nodes can grab
    // only pending_lock_cheap, and never block behind threads
    // holding pending_lock_expensive and writing a node out to disk.
    // (A sketch of the two reader patterns follows these declarations.)
    //
    toku_pthread_rwlock_t m_pending_lock_expensive;
    toku_pthread_rwlock_t m_pending_lock_cheap;
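
    //
    // Illustrative sketch only (not part of the API): the two reader patterns
    // that motivate having two pending locks. `write_pair_for_checkpoint` and
    // `resolve_checkpointing` are hypothetical helpers standing in for the
    // real work.
    //
    //   // Evictor, about to write a node to disk (slow): use the expensive lock.
    //   pl->read_pending_exp_lock();
    //   bool pending = p->checkpoint_pending;
    //   p->checkpoint_pending = false;
    //   write_pair_for_checkpoint(p, pending);   // may take a long time
    //   pl->read_pending_exp_unlock();
    //
    //   // Client thread pinning a PAIR (fast): hold the cheap lock only long
    //   // enough to read and clear the bit, so it never waits behind a write.
    //   pl->read_pending_cheap_lock();
    //   bool pending = p->checkpoint_pending;
    //   p->checkpoint_pending = false;
    //   pl->read_pending_cheap_unlock();
    //   if (pending) resolve_checkpointing(p);
    //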

    void init();
    void destroy();
    void evict_completely(PAIR pair);
    void evict_from_cachetable(PAIR pair);
    void evict_from_cachefile(PAIR pair);
    void add_to_cachetable_only(PAIR p);
    void put(PAIR pair);
    PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash);
    void pending_pairs_remove (PAIR p);
    void verify();
    void get_state(int *num_entries, int *hash_size);
    void read_list_lock();
    void read_list_unlock();
    void write_list_lock();
    void write_list_unlock();
    void read_pending_exp_lock();
    void read_pending_exp_unlock();
    void write_pending_exp_lock();
    void write_pending_exp_unlock();
    void read_pending_cheap_lock();
    void read_pending_cheap_unlock();
    void write_pending_cheap_lock();
    void write_pending_cheap_unlock();
    toku_mutex_t* get_mutex_for_pair(uint32_t fullhash);
    void pair_lock_by_fullhash(uint32_t fullhash);
    void pair_unlock_by_fullhash(uint32_t fullhash);

private:
    void pair_remove (PAIR p);
    void remove_from_hash_chain(PAIR p);
    void add_to_cf_list (PAIR p);
    void add_to_clock (PAIR p);
    void add_to_hash_chain(PAIR p);
};

///////////////////////////////////////////////////////////////////////////////
//
// Wrapper for the head of our cachefile list.
//
class cachefile_list {
public:
    void init();
    void destroy();
    void read_lock();
    void read_unlock();
    void write_lock();
    void write_unlock();
    int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf);
    int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf);
    void add_cf_unlocked(CACHEFILE newcf);
    void add_stale_cf(CACHEFILE newcf);
    void remove_cf(CACHEFILE cf);
    void remove_stale_cf_unlocked(CACHEFILE cf);
    FILENUM reserve_filenum();
    uint32_t get_new_hash_id_unlocked();
    CACHEFILE find_cachefile_unlocked(struct fileid* fileid);
    CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid);
    void verify_unused_filenum(FILENUM filenum);
    bool evict_some_stale_pair(evictor* ev);
    void free_stale_data(evictor* ev);
    // access to these fields is protected by the lock
    FILENUM m_next_filenum_to_use;
    uint32_t m_next_hash_id_to_use;
    toku_pthread_rwlock_t m_lock; // this field is public so we are still POD
    toku::omt<CACHEFILE> m_active_filenum;
    toku::omt<CACHEFILE> m_active_fileid;
    toku::omt<CACHEFILE> m_stale_fileid;
private:
    CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid);
};


///////////////////////////////////////////////////////////////////////////////
//
// The checkpointer handles starting and finishing checkpoints of the
// cachetable's data.
//
class checkpointer {
public:
    int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
    void destroy();
    void set_checkpoint_period(uint32_t new_period);
    uint32_t get_checkpoint_period();
    int shutdown();
    bool has_been_shutdown();
    void begin_checkpoint();
    void add_background_job();
    void remove_background_job();
    void end_checkpoint(void (*testcallback_f)(void*), void* testextra);
    TOKULOGGER get_logger();
    // used during begin_checkpoint
    void increment_num_txns();
private:
    uint32_t m_checkpoint_num_txns;      // how many transactions are in the checkpoint
    TOKULOGGER m_logger;
    LSN m_lsn_of_checkpoint_in_progress;
    uint32_t m_checkpoint_num_files;     // how many cachefiles are in the checkpoint
    struct minicron m_checkpointer_cron; // the periodic checkpointing thread
    cachefile_list *m_cf_list;
    pair_list *m_list;
    evictor *m_ev;
    bool m_checkpointer_cron_init;
    bool m_checkpointer_init;

    // variable used by the checkpoint thread to know
    // when all work induced by cloning on client threads is done
    BACKGROUND_JOB_MANAGER m_checkpoint_clones_bjm;
    // private methods for begin_checkpoint
    void update_cachefiles();
    void log_begin_checkpoint();
    void turn_on_pending_bits();
    // private methods for end_checkpoint
    void fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs);
    void checkpoint_pending_pairs();
    void checkpoint_userdata(CACHEFILE* checkpoint_cfs);
    void log_end_checkpoint();
    void end_checkpoint_userdata(CACHEFILE* checkpoint_cfs);
    void remove_cachefiles(CACHEFILE* checkpoint_cfs);

    // Unit test struct needs access to private members.
    friend struct checkpointer_test;
};
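
//
// Illustrative sketch only (not part of the API): how the cachetable is
// expected to drive a checkpoint with this class, based on the public
// methods above. `ct` and `logger` are assumed to exist; error handling
// is omitted.
//
//   checkpointer cp;
//   cp.init(&ct->list, logger, &ct->ev, &ct->cf_list);
//   cp.set_checkpoint_period(60);        // also checkpoint automatically every 60s
//   cp.begin_checkpoint();               // log open files, turn on pending bits
//   cp.end_checkpoint(NULL, NULL);       // write pending PAIRs, checkpoint/end userdata, log end
//   cp.shutdown();
//   cp.destroy();
//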

//
// This is how often we want the eviction thread
// to run, in seconds.
//
const int EVICTION_PERIOD = 1;

///////////////////////////////////////////////////////////////////////////////
//
// The evictor handles the removal of pairs from the pair list/cachetable.
//
class evictor {
public:
    int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
    void destroy();
    void add_pair_attr(PAIR_ATTR attr);
    void remove_pair_attr(PAIR_ATTR attr);
    void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr);
    void add_cloned_data_size(long size);
    void remove_cloned_data_size(long size);
    uint64_t reserve_memory(double fraction, uint64_t upper_bound);
    void release_reserved_memory(uint64_t reserved_memory);
    void run_eviction_thread();
    void do_partial_eviction(PAIR p);
    void evict_pair(PAIR p, bool checkpoint_pending);
    void wait_for_cache_pressure_to_subside();
    void signal_eviction_thread();
    void signal_eviction_thread_locked();
    bool should_client_thread_sleep();
    bool should_client_wake_eviction_thread();
    // functions needed for testing
    void get_state(long *size_current_ptr, long *size_limit_ptr);
    void fill_engine_status();
    void set_enable_partial_eviction(bool enabled);
    bool get_enable_partial_eviction(void) const;
private:
    void add_to_size_current(long size);
    void remove_from_size_current(long size);
    void run_eviction();
    bool run_eviction_on_pair(PAIR p);
    void try_evict_pair(PAIR p);
    void decrease_size_evicting(long size_evicting_estimate);
    bool should_sleeping_clients_wakeup();
    bool eviction_needed();

    // We have some intentional races with these variables because we're ok with reading something a little bit old.
    // Provide some hooks for reading variables in an unsafe way so that there are function names we can stick in a valgrind suppression.
    int64_t unsafe_read_size_current(void) const;
    int64_t unsafe_read_size_evicting(void) const;

    pair_list* m_pl;
    cachefile_list* m_cf_list;
    int64_t m_size_current;     // the sum of the sizes of the pairs in the cachetable
    int64_t m_size_cloned_data; // the amount of cloned data we have; only used for engine status
    // changes to these two values are protected
    // by ev_thread_lock
    int64_t m_size_reserved;    // how much memory is reserved (e.g., by the loader)
    int64_t m_size_evicting;    // the sum of the sizes of the pairs being written

    // These are constants; a sketch of how they relate follows below.
    int64_t m_low_size_watermark;   // target max size of the cachetable that the eviction thread aims for
    int64_t m_low_size_hysteresis;  // if the cachetable grows to this size, client threads wake up the eviction thread upon adding data
    int64_t m_high_size_watermark;  // if the cachetable grows to this size, client threads sleep upon adding data
    int64_t m_high_size_hysteresis; // if the cachetable size drops below this, sleeping client threads may wake up
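
    // Illustrative sketch only of how these thresholds are meant to relate
    // and which decision each one drives (the actual values are chosen in
    // init and are not repeated here):
    //
    //   m_low_size_watermark <= m_low_size_hysteresis
    //                        <= m_high_size_hysteresis <= m_high_size_watermark
    //
    //   eviction_needed()                    ~  size_current > m_low_size_watermark
    //   should_client_wake_eviction_thread() ~  size_current > m_low_size_hysteresis
    //   should_client_thread_sleep()         ~  size_current > m_high_size_watermark
    //   should_sleeping_clients_wakeup()     ~  size_current <= m_high_size_hysteresis
    //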

    bool m_enable_partial_eviction; // true if partial evictions are permitted

    // used to calculate random numbers
    struct random_data m_random_data;
    char m_random_statebuf[64];

    // mutex that protects the fields listed immediately below
    toku_mutex_t m_ev_thread_lock;
    // the eviction thread
    toku_pthread_t m_ev_thread;
    // condition variable that controls the sleeping period
    // of the eviction thread
    toku_cond_t m_ev_thread_cond;
    // number of client threads that are currently sleeping
    // due to an over-subscribed cachetable
    uint32_t m_num_sleepers;
    // states whether the eviction thread should run; set to true
    // in init, set to false during destroy
    bool m_run_thread;
    // states whether the eviction thread is currently running
    bool m_ev_thread_is_running;
    // period for which the eviction thread sleeps
    uint32_t m_period_in_seconds;
    // condition variable on which client threads wait when sleeping
    // due to an over-subscribed cachetable
    toku_cond_t m_flow_control_cond;

    // variables for engine status
    PARTITIONED_COUNTER m_size_nonleaf;
    PARTITIONED_COUNTER m_size_leaf;
    PARTITIONED_COUNTER m_size_rollback;
    PARTITIONED_COUNTER m_size_cachepressure;
    PARTITIONED_COUNTER m_wait_pressure_count;
    PARTITIONED_COUNTER m_wait_pressure_time;
    PARTITIONED_COUNTER m_long_wait_pressure_count;
    PARTITIONED_COUNTER m_long_wait_pressure_time;

    KIBBUTZ m_kibbutz;

    // this variable is ONLY used for testing purposes
    uint64_t m_num_eviction_thread_runs;

    bool m_ev_thread_init;
    bool m_evictor_init;

    friend class evictor_test_helpers;
    friend class evictor_unit_test;
};

///////////////////////////////////////////////////////////////////////////////
//
// Iterates over the cleaner head in the pair list, calling the cleaner
// callback on each node in that list.
//
class cleaner {
public:
    int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
    void destroy(void);
    uint32_t get_iterations(void);
    void set_iterations(uint32_t new_iterations);
    uint32_t get_period_unlocked(void);
    void set_period(uint32_t new_period);
    int run_cleaner(void);

private:
    pair_list* m_pl;
    CACHETABLE m_ct;
    struct minicron m_cleaner_cron; // the periodic cleaner thread
    uint32_t m_cleaner_iterations;  // how many times to run the cleaner per
                                    // cleaner period (minicron has a
                                    // minimum period of 1s, so if you want
                                    // more frequent cleaner runs you must
                                    // use this)
    bool m_cleaner_cron_init;
    bool m_cleaner_init;
};

///////////////////////////////////////////////////////////////////////////////
//
// The cachetable is as close to an ENV as we get.
//
struct cachetable {
    pair_list list;
    cleaner cl;
    evictor ev;
    checkpointer cp;
    cachefile_list cf_list;

    KIBBUTZ client_kibbutz;        // pool of worker threads and jobs to do asynchronously for the client
    KIBBUTZ ct_kibbutz;            // pool of worker threads and jobs to do asynchronously for the cachetable
    KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs

    char *env_dir;
};