Fixes #6020. Detect huge pages. The fractal tree layer notices when you create a logger (maybe there are other useful times to detect, but that's enough for now). The ydb layer notices when you call {{{db_env->open()}}}. The mysql handlerton notices the error in {{{tokudb_init_func()}}}.

git-svn-id: file:///svn/toku/tokudb@54498 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Bradley C. Kuszmaul 2013-04-17 00:01:31 -04:00 committed by Yoni Fogel
parent 930aacc602
commit ca3af4843e
7 changed files with 125 additions and 1 deletions

View file

@ -120,6 +120,7 @@ enum {
TOKUDB_NEEDS_REPAIR = -100013,
TOKUDB_CURSOR_CONTINUE = -100014,
TOKUDB_BAD_CHECKSUM = -100015,
TOKUDB_HUGE_PAGES_ENABLED = -100016,
DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
};
@ -270,6 +271,7 @@ static void print_defines (void) {
dodefine(TOKUDB_NEEDS_REPAIR);
dodefine(TOKUDB_CURSOR_CONTINUE);
dodefine(TOKUDB_BAD_CHECKSUM);
dodefine(TOKUDB_HUGE_PAGES_ENABLED);
/* LOADER flags */
printf("/* LOADER flags */\n");

View file

@ -177,6 +177,7 @@ static void benchmark_setup (void) {
}
r = db_env_create(&dbenv, 0);
if (r!=0) fprintf(stderr, "Error on db_env_create: %s\n", db_strerror(r));
assert(r == 0);
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4

View file

@ -13,6 +13,7 @@
#include "log-internal.h"
#include "txn_manager.h"
#include "rollback_log_node_cache.h"
#include "huge_page_detection.h"
#include <util/partitioned_counter.h>
static const int log_format_version=TOKU_LOG_VERSION;
@ -78,6 +79,11 @@ static bool is_a_logfile (const char *name, long long *number_result) {
// TODO: can't fail
int toku_logger_create (TOKULOGGER *resultp) {
if (complain_and_return_true_if_huge_pages_are_enabled()) {
*resultp = NULL;
errno = TOKUDB_HUGE_PAGES_ENABLED;
return TOKUDB_HUGE_PAGES_ENABLED;
}
TOKULOGGER CALLOC(result);
if (result==0) return get_error_errno();
result->is_open=false;

View file

@ -1,6 +1,7 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
set(tokuportability_srcs
huge_page_detection
file
memory
os_malloc

View file

@ -0,0 +1,109 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: memory.cc 52238 2013-01-18 20:21:22Z zardosht $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <toku_assert.h>
#include "huge_page_detection.h"
extern "C" {
static bool check_huge_pages_config_file(const char *fname)
// Effect: Return true if the file named fname says that transparent huge pages are always enabled,
//  i.e. its first line contains the literal text "[always]".  If so, print a diagnostic to stderr.
//  Return false if the file cannot be opened, is empty or unreadable, or does not contain "[always]".
// Parameters:
//  fname  path of the configuration file to inspect.
{
    FILE *f = fopen(fname, "r");
    if (f == NULL) {
        // The file cannot be opened, so there is nothing that says the feature is on.
        return false;
    }

    // The feature appears to be there.  Is it enabled?
    char buf[1000];
    bool enabled = false;
    if (fgets(buf, sizeof(buf), f) != NULL) {
        enabled = (strstr(buf, "[always]") != NULL);
    }
    // An empty or unreadable file is treated as "not enabled" rather than aborting the process.

    // Always release the stream: this function may be called many times in one process.
    fclose(f);

    if (enabled) {
        fprintf(stderr, "Transparent huge pages are enabled, according to %s\n", fname);
    }
    return enabled;
}
/* struct mapinfo { */
/* void *addr; */
/* size_t size; */
/* }; */
/* static void* map_it(size_t size, struct mapinfo *mi, int *n_maps) { */
/* void *r = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); */
/* if ((long)r==-1) perror("mmap failed"); */
/* mi[*n_maps].addr = r; */
/* mi[*n_maps].size = size; */
/* (*n_maps)++; */
/* return r; */
/* } */
static bool check_huge_pages_in_practice(void)
// Effect: Return true if huge pages appear to be defined in practice.  If so, print a diagnostic to stderr.
// How: Map an anonymous region, pick a 2MB-aligned 2MB window inside it, touch only the first byte of
//  the window, and ask mincore() which pages became resident.  If the second 4KB page is resident
//  although it was never touched, more than one small page was faulted in at once.
// Returns false (after printing the system error) if mmap() or mincore() fails, since then
//  nothing can be concluded.
{
    const size_t TWO_MB = 2UL*1024UL*1024UL;

    // Map twice the window size so that an aligned 2MB window is guaranteed to fit inside the mapping.
    void *reservation = mmap(NULL, 2*TWO_MB, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (reservation == MAP_FAILED) {
        perror("mmap failed");
        return false;
    }

    // Probe inside the mapping we still own.  Unmapping it and re-mapping with MAP_FIXED would
    // open a window in which another thread's mapping could land there and be overwritten.
    char *probe = (char*)(((unsigned long)reservation + TWO_MB) & ~(TWO_MB -1));
    assert((unsigned long)probe%TWO_MB == 0);

    const long pagesize = 4096;
    const long n_pages = TWO_MB/pagesize;
    unsigned char vec[n_pages];

    bool huge = false;
    if (mincore(probe, TWO_MB, vec) != 0) {
        perror("mincore failed");
    } else {
        // Nothing has been touched yet, so no page of the window should be resident.
        for (long i=0; i<n_pages; i++) {
            assert(!vec[i]);
        }

        // Touch exactly one byte, which needs only the first small page.
        probe[0] = 1;

        if (mincore(probe, TWO_MB, vec) != 0) {
            perror("mincore failed");
        } else {
            assert(vec[0]);
            // The second page was never touched; if it is resident anyway, a larger page was used.
            huge = (vec[1] != 0);
        }
    }

    {
        int r = munmap(reservation, 2*TWO_MB);
        assert(r==0);
    }

    if (huge) {
        fprintf(stderr, "Transparent huge pages appear to be enabled according to mincore()\n");
        return true;
    }
    return false;
}
bool complain_and_return_true_if_huge_pages_are_enabled(void)
// Effect: Report whether any of the huge page checks fires.  Each check that fires prints its own
//  diagnostic to stderr.
//  When the environment variable TOKU_HUGE_PAGES_OK is set (to any value), no check is run and
//  false is returned.
{
    if (getenv("TOKU_HUGE_PAGES_OK") != NULL) {
        return false;
    }

    static const char *const config_files[] = {
        "/sys/kernel/mm/redhat_transparent_hugepage/enabled",
        "/sys/kernel/mm/transparent_hugepage/enabled",
    };

    // Run every check, even after one has already fired, so that all diagnostics get printed.
    bool detected = false;
    for (size_t i = 0; i < sizeof(config_files)/sizeof(config_files[0]); i++) {
        if (check_huge_pages_config_file(config_files[i])) {
            detected = true;
        }
    }
    if (check_huge_pages_in_practice()) {
        detected = true;
    }
    return detected;
}
}

View file

@ -0,0 +1,3 @@
// Check whether transparent huge pages appear to be enabled on this machine.
extern "C" bool complain_and_return_true_if_huge_pages_are_enabled(void);
// Effect: Return true if huge pages appear to be enabled. If so, print some diagnostics to stderr.
// If environment variable TOKU_HUGE_PAGES_OK is set, then don't complain.
// Note: when TOKU_HUGE_PAGES_OK is set (to any value) the function returns false without running any check.

View file

@ -2275,7 +2275,7 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) {
result->i->bt_compare = toku_builtin_compare_fun;
r = toku_logger_create(&result->i->logger);
assert_zero(r);
if (r!=0) goto cleanup; // In particular, logger_create can return the huge page error.
assert(result->i->logger);
// Create the locktree manager, passing in the create/destroy/escalate callbacks.
@ -2659,6 +2659,8 @@ db_strerror(int error) {
return "User cancelled operation";
case TOKUDB_NO_DATA:
return "Ran out of data (not EOF)";
case TOKUDB_HUGE_PAGES_ENABLED:
return "Transparent huge pages are enabled but TokuDB's memory allocator will oversubscribe main memory with transparent huge pages. This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK.";
}
static char unknown_result[100]; // Race condition if two threads call this at the same time. However even in a bad case, it should be some sort of null-terminated string.