From ca3af4843eaf5928af567d9585dae506e58a850d Mon Sep 17 00:00:00 2001
From: "Bradley C. Kuszmaul"
Date: Wed, 17 Apr 2013 00:01:31 -0400
Subject: [PATCH] Fixes #6020. Detect huge pages.

The fractal tree layer notices when you create a logger (maybe there are
other useful times to detect, but that's enough for now).  The ydb layer
notices when you call {{{db_env->open()}}}.  The mysql handlerton notices
the error in {{{tokudb_init_func()}}}.

git-svn-id: file:///svn/toku/tokudb@54498 c7de825b-a66e-492c-adef-691d508d4ae1
---
 buildheader/make_tdb.cc                |   2 +
 db-benchmark-test/db-benchmark-test.cc |   1 +
 ft/logger.cc                           |   6 ++
 portability/CMakeLists.txt             |   1 +
 portability/huge_page_detection.cc     | 109 +++++++++++++++++++++++++
 portability/huge_page_detection.h      |   3 +
 src/ydb.cc                             |   4 +-
 7 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 portability/huge_page_detection.cc
 create mode 100644 portability/huge_page_detection.h

diff --git a/buildheader/make_tdb.cc b/buildheader/make_tdb.cc
index 54b0234117e..40e10c3b3e9 100644
--- a/buildheader/make_tdb.cc
+++ b/buildheader/make_tdb.cc
@@ -120,6 +120,7 @@ enum {
     TOKUDB_NEEDS_REPAIR = -100013,
     TOKUDB_CURSOR_CONTINUE = -100014,
     TOKUDB_BAD_CHECKSUM = -100015,
+    TOKUDB_HUGE_PAGES_ENABLED = -100016,
     DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
 };
 
@@ -270,6 +271,7 @@ static void print_defines (void) {
     dodefine(TOKUDB_NEEDS_REPAIR);
     dodefine(TOKUDB_CURSOR_CONTINUE);
     dodefine(TOKUDB_BAD_CHECKSUM);
+    dodefine(TOKUDB_HUGE_PAGES_ENABLED);
 
     /* LOADER flags */
     printf("/* LOADER flags */\n");
diff --git a/db-benchmark-test/db-benchmark-test.cc b/db-benchmark-test/db-benchmark-test.cc
index 933a8bd9dbf..c6cc0aea1b2 100644
--- a/db-benchmark-test/db-benchmark-test.cc
+++ b/db-benchmark-test/db-benchmark-test.cc
@@ -177,6 +177,7 @@ static void benchmark_setup (void) {
     }
 
     r = db_env_create(&dbenv, 0);
+    if (r!=0) fprintf(stderr, "Error on db_env_create: %s\n", db_strerror(r));
     assert(r == 0);
 
 #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4
diff --git a/ft/logger.cc b/ft/logger.cc
index 714df3407e5..72c2149d40e 100644
--- a/ft/logger.cc
+++ b/ft/logger.cc
@@ -13,6 +13,7 @@
 #include "log-internal.h"
 #include "txn_manager.h"
 #include "rollback_log_node_cache.h"
+#include "huge_page_detection.h"
 
 #include 
 static const int log_format_version=TOKU_LOG_VERSION;
@@ -78,6 +79,11 @@ static bool is_a_logfile (const char *name, long long *number_result) {
 
 // TODO: can't fail
 int toku_logger_create (TOKULOGGER *resultp) {
+    if (complain_and_return_true_if_huge_pages_are_enabled()) {
+        *resultp = NULL;
+        errno = TOKUDB_HUGE_PAGES_ENABLED;
+        return TOKUDB_HUGE_PAGES_ENABLED;
+    }
     TOKULOGGER CALLOC(result);
     if (result==0) return get_error_errno();
     result->is_open=false;
diff --git a/portability/CMakeLists.txt b/portability/CMakeLists.txt
index 7fcdf93017b..c0ae346908e 100644
--- a/portability/CMakeLists.txt
+++ b/portability/CMakeLists.txt
@@ -1,6 +1,7 @@
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 
 set(tokuportability_srcs
+  huge_page_detection
   file
   memory
   os_malloc
diff --git a/portability/huge_page_detection.cc b/portability/huge_page_detection.cc
new file mode 100644
index 00000000000..6dd3e449cb1
--- /dev/null
+++ b/portability/huge_page_detection.cc
@@ -0,0 +1,109 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id: memory.cc 52238 2013-01-18 20:21:22Z zardosht $"
+#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/mman.h>
+
+#include "huge_page_detection.h"
+
+extern "C" {
+
+static bool check_huge_pages_config_file(const char *fname)
+// Effect: Return true if huge pages are there.  If so, print diagnostics.
+{
+    FILE *f=fopen(fname, "r");
+    if (f) {
+        // It's redhat and the feature appears to be there.  Is it enabled?
+        char buf[1000];
+        char *r = fgets(buf, sizeof(buf), f);
+        assert(r!=NULL);
+        if (strstr(buf, "[always]")) {
+            fprintf(stderr, "Transparent huge pages are enabled, according to %s\n", fname);
+            return true;
+        } else {
+            return false;
+        }
+    }
+    return false;
+}
+
+/* struct mapinfo { */
+/*     void *addr; */
+/*     size_t size; */
+/* }; */
+
+/* static void* map_it(size_t size, struct mapinfo *mi, int *n_maps) { */
+/*     void *r = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); */
+/*     if ((long)r==-1) perror("mmap failed"); */
+/*     mi[*n_maps].addr = r; */
+/*     mi[*n_maps].size = size; */
+/*     (*n_maps)++; */
+/*     return r; */
+/* } */
+
+static bool check_huge_pages_in_practice(void)
+// Effect: Return true if huge pages appear to be defined in practice.
+{
+    const size_t TWO_MB = 2UL*1024UL*1024UL;
+
+    void *first = mmap(NULL, 2*TWO_MB, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if ((long)first==-1) perror("mmap failed");
+    {
+        int r = munmap(first, 2*TWO_MB);
+        assert(r==0);
+    }
+
+    void *second_addr = (void*)(((unsigned long)first + TWO_MB) & ~(TWO_MB -1));
+    void *second = mmap(second_addr, TWO_MB, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if ((long)second==-1) perror("mmap failed");
+    assert((long)second%TWO_MB == 0);
+
+    const long pagesize = 4096;
+    const long n_pages = TWO_MB/pagesize;
+    unsigned char vec[n_pages];
+    {
+        int r = mincore(second, TWO_MB, vec);
+        assert(r==0);
+    }
+    for (long i=0; i<n_pages; i++) {
[...]

diff --git a/src/ydb.cc b/src/ydb.cc
[...]
     result->i->bt_compare = toku_builtin_compare_fun;
 
     r = toku_logger_create(&result->i->logger);
-    assert_zero(r);
+    if (r!=0) goto cleanup; // In particular, logger_create can return the huge page error.
     assert(result->i->logger);
 
     // Create the locktree manager, passing in the create/destroy/escalate callbacks.
@@ -2659,6 +2659,8 @@ db_strerror(int error) {
         return "User cancelled operation";
     case TOKUDB_NO_DATA:
         return "Ran out of data (not EOF)";
+    case TOKUDB_HUGE_PAGES_ENABLED:
+        return "Transparent huge pages are enabled but TokuDB's memory allocator will oversubscribe main memory with transparent huge pages.  This check can be disabled by setting the environment variable TOKU_HUGE_PAGES_OK.";
     }
     static char unknown_result[100]; // Race condition if two threads call this at the same time.  However even in a bad case, it should be some sort of null-terminated string.
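
Note on the elided detection policy: the body of complain_and_return_true_if_huge_pages_are_enabled() falls inside the truncated portion of the patch, but the visible pieces imply what it does: check the transparent-hugepage sysfs knob for "[always]" via check_huge_pages_config_file(), and let the TOKU_HUGE_PAGES_OK environment variable (named in the new db_strerror message) disable the check, presumably via getenv in the elided code. The minimal standalone sketch below illustrates only that config-file side (the mmap/mincore probe is already visible above); the two sysfs paths, the helper names, and main() are illustrative assumptions, not code taken from the patch.

// Standalone sketch of the sysfs side of the huge-page check.  Build it with a
// C++ compiler and run it; a nonzero exit status means the check would have
// failed the way db_env->open() fails with TOKUDB_HUGE_PAGES_ENABLED.
// Paths and names below are assumptions, not the patch's own code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Return true if the sysfs file exists and reports "[always]", i.e. transparent
// huge pages are unconditionally enabled.  A missing file (non-Linux, older
// kernel) counts as "not enabled".
static bool config_file_says_always(const char *fname) {
    FILE *f = fopen(fname, "r");
    if (f == NULL) return false;
    char buf[1000];
    char *r = fgets(buf, sizeof(buf), f);
    fclose(f); // unlike the patch's check_huge_pages_config_file, close the file
    return (r != NULL) && (strstr(buf, "[always]") != NULL);
}

// Complain and return true if huge pages are enabled and the user has not set
// TOKU_HUGE_PAGES_OK to override the check.
static bool huge_pages_are_enabled(void) {
    if (getenv("TOKU_HUGE_PAGES_OK") != NULL) return false; // explicit opt-out
    const char *files[] = {
        "/sys/kernel/mm/redhat_transparent_hugepage/enabled", // assumed RHEL/CentOS path
        "/sys/kernel/mm/transparent_hugepage/enabled",        // assumed mainline path
    };
    for (size_t i = 0; i < sizeof(files)/sizeof(files[0]); i++) {
        if (config_file_says_always(files[i])) {
            fprintf(stderr, "Transparent huge pages are enabled, according to %s\n", files[i]);
            return true;
        }
    }
    return false;
}

int main(void) {
    return huge_pages_are_enabled() ? 1 : 0;
}

On a host with THP set to [always], this prints the same kind of diagnostic the patch emits from check_huge_pages_config_file() before toku_logger_create() returns TOKUDB_HUGE_PAGES_ENABLED, and setting TOKU_HUGE_PAGES_OK suppresses it, matching the escape hatch described in db_strerror.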