diff --git a/extra/CMakeLists.txt b/extra/CMakeLists.txt index f8f71b00743..cf3a35cb1dd 100644 --- a/extra/CMakeLists.txt +++ b/extra/CMakeLists.txt @@ -72,10 +72,24 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") ENDIF() ENDIF() +IF(WITH_INNOBASE_STORAGE_ENGINE) + # Add path to the InnoDB headers + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include) + # We use the InnoDB code directly in case the code changes. + ADD_DEFINITIONS("-DUNIV_INNOCHECKSUM") + SET(INNOBASE_SOURCES + ../storage/innobase/buf/buf0checksum.cc + ../storage/innobase/ut/ut0crc32.cc + ../storage/innobase/ut/ut0ut.cc + ) + MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.cc ${INNOBASE_SOURCES}) + TARGET_LINK_LIBRARIES(innochecksum mysys mysys_ssl) +ENDIF() + MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server) TARGET_LINK_LIBRARIES(replace mysys) + IF(UNIX) - MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.c) MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c) TARGET_LINK_LIBRARIES(resolve_stack_dump mysys) diff --git a/extra/innochecksum.c b/extra/innochecksum.c deleted file mode 100644 index ed4dfc48789..00000000000 --- a/extra/innochecksum.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - Copyright (c) 2005, 2011, Oracle and/or its affiliates - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -/* - InnoDB offline file checksum utility. 85% of the code in this file - was taken wholesale fron the InnoDB codebase. - - The final 15% was originally written by Mark Smith of Danga - Interactive, Inc. - - Published with a permission. -*/ - -#include -#include -#include -#include -#include -#include -#include - -/* all of these ripped from InnoDB code from MySQL 4.0.22 */ -#define UT_HASH_RANDOM_MASK 1463735687 -#define UT_HASH_RANDOM_MASK2 1653893711 -#define FIL_PAGE_LSN 16 -#define FIL_PAGE_FILE_FLUSH_LSN 26 -#define FIL_PAGE_OFFSET 4 -#define FIL_PAGE_DATA 38 -#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 -#define FIL_PAGE_SPACE_OR_CHKSUM 0 -#define UNIV_PAGE_SIZE (2 * 8192) - -/* command line argument to do page checks (that's it) */ -/* another argument to specify page ranges... seek to right spot and go from there */ - -typedef unsigned long int ulint; - -/* innodb function in name; modified slightly to not have the ASM version (lots of #ifs that didn't apply) */ -ulint mach_read_from_4(uchar *b) -{ - return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) - ); -} - -ulint -ut_fold_ulint_pair( -/*===============*/ - /* out: folded value */ - ulint n1, /* in: ulint */ - ulint n2) /* in: ulint */ -{ - return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) - ^ UT_HASH_RANDOM_MASK) + n2); -} - -ulint -ut_fold_binary( -/*===========*/ - /* out: folded value */ - uchar* str, /* in: string of bytes */ - ulint len) /* in: length */ -{ - ulint i; - ulint fold= 0; - - for (i= 0; i < len; i++) - { - fold= ut_fold_ulint_pair(fold, (ulint)(*str)); - - str++; - } - - return(fold); -} - -ulint -buf_calc_page_new_checksum( -/*=======================*/ - /* out: checksum */ - uchar* page) /* in: buffer page */ -{ - ulint checksum; - - /* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO - are written outside the buffer pool to the first pages of data - files, we have to skip them in the page checksum calculation. - We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the - checksum is stored, and also the last 8 bytes of page because - there we store the old formula checksum. */ - - checksum= ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) - + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); - checksum= checksum & 0xFFFFFFFF; - - return(checksum); -} - -ulint -buf_calc_page_old_checksum( -/*=======================*/ - /* out: checksum */ - uchar* page) /* in: buffer page */ -{ - ulint checksum; - - checksum= ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); - - checksum= checksum & 0xFFFFFFFF; - - return(checksum); -} - - -int main(int argc, char **argv) -{ - FILE *f; /* our input file */ - uchar *p; /* storage of pages read */ - int bytes; /* bytes read count */ - ulint ct; /* current page number (0 based) */ - int now; /* current time */ - int lastt; /* last time */ - ulint oldcsum, oldcsumfield, csum, csumfield, logseq, logseqfield; /* ulints for checksum storage */ - struct stat st; /* for stat, if you couldn't guess */ - unsigned long long int size; /* size of file (has to be 64 bits) */ - ulint pages; /* number of pages in file */ - ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */ - off_t offset= 0; - int just_count= 0; /* if true, just print page count */ - int verbose= 0; - int debug= 0; - int c; - int fd; - - /* remove arguments */ - while ((c= getopt(argc, argv, "cvds:e:p:")) != -1) - { - switch (c) - { - case 'v': - verbose= 1; - break; - case 'c': - just_count= 1; - break; - case 's': - start_page= atoi(optarg); - break; - case 'e': - end_page= atoi(optarg); - use_end_page= 1; - break; - case 'p': - start_page= atoi(optarg); - end_page= atoi(optarg); - use_end_page= 1; - break; - case 'd': - debug= 1; - break; - case ':': - fprintf(stderr, "option -%c requires an argument\n", optopt); - return 1; - break; - case '?': - fprintf(stderr, "unrecognized option: -%c\n", optopt); - return 1; - break; - } - } - - /* debug implies verbose... */ - if (debug) verbose= 1; - - /* make sure we have the right arguments */ - if (optind >= argc) - { - printf("InnoDB offline file checksum utility.\n"); - printf("usage: %s [-c] [-s ] [-e ] [-p ] [-v] [-d] \n", argv[0]); - printf("\t-c\tprint the count of pages in the file\n"); - printf("\t-s n\tstart on this page number (0 based)\n"); - printf("\t-e n\tend at this page number (0 based)\n"); - printf("\t-p n\tcheck only this page (0 based)\n"); - printf("\t-v\tverbose (prints progress every 5 seconds)\n"); - printf("\t-d\tdebug mode (prints checksums for each page)\n"); - return 1; - } - - /* stat the file to get size and page count */ - if (stat(argv[optind], &st)) - { - perror("error statting file"); - return 1; - } - size= st.st_size; - pages= size / UNIV_PAGE_SIZE; - if (just_count) - { - printf("%lu\n", pages); - return 0; - } - else if (verbose) - { - printf("file %s = %llu bytes (%lu pages)...\n", argv[optind], size, pages); - printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1)); - } - - /* open the file for reading */ - f= fopen(argv[optind], "r"); - if (!f) - { - perror("error opening file"); - return 1; - } - - /* seek to the necessary position */ - if (start_page) - { - fd= fileno(f); - if (!fd) - { - perror("unable to obtain file descriptor number"); - return 1; - } - - offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; - - if (lseek(fd, offset, SEEK_SET) != offset) - { - perror("unable to seek to necessary offset"); - return 1; - } - } - - /* allocate buffer for reading (so we don't realloc every time) */ - p= (uchar *)malloc(UNIV_PAGE_SIZE); - - /* main checksumming loop */ - ct= start_page; - lastt= 0; - while (!feof(f)) - { - bytes= fread(p, 1, UNIV_PAGE_SIZE, f); - if (!bytes && feof(f)) return 0; - if (bytes != UNIV_PAGE_SIZE) - { - fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE); - return 1; - } - - /* check the "stored log sequence numbers" */ - logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); - logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); - if (debug) - printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); - if (logseq != logseqfield) - { - fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct); - return 1; - } - - /* check old method of checksumming */ - oldcsum= buf_calc_page_old_checksum(p); - oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); - if (debug) - printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); - if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && oldcsumfield != oldcsum) - { - fprintf(stderr, "page %lu invalid (fails old style checksum)\n", ct); - return 1; - } - - /* now check the new method */ - csum= buf_calc_page_new_checksum(p); - csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); - if (debug) - printf("page %lu: new style: calculated = %lu; recorded = %lu\n", ct, csum, csumfield); - if (csumfield != 0 && csum != csumfield) - { - fprintf(stderr, "page %lu invalid (fails new style checksum)\n", ct); - return 1; - } - - /* end if this was the last page we were supposed to check */ - if (use_end_page && (ct >= end_page)) - return 0; - - /* do counter increase and progress printing */ - ct++; - if (verbose) - { - if (ct % 64 == 0) - { - now= time(0); - if (!lastt) lastt= now; - if (now - lastt >= 1) - { - printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); - lastt= now; - } - } - } - } - return 0; -} - diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc new file mode 100644 index 00000000000..c89196b1eee --- /dev/null +++ b/extra/innochecksum.cc @@ -0,0 +1,396 @@ +/* + Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/* + InnoDB offline file checksum utility. 85% of the code in this utility + is included from the InnoDB codebase. + + The final 15% was originally written by Mark Smith of Danga + Interactive, Inc. + + Published with a permission. +*/ + +#include +#include +#include +#include +#include +#include +#ifndef __WIN__ +# include +#endif +#include +#include +#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +/* Only parts of these files are included from the InnoDB codebase. +The parts not included are excluded by #ifndef UNIV_INNOCHECKSUM. */ + +#include "univ.i" /* include all of this */ + +#include "buf0checksum.h" /* buf_calc_page_*() */ +#include "fil0fil.h" /* FIL_* */ +#include "fsp0fsp.h" /* fsp_flags_get_page_size() & + fsp_flags_get_zip_size() */ +#include "mach0data.h" /* mach_read_from_4() */ +#include "ut0crc32.h" /* ut_crc32_init() */ + +#ifdef UNIV_NONINL +# include "fsp0fsp.ic" +# include "mach0data.ic" +# include "ut0rnd.ic" +#endif + +/* Global variables */ +static my_bool verbose; +static my_bool debug; +static my_bool just_count; +static ulong start_page; +static ulong end_page; +static ulong do_page; +static my_bool use_end_page; +static my_bool do_one_page; +ulong srv_page_size; /* replaces declaration in srv0srv.c */ +static ulong physical_page_size; /* Page size in bytes on disk. */ +static ulong logical_page_size; /* Page size when uncompressed. */ + +/* Get the page size of the filespace from the filespace header. */ +static +my_bool +get_page_size( +/*==========*/ + FILE* f, /*!< in: file pointer, must be open + and set to start of file */ + byte* buf, /*!< in: buffer used to read the page */ + ulong* logical_page_size, /*!< out: Logical/Uncompressed page size */ + ulong* physical_page_size) /*!< out: Physical/Commpressed page size */ +{ + ulong flags; + + int bytes= fread(buf, 1, UNIV_PAGE_SIZE_MIN, f); + + if (ferror(f)) + { + perror("Error reading file header"); + return FALSE; + } + + if (bytes != UNIV_PAGE_SIZE_MIN) + { + fprintf(stderr, "Error; Was not able to read the minimum page size "); + fprintf(stderr, "of %d bytes. Bytes read was %d\n", UNIV_PAGE_SIZE_MIN, bytes); + return FALSE; + } + + rewind(f); + + flags = mach_read_from_4(buf + FIL_PAGE_DATA + FSP_SPACE_FLAGS); + + /* srv_page_size is used by InnoDB code as UNIV_PAGE_SIZE */ + srv_page_size = *logical_page_size = fsp_flags_get_page_size(flags); + + /* fsp_flags_get_zip_size() will return zero if not compressed. */ + *physical_page_size = fsp_flags_get_zip_size(flags); + if (*physical_page_size == 0) + *physical_page_size= *logical_page_size; + + return TRUE; +} + + +/* command line argument to do page checks (that's it) */ +/* another argument to specify page ranges... seek to right spot and go from there */ + +static struct my_option innochecksum_options[] = +{ + {"help", '?', "Displays this help and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"info", 'I', "Synonym for --help.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Displays version information and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Verbose (prints progress every 5 seconds).", + &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", 'd', "Debug mode (prints checksums for each page, implies verbose).", + &debug, &debug, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Print the count of pages in the file.", + &just_count, &just_count, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"start_page", 's', "Start on this page number (0 based).", + &start_page, &start_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"end_page", 'e', "End at this page number (0 based).", + &end_page, &end_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"page", 'p', "Check only this page (0 based).", + &do_page, &do_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +static void print_version(void) +{ + printf("%s Ver %s, for %s (%s)\n", + my_progname, INNODB_VERSION_STR, + SYSTEM_TYPE, MACHINE_TYPE); +} + +static void usage(void) +{ + print_version(); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); + printf("InnoDB offline file checksum utility.\n"); + printf("Usage: %s [-c] [-s ] [-e ] [-p ] [-v] [-d] \n", my_progname); + my_print_help(innochecksum_options); + my_print_variables(innochecksum_options); +} + +extern "C" my_bool +innochecksum_get_one_option( +/*========================*/ + int optid, + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch (optid) { + case 'd': + verbose=1; /* debug implies verbose... */ + break; + case 'e': + use_end_page= 1; + break; + case 'p': + end_page= start_page= do_page; + use_end_page= 1; + do_one_page= 1; + break; + case 'V': + print_version(); + exit(0); + break; + case 'I': + case '?': + usage(); + exit(0); + break; + } + return 0; +} + +static int get_options( +/*===================*/ + int *argc, + char ***argv) +{ + int ho_error; + + if ((ho_error=handle_options(argc, argv, innochecksum_options, innochecksum_get_one_option))) + exit(ho_error); + + /* The next arg must be the filename */ + if (!*argc) + { + usage(); + return 1; + } + return 0; +} /* get_options */ + + +int main(int argc, char **argv) +{ + FILE* f; /* our input file */ + char* filename; /* our input filename. */ + unsigned char buf[UNIV_PAGE_SIZE_MAX]; /* Buffer to store pages read */ + ulong bytes; /* bytes read count */ + ulint ct; /* current page number (0 based) */ + time_t now; /* current time */ + time_t lastt; /* last time */ + ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; + /* ulints for checksum storage */ + struct stat st; /* for stat, if you couldn't guess */ + unsigned long long int size; /* size of file (has to be 64 bits) */ + ulint pages; /* number of pages in file */ + off_t offset= 0; + int fd; + + printf("InnoDB offline file checksum utility.\n"); + + ut_crc32_init(); + + MY_INIT(argv[0]); + + if (get_options(&argc,&argv)) + exit(1); + + if (verbose) + my_print_variables(innochecksum_options); + + /* The file name is not optional */ + filename = *argv; + if (*filename == '\0') + { + fprintf(stderr, "Error; File name missing\n"); + return 1; + } + + /* stat the file to get size and page count */ + if (stat(filename, &st)) + { + fprintf(stderr, "Error; %s cannot be found\n", filename); + return 1; + } + size= st.st_size; + + /* Open the file for reading */ + f= fopen(filename, "rb"); + if (f == NULL) + { + fprintf(stderr, "Error; %s cannot be opened", filename); + perror(" "); + return 1; + } + + if (!get_page_size(f, buf, &logical_page_size, &physical_page_size)) + { + return 1; + } + + /* This tool currently does not support Compressed tables */ + if (logical_page_size != physical_page_size) + { + fprintf(stderr, "Error; This file contains compressed pages\n"); + return 1; + } + + pages= (ulint) (size / physical_page_size); + + if (just_count) + { + if (verbose) + printf("Number of pages: "); + printf("%lu\n", pages); + return 0; + } + else if (verbose) + { + printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); + if (do_one_page) + printf("InnoChecksum; checking page %lu\n", do_page); + else + printf("InnoChecksum; checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1)); + } + + /* seek to the necessary position */ + if (start_page) + { + fd= fileno(f); + if (!fd) + { + perror("Error; Unable to obtain file descriptor number"); + return 1; + } + + offset= (off_t)start_page * (off_t)physical_page_size; + + if (lseek(fd, offset, SEEK_SET) != offset) + { + perror("Error; Unable to seek to necessary offset"); + return 1; + } + } + + /* main checksumming loop */ + ct= start_page; + lastt= 0; + while (!feof(f)) + { + bytes= fread(buf, 1, physical_page_size, f); + if (!bytes && feof(f)) + return 0; + + if (ferror(f)) + { + fprintf(stderr, "Error reading %lu bytes", physical_page_size); + perror(" "); + return 1; + } + if (bytes != physical_page_size) + { + fprintf(stderr, "Error; bytes read (%lu) doesn't match page size (%lu)\n", bytes, physical_page_size); + return 1; + } + + /* check the "stored log sequence numbers" */ + logseq= mach_read_from_4(buf + FIL_PAGE_LSN + 4); + logseqfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); + if (debug) + printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); + if (logseq != logseqfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails log sequence number check)\n", ct); + return 1; + } + + /* check old method of checksumming */ + oldcsum= buf_calc_page_old_checksum(buf); + oldcsumfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM); + if (debug) + printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); + if (oldcsumfield != mach_read_from_4(buf + FIL_PAGE_LSN) && oldcsumfield != oldcsum) + { + fprintf(stderr, "Fail; page %lu invalid (fails old style checksum)\n", ct); + return 1; + } + + /* now check the new method */ + csum= buf_calc_page_new_checksum(buf); + crc32= buf_calc_page_crc32(buf); + csumfield= mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM); + if (debug) + printf("page %lu: new style: calculated = %lu; crc32 = %lu; recorded = %lu\n", + ct, csum, crc32, csumfield); + if (csumfield != 0 && crc32 != csumfield && csum != csumfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails innodb and crc32 checksum)\n", ct); + return 1; + } + + /* end if this was the last page we were supposed to check */ + if (use_end_page && (ct >= end_page)) + return 0; + + /* do counter increase and progress printing */ + ct++; + if (verbose) + { + if (ct % 64 == 0) + { + now= time(0); + if (!lastt) lastt= now; + if (now - lastt >= 1) + { + printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); + lastt= now; + } + } + } + } + return 0; +} + diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 62ed3f539e2..5b591024922 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -134,6 +134,7 @@ typedef struct thread_sync static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; +static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; /******************************************************************//** @@ -180,7 +181,9 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif return 0; } @@ -223,12 +226,16 @@ mtflush_service_io( mtflush_io->wt_status = WTHR_SIG_WAITING; + /* TODO: Temporal fix for the hang bug. This needs a real fix. */ + os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); } + os_fast_mutex_unlock(&mtflush_mtx_wait); + if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { @@ -237,6 +244,10 @@ mtflush_service_io( return; } + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -253,7 +264,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; - return; + break; case MT_WRK_WRITE: ut_a(work_item->wi_status == WRK_ITEM_SET); @@ -273,9 +284,9 @@ mtflush_service_io( default: /* None other than Write/Read handling planned */ ut_a(0); + break; } - mtflush_io->wt_status = WTHR_NO_WORK; } /******************************************************************//** @@ -289,6 +300,7 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + ulint n_timeout = 0; #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; @@ -296,8 +308,32 @@ DECLARE_THREAD(mtflush_io_thread)( #endif while (TRUE) { +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_DEBUG */ + mtflush_service_io(mtflush_io); +#ifdef UNIV_DEBUG + if (mtflush_io->wt_status == WTHR_NO_WORK) { + n_timeout++; + + if (n_timeout > 10) { + fprintf(stderr, "InnoDB: Note: Thread %lu has not received " + " work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); + n_timeout = 0; + } + } else { + n_timeout = 0; + } +#endif /* UNIV_DEBUG */ + if (mtflush_io->wt_status == WTHR_KILL_IT) { break; } @@ -379,6 +415,7 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx_wait); /* Free heap */ mem_heap_free(mtflush_io->wheap); @@ -400,6 +437,7 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_read_comp_queue; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init @@ -465,16 +503,15 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); + memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwq, @@ -490,14 +527,18 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; - if((int)done_wi->id_usr == -1 && - done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG + /* TODO: Temporal fix for hang. This is really a bug. */ + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4999a202bd6..6b44cb96677 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16610,6 +16610,11 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, MTFLUSH_MAX_WORKER, /* Max setting */ 0); +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_OPCMDARG , + "Use multi-threaded flush. Default TRUE.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -16762,6 +16767,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(use_lz4), #endif MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), NULL }; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 918a92fa811..37bc9ba5c86 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1008,6 +1008,8 @@ Release fil_system mutex */ void fil_system_exit(void); /*==================*/ + +#ifndef UNIV_INNOCHECKSUM /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ fil_space_t* @@ -1020,5 +1022,5 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ - +#endif #endif /* fil0fil_h */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 725aaf9553d..b4bb9c09ef6 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -257,8 +257,13 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 #define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ extern long srv_mtflush_threads; +/* If this flag is TRUE, then we will use multi threaded flush. */ +extern my_bool srv_use_mtflush; + #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h index 29fc8669ce4..796a272db59 100644 --- a/storage/innobase/include/ut0list.h +++ b/storage/innobase/include/ut0list.h @@ -150,6 +150,15 @@ ib_list_is_empty( /* out: TRUE if empty else */ const ib_list_t* list); /* in: list */ +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list); /*first || list->last)); } + +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list) /*first; + + while(node) { + len++; + node = node->next; + } + + return (len); +} diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index bbbbd3b146b..9906e299808 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -103,6 +103,14 @@ ib_wqueue_nowait( /*=============*/ ib_wqueue_t* wq); /*items)); } + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq) /*mutex); + len = ib_list_len(wq->items); + mutex_exit(&wq->mutex); + + return(len); +} diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index eeb9bf36c86..f7da4c1c7a9 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -134,6 +134,7 @@ typedef struct thread_sync static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; +static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; /******************************************************************//** @@ -182,7 +183,9 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif return 0; } @@ -228,12 +231,16 @@ mtflush_service_io( mtflush_io->wt_status = WTHR_SIG_WAITING; + /* TODO: Temporal fix for the hang bug. This needs a real fix. */ + os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); } + os_fast_mutex_unlock(&mtflush_mtx_wait); + if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { @@ -242,6 +249,10 @@ mtflush_service_io( return; } + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -258,7 +269,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; - return; + break; case MT_WRK_WRITE: ut_a(work_item->wi_status == WRK_ITEM_SET); @@ -278,9 +289,9 @@ mtflush_service_io( default: /* None other than Write/Read handling planned */ ut_a(0); + break; } - mtflush_io->wt_status = WTHR_NO_WORK; } /******************************************************************//** @@ -302,13 +313,16 @@ DECLARE_THREAD(mtflush_io_thread)( #endif while (TRUE) { +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), ib_wqueue_len(mtflush_io->wq), ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_DEBUG */ mtflush_service_io(mtflush_io); +#ifdef UNIV_DEBUG if (mtflush_io->wt_status == WTHR_NO_WORK) { n_timeout++; @@ -323,6 +337,7 @@ DECLARE_THREAD(mtflush_io_thread)( } else { n_timeout = 0; } +#endif /* UNIV_DEBUG */ if (mtflush_io->wt_status == WTHR_KILL_IT) { break; @@ -405,6 +420,7 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx_wait); /* Free heap */ mem_heap_free(mtflush_io->wheap); @@ -426,6 +442,7 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_read_comp_queue; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init @@ -491,16 +508,15 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); + memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwq, @@ -516,14 +532,18 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; - if((int)done_wi->id_usr == -1 && - done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG + /* TODO: Temporal fix for hang. This is really a bug. */ + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index f26ad436190..f35ec84fd12 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17971,6 +17971,11 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, MTFLUSH_MAX_WORKER, /* Max setting */ 0); +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_OPCMDARG , + "Use multi-threaded flush. Default TRUE.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -18168,6 +18173,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(use_lz4), #endif MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), NULL }; diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 6b69a899690..e42063f6335 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1042,6 +1042,8 @@ Release fil_system mutex */ void fil_system_exit(void); /*==================*/ + +#ifndef UNIV_INNOCHECKSUM /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ fil_space_t* @@ -1054,5 +1056,5 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ - +#endif #endif /* fil0fil_h */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index bfb59865841..879989770e6 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -277,8 +277,13 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 #define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ extern long srv_mtflush_threads; +/* If this flag is TRUE, then we will use multi threaded flush. */ +extern my_bool srv_use_mtflush; + /** Server undo tablespaces directory, can be absolute path. */ extern char* srv_undo_dir; diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h index b1035bad099..796a272db59 100644 --- a/storage/xtradb/include/ut0list.h +++ b/storage/xtradb/include/ut0list.h @@ -151,7 +151,7 @@ ib_list_is_empty( const ib_list_t* list); /* in: list */ /******************************************************************** -Get number of items on list. +Get number of items on list. @return number of items on list */ UNIV_INLINE ulint diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic index eaf2577b16c..7a7f53adb2f 100644 --- a/storage/xtradb/include/ut0list.ic +++ b/storage/xtradb/include/ut0list.ic @@ -60,7 +60,7 @@ ib_list_is_empty( } /******************************************************************** -Get number of items on list. +Get number of items on list. @return number of items on list */ UNIV_INLINE ulint diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h index 6513f4982c0..e6b9891aed1 100644 --- a/storage/xtradb/include/ut0wqueue.h +++ b/storage/xtradb/include/ut0wqueue.h @@ -105,7 +105,7 @@ ib_wqueue_nowait( /******************************************************************** -Get number of items on queue. +Get number of items on queue. @return number of items on queue */ ulint ib_wqueue_len( diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index d6801b701ae..f7469e29911 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -176,9 +176,11 @@ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -UNIV_INTERN my_bool srv_use_lz4 = FALSE; +UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; +/* If this flag is TRUE, then we will use multi threaded flush. */ +UNIV_INTERN my_bool srv_use_mtflush = TRUE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index bb539569e9a..4d97632f818 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2719,19 +2719,23 @@ files_checked: if (!srv_read_only_mode) { - /* Start multi-threaded flush threads */ - mtflush_ctx = buf_mtflu_handler_init(srv_mtflush_threads, - srv_buf_pool_instances); - - /* Set up the thread ids */ - buf_mtflu_set_thread_ids(srv_mtflush_threads, - mtflush_ctx, - (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); + if (srv_use_mtflush) { + /* Start multi-threaded flush threads */ + mtflush_ctx = buf_mtflu_handler_init( + srv_mtflush_threads, + srv_buf_pool_instances); + /* Set up the thread ids */ + buf_mtflu_set_thread_ids( + srv_mtflush_threads, + mtflush_ctx, + (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); #if UNIV_DEBUG - fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", - __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); #endif + } + os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); } @@ -3004,9 +3008,12 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ - /* g. Exit the multi threaded flush threads */ - buf_mtflu_io_thread_exit(); + if (srv_use_mtflush) { + /* g. Exit the multi threaded flush threads */ + + buf_mtflu_io_thread_exit(); + } #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: %s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count);