/************************************************************************ Starts the InnoDB database server (c) 1996-2000 Innobase Oy Created 2/16/1996 Heikki Tuuri *************************************************************************/ #include "os0proc.h" #include "sync0sync.h" #include "ut0mem.h" #include "mem0mem.h" #include "mem0pool.h" #include "data0data.h" #include "data0type.h" #include "dict0dict.h" #include "buf0buf.h" #include "buf0flu.h" #include "buf0rea.h" #include "os0file.h" #include "os0thread.h" #include "fil0fil.h" #include "fsp0fsp.h" #include "rem0rec.h" #include "rem0cmp.h" #include "mtr0mtr.h" #include "log0log.h" #include "log0recv.h" #include "page0page.h" #include "page0cur.h" #include "trx0trx.h" #include "dict0boot.h" #include "trx0sys.h" #include "dict0crea.h" #include "btr0btr.h" #include "btr0pcur.h" #include "btr0cur.h" #include "btr0sea.h" #include "rem0rec.h" #include "srv0srv.h" #include "que0que.h" #include "usr0sess.h" #include "lock0lock.h" #include "trx0roll.h" #include "trx0purge.h" #include "row0ins.h" #include "row0sel.h" #include "row0upd.h" #include "row0row.h" #include "row0mysql.h" #include "lock0lock.h" #include "ibuf0ibuf.h" #include "pars0pars.h" #include "btr0sea.h" #include "srv0start.h" #include "que0que.h" static ibool srv_start_has_been_called = FALSE; ulint srv_sizeof_trx_t_in_ha_innodb_cc; ibool srv_startup_is_before_trx_rollback_phase = FALSE; ibool srv_is_being_started = FALSE; static ibool srv_was_started = FALSE; /* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE */ ulint srv_shutdown_state = 0; ibool measure_cont = FALSE; static os_file_t files[1000]; static mutex_t ios_mutex; static ulint ios; static ulint n[SRV_MAX_N_IO_THREADS + 5]; static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; /* We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ static os_fast_mutex_t srv_os_test_mutex; /* Name of srv_monitor_file */ static char* srv_monitor_file_name; #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 /* The following limit may be too big in some old operating systems: we may get an assertion failure in os0file.c */ #define SRV_MAX_N_OPEN_FILES 500 #define SRV_LOG_SPACE_FIRST_ID 1000000000 /************************************************************************* Reads the data files and their sizes from a character string given in the .cnf file. */ ibool srv_parse_data_file_paths_and_sizes( /*================================*/ /* out: TRUE if ok, FALSE if parsing error */ char* str, /* in: the data file path string */ char*** data_file_names, /* out, own: array of data file names */ ulint** data_file_sizes, /* out, own: array of data file sizes in megabytes */ ulint** data_file_is_raw_partition,/* out, own: array of flags showing which data files are raw partitions */ ulint* n_data_files, /* out: number of data files */ ibool* is_auto_extending, /* out: TRUE if the last data file is auto-extending */ ulint* max_auto_extend_size) /* out: max auto extend size for the last file if specified, 0 if not */ { char* input_str; char* endp; char* path; ulint size; ulint i = 0; *is_auto_extending = FALSE; *max_auto_extend_size = 0; input_str = str; /* First calculate the number of data files and check syntax: path:size[M | G];path:size[M | G]... . Note that a Windows path may contain a drive name and a ':'. */ while (*str != '\0') { path = str; while ((*str != ':' && *str != '\0') || (*str == ':' && (*(str + 1) == '\\' || *(str + 1) == '/'))) { str++; } if (*str == '\0') { return(FALSE); } str++; size = strtoul(str, &endp, 10); str = endp; if (*str != 'M' && *str != 'G') { size = size / (1024 * 1024); } else if (*str == 'G') { size = size * 1024; str++; } else { str++; } if (0 == memcmp(str, ":autoextend", (sizeof ":autoextend") - 1)) { str += (sizeof ":autoextend") - 1; if (0 == memcmp(str, ":max:", (sizeof ":max:") - 1)) { str += (sizeof ":max:") - 1; size = strtoul(str, &endp, 10); str = endp; if (*str != 'M' && *str != 'G') { size = size / (1024 * 1024); } else if (*str == 'G') { size = size * 1024; str++; } else { str++; } } if (*str != '\0') { return(FALSE); } } if (strlen(str) >= 6 && *str == 'n' && *(str + 1) == 'e' && *(str + 2) == 'w') { str += 3; } if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { str += 3; } if (size == 0) { return(FALSE); } i++; if (*str == ';') { str++; } else if (*str != '\0') { return(FALSE); } } *data_file_names = (char**)ut_malloc(i * sizeof(void*)); *data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint)); *data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint)); *n_data_files = i; /* Then store the actual values to our arrays */ str = input_str; i = 0; while (*str != '\0') { path = str; /* Note that we must ignore the ':' in a Windows path */ while ((*str != ':' && *str != '\0') || (*str == ':' && (*(str + 1) == '\\' || *(str + 1) == '/'))) { str++; } if (*str == ':') { /* Make path a null-terminated string */ *str = '\0'; str++; } size = strtoul(str, &endp, 10); str = endp; if ((*str != 'M') && (*str != 'G')) { size = size / (1024 * 1024); } else if (*str == 'G') { size = size * 1024; str++; } else { str++; } (*data_file_names)[i] = path; (*data_file_sizes)[i] = size; if (0 == memcmp(str, ":autoextend", (sizeof ":autoextend") - 1)) { *is_auto_extending = TRUE; str += (sizeof ":autoextend") - 1; if (0 == memcmp(str, ":max:", (sizeof ":max:") - 1)) { str += (sizeof ":max:") - 1; size = strtoul(str, &endp, 10); str = endp; if (*str != 'M' && *str != 'G') { size = size / (1024 * 1024); } else if (*str == 'G') { size = size * 1024; str++; } else { str++; } *max_auto_extend_size = size; } if (*str != '\0') { return(FALSE); } } (*data_file_is_raw_partition)[i] = 0; if (strlen(str) >= 6 && *str == 'n' && *(str + 1) == 'e' && *(str + 2) == 'w') { str += 3; (*data_file_is_raw_partition)[i] = SRV_NEW_RAW; } if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { str += 3; if ((*data_file_is_raw_partition)[i] == 0) { (*data_file_is_raw_partition)[i] = SRV_OLD_RAW; } } i++; if (*str == ';') { str++; } } return(TRUE); } /************************************************************************* Reads log group home directories from a character string given in the .cnf file. */ ibool srv_parse_log_group_home_dirs( /*==========================*/ /* out: TRUE if ok, FALSE if parsing error */ char* str, /* in: character string */ char*** log_group_home_dirs) /* out, own: log group home dirs */ { char* input_str; char* path; ulint i = 0; input_str = str; /* First calculate the number of directories and check syntax: path;path;... */ while (*str != '\0') { path = str; while (*str != ';' && *str != '\0') { str++; } i++; if (*str == ';') { str++; } else if (*str != '\0') { return(FALSE); } } *log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*)); /* Then store the actual values to our array */ str = input_str; i = 0; while (*str != '\0') { path = str; while (*str != ';' && *str != '\0') { str++; } if (*str == ';') { *str = '\0'; str++; } (*log_group_home_dirs)[i] = path; i++; } return(TRUE); } /************************************************************************ I/o-handler thread function. */ static #ifndef __WIN__ void* #else ulint #endif io_handler_thread( /*==============*/ void* arg) { ulint segment; ulint i; segment = *((ulint*)arg); #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment, os_thread_pf(os_thread_get_curr_id())); #endif for (i = 0;; i++) { fil_aio_wait(segment); mutex_enter(&ios_mutex); ios++; mutex_exit(&ios_mutex); } /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. The thread actually never comes here because it is exited in an os_event_wait(). */ os_thread_exit(NULL); #ifndef __WIN__ return(NULL); #else return(0); #endif } #ifdef __WIN__ #define SRV_PATH_SEPARATOR '\\' #else #define SRV_PATH_SEPARATOR '/' #endif /************************************************************************* Normalizes a directory path for Windows: converts slashes to backslashes. */ void srv_normalize_path_for_win( /*=======================*/ char* str __attribute__((unused))) /* in/out: null-terminated character string */ { #ifdef __WIN__ for (; *str; str++) { if (*str == '/') { *str = '\\'; } } #endif } /************************************************************************* Adds a slash or a backslash to the end of a string if it is missing and the string is not empty. */ static char* srv_add_path_separator_if_needed( /*=============================*/ /* out: string which has the separator if the string is not empty */ char* str) /* in: null-terminated character string */ { char* out_str; ulint len = ut_strlen(str); if (len == 0 || str[len - 1] == SRV_PATH_SEPARATOR) { return(str); } out_str = ut_malloc(len + 2); memcpy(out_str, str, len); out_str[len] = SRV_PATH_SEPARATOR; out_str[len + 1] = 0; return(out_str); } /************************************************************************* Calculates the low 32 bits when a file size which is given as a number database pages is converted to the number of bytes. */ static ulint srv_calc_low32( /*===========*/ /* out: low 32 bytes of file size when expressed in bytes */ ulint file_size) /* in: file size in database pages */ { return(0xFFFFFFFF & (file_size << UNIV_PAGE_SIZE_SHIFT)); } /************************************************************************* Calculates the high 32 bits when a file size which is given as a number database pages is converted to the number of bytes. */ static ulint srv_calc_high32( /*============*/ /* out: high 32 bytes of file size when expressed in bytes */ ulint file_size) /* in: file size in database pages */ { return(file_size >> (32 - UNIV_PAGE_SIZE_SHIFT)); } /************************************************************************* Creates or opens the log files and closes them. */ static ulint open_or_create_log_file( /*====================*/ /* out: DB_SUCCESS or error code */ ibool* log_file_created, /* out: TRUE if new log file created */ ibool log_file_has_been_opened,/* in: TRUE if a log file has been opened before: then it is an error to try to create another log file */ ulint k, /* in: log group number */ ulint i) /* in: log file number in group */ { ibool ret; ulint arch_space_id; ulint size; ulint size_high; char name[10000]; *log_file_created = FALSE; srv_normalize_path_for_win(srv_log_group_home_dirs[k]); srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed( srv_log_group_home_dirs[k]); ut_a(strlen(srv_log_group_home_dirs[k]) < (sizeof name) - 10 - sizeof "ib_logfile"); sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i); files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret); if (ret == FALSE) { if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) { fprintf(stderr, "InnoDB: Error in creating or opening %s\n", name); return(DB_ERROR); } files[i] = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO, OS_LOG_FILE, &ret); if (!ret) { fprintf(stderr, "InnoDB: Error in opening %s\n", name); return(DB_ERROR); } ret = os_file_get_size(files[i], &size, &size_high); ut_a(ret); if (size != srv_calc_low32(srv_log_file_size) || size_high != srv_calc_high32(srv_log_file_size)) { fprintf(stderr, "InnoDB: Error: log file %s is of different size %lu %lu bytes\n" "InnoDB: than specified in the .cnf file %lu %lu bytes!\n", name, size_high, size, srv_calc_high32(srv_log_file_size), srv_calc_low32(srv_log_file_size)); return(DB_ERROR); } } else { *log_file_created = TRUE; ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Log file %s did not exist: new to be created\n", name); if (log_file_has_been_opened) { return(DB_ERROR); } fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n", name, srv_log_file_size >> (20 - UNIV_PAGE_SIZE_SHIFT)); fprintf(stderr, "InnoDB: Database physically writes the file full: wait...\n"); ret = os_file_set_size(name, files[i], srv_calc_low32(srv_log_file_size), srv_calc_high32(srv_log_file_size)); if (!ret) { fprintf(stderr, "InnoDB: Error in creating %s: probably out of disk space\n", name); return(DB_ERROR); } } ret = os_file_close(files[i]); ut_a(ret); if (i == 0) { /* Create in memory the file space object which is for this log group */ fil_space_create(name, 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG); } ut_a(fil_validate()); fil_node_create(name, srv_log_file_size, 2 * k + SRV_LOG_SPACE_FIRST_ID); /* If this is the first log group, create the file space object for archived logs */ if (k == 0 && i == 0) { arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID; fil_space_create((char*) "arch_log_space", arch_space_id, FIL_LOG); } else { arch_space_id = ULINT_UNDEFINED; } if (i == 0) { log_group_init(k, srv_n_log_files, srv_log_file_size * UNIV_PAGE_SIZE, 2 * k + SRV_LOG_SPACE_FIRST_ID, arch_space_id); } return(DB_SUCCESS); } /************************************************************************* Creates or opens database data files and closes them. */ static ulint open_or_create_data_files( /*======================*/ /* out: DB_SUCCESS or error code */ ibool* create_new_db, /* out: TRUE if new database should be created */ dulint* min_flushed_lsn,/* out: min of flushed lsn values in data files */ ulint* min_arch_log_no,/* out: min of archived log numbers in data files */ dulint* max_flushed_lsn,/* out: */ ulint* max_arch_log_no,/* out: */ ulint* sum_of_new_sizes)/* out: sum of sizes of the new files added */ { ibool ret; ulint i; ibool one_opened = FALSE; ibool one_created = FALSE; ulint size; ulint size_high; ulint rounded_size_pages; char name[10000]; if (srv_n_data_files >= 1000) { fprintf(stderr, "InnoDB: can only have < 1000 data files\n" "InnoDB: you have defined %lu\n", srv_n_data_files); return(DB_ERROR); } *sum_of_new_sizes = 0; *create_new_db = FALSE; srv_normalize_path_for_win(srv_data_home); srv_data_home = srv_add_path_separator_if_needed(srv_data_home); for (i = 0; i < srv_n_data_files; i++) { srv_normalize_path_for_win(srv_data_file_names[i]); ut_a(strlen(srv_data_home) + strlen(srv_data_file_names[i]) < (sizeof name) - 1); sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]); files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, OS_DATA_FILE, &ret); if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) { /* The partition is opened, not created; then it is written over */ srv_created_new_raw = TRUE; files[i] = os_file_create( name, OS_FILE_OPEN, OS_FILE_NORMAL, OS_DATA_FILE, &ret); if (!ret) { fprintf(stderr, "InnoDB: Error in opening %s\n", name); return(DB_ERROR); } } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { ret = FALSE; } if (ret == FALSE) { if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW && os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) { fprintf(stderr, "InnoDB: Error in creating or opening %s\n", name); return(DB_ERROR); } if (one_created) { fprintf(stderr, "InnoDB: Error: data files can only be added at the end\n"); fprintf(stderr, "InnoDB: of a tablespace, but data file %s existed beforehand.\n", name); return(DB_ERROR); } files[i] = os_file_create( name, OS_FILE_OPEN, OS_FILE_NORMAL, OS_DATA_FILE, &ret); if (!ret) { fprintf(stderr, "InnoDB: Error in opening %s\n", name); os_file_get_last_error(); return(DB_ERROR); } if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW) { ret = os_file_get_size(files[i], &size, &size_high); ut_a(ret); /* Round size downward to megabytes */ rounded_size_pages = (size / (1024 * 1024) + 4096 * size_high) << (20 - UNIV_PAGE_SIZE_SHIFT); if (i == srv_n_data_files - 1 && srv_auto_extend_last_data_file) { if (srv_data_file_sizes[i] > rounded_size_pages || (srv_last_file_size_max > 0 && srv_last_file_size_max < rounded_size_pages)) { fprintf(stderr, "InnoDB: Error: auto-extending data file %s is of a different size\n" "InnoDB: %lu pages (rounded down to MB) than specified in the .cnf file:\n" "InnoDB: initial %lu pages, max %lu (relevant if non-zero) pages!\n", name, rounded_size_pages, srv_data_file_sizes[i], srv_last_file_size_max); return(DB_ERROR); } srv_data_file_sizes[i] = rounded_size_pages; } if (rounded_size_pages != srv_data_file_sizes[i]) { fprintf(stderr, "InnoDB: Error: data file %s is of a different size\n" "InnoDB: %lu pages (rounded down to MB)\n" "InnoDB: than specified in the .cnf file %lu pages!\n", name, rounded_size_pages, srv_data_file_sizes[i]); return(DB_ERROR); } } fil_read_flushed_lsn_and_arch_log_no(files[i], one_opened, min_flushed_lsn, min_arch_log_no, max_flushed_lsn, max_arch_log_no); one_opened = TRUE; } else { one_created = TRUE; if (i > 0) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Data file %s did not exist: new to be created\n", name); } else { fprintf(stderr, "InnoDB: The first specified data file %s did not exist:\n" "InnoDB: a new database to be created!\n", name); *create_new_db = TRUE; } ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Setting file %s size to %lu MB\n", name, (srv_data_file_sizes[i] >> (20 - UNIV_PAGE_SIZE_SHIFT))); fprintf(stderr, "InnoDB: Database physically writes the file full: wait...\n"); ret = os_file_set_size(name, files[i], srv_calc_low32(srv_data_file_sizes[i]), srv_calc_high32(srv_data_file_sizes[i])); if (!ret) { fprintf(stderr, "InnoDB: Error in creating %s: probably out of disk space\n", name); return(DB_ERROR); } *sum_of_new_sizes = *sum_of_new_sizes + srv_data_file_sizes[i]; } ret = os_file_close(files[i]); ut_a(ret); if (i == 0) { fil_space_create(name, 0, FIL_TABLESPACE); } ut_a(fil_validate()); fil_node_create(name, srv_data_file_sizes[i], 0); } ios = 0; mutex_create(&ios_mutex); mutex_set_level(&ios_mutex, SYNC_NO_ORDER_CHECK); return(DB_SUCCESS); } /******************************************************************** Starts InnoDB and creates a new database if database files are not found and the user wants. Server parameters are read from a file of name "srv_init" in the ib_home directory. */ int innobase_start_or_create_for_mysql(void) /*====================================*/ /* out: DB_SUCCESS or error code */ { ibool create_new_db; ibool log_file_created; ibool log_created = FALSE; ibool log_opened = FALSE; dulint min_flushed_lsn; dulint max_flushed_lsn; ulint min_arch_log_no; ulint max_arch_log_no; ibool start_archive; ulint sum_of_new_sizes; ulint sum_of_data_file_sizes; ulint tablespace_size_in_header; ulint err; ulint i; ulint k; mtr_t mtr; #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: !!!!!!!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!!!!!!!\n"); #endif #ifdef UNIV_SYNC_DEBUG fprintf(stderr, "InnoDB: !!!!!!!!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!!!!!!!\n"); #endif #ifdef UNIV_SEARCH_DEBUG fprintf(stderr, "InnoDB: !!!!!!!!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!!!!!!!\n"); #endif #ifdef UNIV_MEM_DEBUG fprintf(stderr, "InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n"); #endif if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) { fprintf(stderr, "InnoDB: Error: trx_t size is %lu in ha_innodb.cc but %lu in srv0start.c\n" "InnoDB: Check that pthread_mutex_t is defined in the same way in these\n" "InnoDB: compilation modules. Cannot continue.\n", srv_sizeof_trx_t_in_ha_innodb_cc, (ulint)sizeof(trx_t)); return(DB_ERROR); } /* Since InnoDB does not currently clean up all its internal data structures in MySQL Embedded Server Library server_end(), we print an error message if someone tries to start up InnoDB a second time during the process lifetime. */ if (srv_start_has_been_called) { fprintf(stderr, "InnoDB: Error:startup called second time during the process lifetime.\n" "InnoDB: In the MySQL Embedded Server Library you cannot call server_init()\n" "InnoDB: more than once during the process lifetime.\n"); } srv_start_has_been_called = TRUE; log_do_write = TRUE; /* yydebug = TRUE; */ srv_is_being_started = TRUE; srv_startup_is_before_trx_rollback_phase = TRUE; os_aio_use_native_aio = FALSE; #ifdef __WIN__ if (os_get_os_version() == OS_WIN95 || os_get_os_version() == OS_WIN31 || os_get_os_version() == OS_WINNT) { /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, and NT use simulated aio. In NT Windows provides async i/o, but when run in conjunction with InnoDB Hot Backup, it seemed to corrupt the data files. */ os_aio_use_native_aio = FALSE; } else { /* On Win 2000 and XP use async i/o */ os_aio_use_native_aio = TRUE; } #endif if (srv_file_flush_method_str == NULL) { /* These are the default options */ srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; #ifndef __WIN__ } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"fdatasync")) { srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"O_DSYNC")) { srv_unix_file_flush_method = SRV_UNIX_O_DSYNC; } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"O_DIRECT")) { srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"littlesync")) { srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"nosync")) { srv_unix_file_flush_method = SRV_UNIX_NOSYNC; #else } else if (0 == ut_strcmp(srv_file_flush_method_str, (char*)"normal")) { srv_win_file_flush_method = SRV_WIN_IO_NORMAL; os_aio_use_native_aio = FALSE; } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; os_aio_use_native_aio = FALSE; } else if (0 == ut_strcmp(srv_file_flush_method_str, "async_unbuffered")) { srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; #endif } else { fprintf(stderr, "InnoDB: Unrecognized value %s for innodb_flush_method\n", srv_file_flush_method_str); return(DB_ERROR); } /* Set the maximum number of threads which can wait for a semaphore inside InnoDB */ #if defined(__WIN__) || defined(__NETWARE__) /* Create less event semaphores because Win 98/ME had difficulty creating 40000 event semaphores. Comment from Novell, Inc.: also, these just take a lot of memory on NetWare. */ srv_max_n_threads = 1000; #else if (srv_pool_size >= 8 * 1024 * 1024) { /* Here we still have srv_pool_size counted in bytes, srv_boot converts the value to pages; if buffer pool is less than 8 MB, assume fewer threads. */ srv_max_n_threads = 10000; } else { srv_max_n_threads = 1000; /* saves several MB of memory, especially in 64-bit computers */ } #endif err = srv_boot(); if (err != DB_SUCCESS) { return((int) err); } mutex_create(&srv_monitor_file_mutex); mutex_set_level(&srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK); srv_monitor_file_name = mem_alloc( strlen(fil_path_to_mysql_datadir) + 20 + sizeof "/innodb_status."); sprintf(srv_monitor_file_name, "%s/innodb.status.%lu", fil_path_to_mysql_datadir, os_proc_get_number()); srv_monitor_file = fopen(srv_monitor_file_name, "w+"); if (!srv_monitor_file) { fprintf(stderr, "InnoDB: unable to create %s: %s\n", srv_monitor_file_name, strerror(errno)); return(DB_ERROR); } /* Restrict the maximum number of file i/o threads */ if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) { srv_n_file_io_threads = SRV_MAX_N_IO_THREADS; } if (!os_aio_use_native_aio) { /* In simulated aio we currently have use only for 4 threads */ srv_n_file_io_threads = 4; os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD * srv_n_file_io_threads, srv_n_file_io_threads, SRV_MAX_N_PENDING_SYNC_IOS); } else { os_aio_init(SRV_N_PENDING_IOS_PER_THREAD * srv_n_file_io_threads, srv_n_file_io_threads, SRV_MAX_N_PENDING_SYNC_IOS); } fil_init(SRV_MAX_N_OPEN_FILES); buf_pool_init(srv_pool_size, srv_pool_size); fsp_init(); log_init(); lock_sys_create(srv_lock_table_size); /* Create i/o-handler threads: */ for (i = 0; i < srv_n_file_io_threads; i++) { n[i] = i; os_thread_create(io_handler_thread, n + i, thread_ids + i); } if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) { fprintf(stderr, "InnoDB: Error: you must set the log group home dir in my.cnf the\n" "InnoDB: same as log arch dir.\n"); return(DB_ERROR); } if (srv_n_log_files * srv_log_file_size >= 262144) { fprintf(stderr, "InnoDB: Error: combined size of log files must be < 4 GB\n"); return(DB_ERROR); } sum_of_new_sizes = 0; for (i = 0; i < srv_n_data_files; i++) { #ifndef __WIN__ if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) { fprintf(stderr, "InnoDB: Error: file size must be < 4 GB with this MySQL binary\n" "InnoDB: and operating system combination, in some OS's < 2 GB\n"); return(DB_ERROR); } #endif sum_of_new_sizes += srv_data_file_sizes[i]; } if (sum_of_new_sizes < 640) { fprintf(stderr, "InnoDB: Error: tablespace size must be at least 10 MB\n"); return(DB_ERROR); } err = open_or_create_data_files(&create_new_db, &min_flushed_lsn, &min_arch_log_no, &max_flushed_lsn, &max_arch_log_no, &sum_of_new_sizes); if (err != DB_SUCCESS) { fprintf(stderr, "InnoDB: Could not open or create data files.\n" "InnoDB: If you tried to add new data files, and it failed here,\n" "InnoDB: you should now edit innodb_data_file_path in my.cnf back\n" "InnoDB: to what it was, and remove the new ibdata files InnoDB created\n" "InnoDB: in this failed attempt. InnoDB only wrote those files full of\n" "InnoDB: zeros, but did not yet use them in any way. But be careful: do not\n" "InnoDB: remove old data files which contain your precious data!\n"); return((int) err); } if (!create_new_db) { /* If we are using the doublewrite method, we will check if there are half-written pages in data files, and restore them from the doublewrite buffer if possible */ if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { trx_sys_doublewrite_restore_corrupt_pages(); } } srv_normalize_path_for_win(srv_arch_dir); srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir); for (k = 0; k < srv_n_log_groups; k++) { for (i = 0; i < srv_n_log_files; i++) { err = open_or_create_log_file(&log_file_created, log_opened, k, i); if (err != DB_SUCCESS) { return((int) err); } if (log_file_created) { log_created = TRUE; } else { log_opened = TRUE; } if ((log_opened && create_new_db) || (log_opened && log_created)) { fprintf(stderr, "InnoDB: Error: all log files must be created at the same time.\n" "InnoDB: All log files must be created also in database creation.\n" "InnoDB: If you want bigger or smaller log files, shut down the\n" "InnoDB: database and make sure there were no errors in shutdown.\n" "InnoDB: Then delete the existing log files. Edit the .cnf file\n" "InnoDB: and start the database again.\n"); return(DB_ERROR); } } } if (log_created && !create_new_db && !srv_archive_recovery) { if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0 || max_arch_log_no != min_arch_log_no) { fprintf(stderr, "InnoDB: Cannot initialize created log files because\n" "InnoDB: data files were not in sync with each other\n" "InnoDB: or the data files are corrupt.\n"); return(DB_ERROR); } if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000)) < 0) { fprintf(stderr, "InnoDB: Cannot initialize created log files because\n" "InnoDB: data files are corrupt, or new data files were\n" "InnoDB: created when the database was started previous\n" "InnoDB: time but the database was not shut down\n" "InnoDB: normally after that.\n"); return(DB_ERROR); } mutex_enter(&(log_sys->mutex)); recv_reset_logs(max_flushed_lsn, max_arch_log_no + 1, TRUE); mutex_exit(&(log_sys->mutex)); } if (create_new_db) { mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); mtr_commit(&mtr); trx_sys_create(); dict_create(); srv_startup_is_before_trx_rollback_phase = FALSE; } else if (srv_archive_recovery) { fprintf(stderr, "InnoDB: Starting archive recovery from a backup...\n"); err = recv_recovery_from_archive_start( min_flushed_lsn, srv_archive_recovery_limit_lsn, min_arch_log_no); if (err != DB_SUCCESS) { return(DB_ERROR); } /* Since ibuf init is in dict_boot, and ibuf is needed in any disk i/o, first call dict_boot */ dict_boot(); trx_sys_init_at_db_start(); srv_startup_is_before_trx_rollback_phase = FALSE; /* Initialize the fsp free limit global variable in the log system */ fsp_header_get_free_limit(0); recv_recovery_from_archive_finish(); } else { /* We always try to do a recovery, even if the database had been shut down normally */ err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT, ut_dulint_max, min_flushed_lsn, max_flushed_lsn); if (err != DB_SUCCESS) { return(DB_ERROR); } /* Since ibuf init is in dict_boot, and ibuf is needed in any disk i/o, first call dict_boot */ dict_boot(); trx_sys_init_at_db_start(); /* The following needs trx lists which are initialized in trx_sys_init_at_db_start */ srv_startup_is_before_trx_rollback_phase = FALSE; /* Initialize the fsp free limit global variable in the log system */ fsp_header_get_free_limit(0); recv_recovery_from_checkpoint_finish(); } if (!create_new_db && sum_of_new_sizes > 0) { /* New data file(s) were added */ mtr_start(&mtr); fsp_header_inc_size(0, sum_of_new_sizes, &mtr); mtr_commit(&mtr); } if (recv_needed_recovery) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Flushing modified pages from the buffer pool...\n"); } log_make_checkpoint_at(ut_dulint_max, TRUE); if (!srv_log_archive_on) { ut_a(DB_SUCCESS == log_archive_noarchivelog()); } else { mutex_enter(&(log_sys->mutex)); start_archive = FALSE; if (log_sys->archiving_state == LOG_ARCH_OFF) { start_archive = TRUE; } mutex_exit(&(log_sys->mutex)); if (start_archive) { ut_a(DB_SUCCESS == log_archive_archivelog()); } } if (srv_measure_contention) { /* os_thread_create(&test_measure_cont, NULL, thread_ids + SRV_MAX_N_IO_THREADS); */ } /* fprintf(stderr, "Max allowed record size %lu\n", page_get_free_space_of_empty() / 2); */ /* Create the thread which watches the timeouts for lock waits and prints InnoDB monitor info */ os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); /* Create the thread which warns of long semaphore waits */ os_thread_create(&srv_error_monitor_thread, NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); srv_was_started = TRUE; srv_is_being_started = FALSE; sync_order_checks_on = TRUE; if (srv_use_doublewrite_buf && trx_doublewrite == NULL) { trx_sys_create_doublewrite_buf(); } err = dict_create_or_check_foreign_constraint_tables(); if (err != DB_SUCCESS) { return((int)DB_ERROR); } /* Create the master thread which monitors the database server, and does purge and other utility operations */ os_thread_create(&srv_master_thread, NULL, thread_ids + 1 + SRV_MAX_N_IO_THREADS); #ifdef UNIV_DEBUG /* buf_debug_prints = TRUE; */ #endif /* UNIV_DEBUG */ sum_of_data_file_sizes = 0; for (i = 0; i < srv_n_data_files; i++) { sum_of_data_file_sizes += srv_data_file_sizes[i]; } tablespace_size_in_header = fsp_header_get_tablespace_size(0); if (!srv_auto_extend_last_data_file && sum_of_data_file_sizes != tablespace_size_in_header) { fprintf(stderr, "InnoDB: Error: tablespace size stored in header is %lu pages, but\n" "InnoDB: the sum of data file sizes is %lu pages\n", tablespace_size_in_header, sum_of_data_file_sizes); } if (srv_auto_extend_last_data_file && sum_of_data_file_sizes < tablespace_size_in_header) { fprintf(stderr, "InnoDB: Error: tablespace size stored in header is %lu pages, but\n" "InnoDB: the sum of data file sizes is only %lu pages\n", tablespace_size_in_header, sum_of_data_file_sizes); } /* Check that os_fast_mutexes work as exptected */ os_fast_mutex_init(&srv_os_test_mutex); if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) { fprintf(stderr, "InnoDB: Error: pthread_mutex_trylock returns an unexpected value on\n" "InnoDB: success! Cannot continue.\n"); exit(1); } os_fast_mutex_unlock(&srv_os_test_mutex); os_fast_mutex_lock(&srv_os_test_mutex); os_fast_mutex_unlock(&srv_os_test_mutex); os_fast_mutex_free(&srv_os_test_mutex); /***********************************************************/ /* Do NOT merge to the 4.1 code base! */ if (trx_sys_downgrading_from_4_1_1) { fprintf(stderr, "InnoDB: You are downgrading from an InnoDB version which allows multiple\n" "InnoDB: tablespaces. Wait that purge and insert buffer merge run to\n" "InnoDB: completion...\n"); for (;;) { os_thread_sleep(10000000); if (0 == strcmp(srv_main_thread_op_info, "waiting for server activity")) { break; } } fprintf(stderr, "InnoDB: Full purge and insert buffer merge completed.\n"); trx_sys_mark_downgraded_from_4_1_1(); fprintf(stderr, "InnoDB: Downgraded from >= 4.1.1 to 4.0\n"); } /***********************************************************/ if (srv_print_verbose_log) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Started\n"); } if (srv_force_recovery > 0) { fprintf(stderr, "InnoDB: !!! innodb_force_recovery is set to %lu !!!\n", srv_force_recovery); } fflush(stderr); return((int) DB_SUCCESS); } /******************************************************************** Shuts down the InnoDB database. */ int innobase_shutdown_for_mysql(void) /*=============================*/ /* out: DB_SUCCESS or error code */ { ulint i; if (!srv_was_started) { if (srv_is_being_started) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Warning: shutting down a not properly started\n"); fprintf(stderr, " InnoDB: or created database!\n"); } return(DB_SUCCESS); } /* 1. Flush buffer pool to disk, write the current lsn to the tablespace header(s), and copy all log data to archive. The step 1 is the real InnoDB shutdown. The remaining steps just free data structures after the shutdown. */ logs_empty_and_mark_files_at_shutdown(); if (srv_conc_n_threads != 0) { fprintf(stderr, "InnoDB: Warning: query counter shows %ld queries still\n" "InnoDB: inside InnoDB at shutdown\n", srv_conc_n_threads); } /* 2. Make all threads created by InnoDB to exit */ srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; /* All threads end up waiting for certain events. Put those events to the signaled state. Then the threads will exit themselves in os_thread_event_wait(). */ for (i = 0; i < 1000; i++) { /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM HERE OR EARLIER */ /* 1. Let the lock timeout thread exit */ os_event_set(srv_lock_timeout_thread_event); /* 2. srv error monitor thread exits automatically, no need to do anything here */ /* 3. We wake the master thread so that it exits */ srv_wake_master_thread(); /* 4. Exit the i/o threads */ os_aio_wake_all_threads_at_shutdown(); os_mutex_enter(os_sync_mutex); if (os_thread_count == 0) { /* All the threads have exited or are just exiting; NOTE that the threads may not have completed their exit yet. Should we use pthread_join() to make sure they have exited? Now we just sleep 0.1 seconds and hope that is enough! */ os_mutex_exit(os_sync_mutex); os_thread_sleep(100000); break; } os_mutex_exit(os_sync_mutex); os_thread_sleep(100000); } if (i == 1000) { fprintf(stderr, "InnoDB: Warning: %lu threads created by InnoDB had not exited at shutdown!\n", os_thread_count); } if (srv_monitor_file) { fclose(srv_monitor_file); srv_monitor_file = 0; unlink(srv_monitor_file_name); mem_free(srv_monitor_file_name); } mutex_free(&srv_monitor_file_mutex); /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside them */ sync_close(); /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ srv_free(); os_sync_free(); /* 5. Free all allocated memory and the os_fast_mutex created in ut0mem.c */ ut_free_all_mem(); if (os_thread_count != 0 || os_event_count != 0 || os_mutex_count != 0 || os_fast_mutex_count != 0) { fprintf(stderr, "InnoDB: Warning: some resources were not cleaned up in shutdown:\n" "InnoDB: threads %lu, events %lu, os_mutexes %lu, os_fast_mutexes %lu\n", os_thread_count, os_event_count, os_mutex_count, os_fast_mutex_count); } if (dict_foreign_err_file) { fclose(dict_foreign_err_file); } if (lock_latest_err_file) { fclose(lock_latest_err_file); } if (srv_print_verbose_log) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Shutdown completed\n"); } return((int) DB_SUCCESS); }