From afc9c81292202475bd3d79ce93c0e4bd9aa9f946 Mon Sep 17 00:00:00 2001 From: "heikki@hundin.mysql.fi" <> Date: Sat, 18 Dec 2004 13:04:34 +0200 Subject: [PATCH 1/7] rem0rec.ic: Do not use short int in rem0rec.ic, since its size is not fixed in ANSI C; improve comments of the relative offset field in a record; use mach_read_from_2() to read the relative offset field to save CPU time, if the compiler does not optimize a more complex access function --- innobase/include/rem0rec.ic | 81 +++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index 20a97f7e750..6c3dabf04a2 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -40,8 +40,18 @@ most significant bytes and bits are written below less significant. (1) byte offset (2) bit usage within byte downward from - origin -> 1 8 bits pointer to next record (relative) - 2 8 bits pointer to next record (relative) + origin -> 1 8 bits relative offset of next record + 2 8 bits relative offset of next record + the relative offset is an unsigned 16-bit + integer: + (offset_of_next_record + - offset_of_this_record) mod 64Ki, + where mod is the modulo as a non-negative + number; + we can calculate the offset of the next + record with the formula: + relative_offset + offset_of_this_record + mod UNIV_PAGE_SIZE 3 3 bits status: 000=conventional record 001=node pointer record (inside B-tree) @@ -252,26 +262,37 @@ UNIV_INLINE ulint rec_get_next_offs( /*==============*/ - /* out: the page offset of the next chained record */ + /* out: the page offset of the next chained record, or + 0 if none */ rec_t* rec, /* in: physical record */ ibool comp) /* in: TRUE=compact page format */ { + ulint field_value; + + ut_ad(REC_NEXT_MASK == 0xFFFFUL); + ut_ad(REC_NEXT_SHIFT == 0); + + field_value = mach_read_from_2(rec - REC_NEXT); + if (comp) { - lint ret = (signed short) rec_get_bit_field_2(rec, REC_NEXT, - 
REC_NEXT_MASK, REC_NEXT_SHIFT); #if UNIV_PAGE_SIZE <= 32768 - /* with 64 KiB page size, the pointer will "wrap around", - and the following assertions are invalid */ - ut_ad(ret + ut_align_offset(rec, UNIV_PAGE_SIZE) < - UNIV_PAGE_SIZE); + /* Note that for 64 KiB pages, field_value can 'wrap around' + and the debug assertion is not valid */ + + ut_ad((int16_t)field_value + + ut_align_offset(rec, UNIV_PAGE_SIZE) + < UNIV_PAGE_SIZE); #endif - return(ret ? ut_align_offset(rec + ret, UNIV_PAGE_SIZE) : 0); - } - else { - ulint ret = rec_get_bit_field_2(rec, REC_NEXT, - REC_NEXT_MASK, REC_NEXT_SHIFT); - ut_ad(ret < UNIV_PAGE_SIZE); - return(ret); + if (field_value == 0) { + + return(0); + } + + return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); + } else { + ut_ad(field_value < UNIV_PAGE_SIZE); + + return(field_value); } } @@ -284,21 +305,31 @@ rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ ibool comp, /* in: TRUE=compact page format */ - ulint next) /* in: offset of the next record */ + ulint next) /* in: offset of the next record, or 0 if none */ { ut_ad(rec); ut_ad(UNIV_PAGE_SIZE > next); + ut_ad(REC_NEXT_MASK == 0xFFFFUL); + ut_ad(REC_NEXT_SHIFT == 0); if (comp) { - rec_set_bit_field_2(rec, next - ? 
(next - ut_align_offset(rec, UNIV_PAGE_SIZE)) -#ifdef UNIV_DEBUG /* avoid an assertion failure */ - & (REC_NEXT_MASK >> REC_NEXT_SHIFT) -#endif - : 0, REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + ulint field_value; + + if (next) { + /* The following two statements calculate + next - offset_of_rec mod 64Ki, where mod is the modulo + as a non-negative number */ + + field_value = (ulint)((lint)next + - (lint)ut_align_offset(rec, UNIV_PAGE_SIZE)); + field_value &= REC_NEXT_MASK; + } else { + field_value = 0; + } + + mach_write_to_2(rec - REC_NEXT, field_value); } else { - rec_set_bit_field_2(rec, next, - REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + mach_write_to_2(rec - REC_NEXT, next); } } From 75b6b7ed70137ef7aaaae7612bf6650235248a55 Mon Sep 17 00:00:00 2001 From: "heikki@hundin.mysql.fi" <> Date: Sat, 18 Dec 2004 18:20:23 +0200 Subject: [PATCH 2/7] trx0roll.c: Print progress of background rollback of transactions with more than 1000 undo log entries srv0start.c, trx0roll.c, log0recv.c, trx0roll.h: Cleanup background rollback code in crash recovery; do not flush all modified pages from the buffer pool after a crash recovery: this makes mysqld accessible for users more quickly --- innobase/include/trx0roll.h | 16 +++++++--- innobase/log/log0recv.c | 1 - innobase/srv/srv0start.c | 12 ++++---- innobase/trx/trx0roll.c | 58 +++++++++++++++++++++++++------------ 4 files changed, 57 insertions(+), 30 deletions(-) diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 893e5af6c01..9d025da4a5f 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -105,11 +105,19 @@ trx_rollback( Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. -Note: this is done in a background thread */ +Note: this is done in a background thread. 
*/ -void * -trx_rollback_or_clean_all_without_sess(void *); -/*============================================*/ +#ifndef __WIN__ +void* +#else +ulint +#endif +trx_rollback_or_clean_all_without_sess( +/*===================================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))); + /* in: a dummy parameter required by + os_thread_create */ /******************************************************************** Finishes a transaction rollback. */ diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index f42f0eb8c72..5eefd32c8a6 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -2937,7 +2937,6 @@ recv_recovery_from_checkpoint_finish(void) #ifndef UNIV_LOG_DEBUG recv_sys_free(); #endif - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { os_thread_create(trx_rollback_or_clean_all_without_sess, (void *)&i, &recovery_thread_id); diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index 69341a1d7d1..a0e763d7a44 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -1403,15 +1403,13 @@ NetWare. */ fsp_header_inc_size(0, sum_of_new_sizes, &mtr); mtr_commit(&mtr); - } - if (recv_needed_recovery) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Flushing modified pages from the buffer pool...\n"); - } + /* Immediately write the log record about increased tablespace + size to disk, so that it is durable even if mysqld would crash + quickly */ - log_make_checkpoint_at(ut_dulint_max, TRUE); + log_buffer_flush_to_disk(); + } #ifdef UNIV_LOG_ARCHIVE /* Archiving is always off under MySQL */ diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index db5e16c7778..ca286997598 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -332,11 +332,19 @@ trx_savept_take( Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert undo log. 
If the transaction was not yet committed, then we roll it back. -Note: this is done in a background thread */ +Note: this is done in a background thread. */ -void * -trx_rollback_or_clean_all_without_sess(void *i) -/*========================================*/ +#ifndef __WIN__ +void* +#else +ulint +#endif +trx_rollback_or_clean_all_without_sess( +/*===================================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))) + /* in: a dummy parameter required by + os_thread_create */ { mem_heap_t* heap; que_fork_t* fork; @@ -361,9 +369,9 @@ trx_rollback_or_clean_all_without_sess(void *i) if (UT_LIST_GET_FIRST(trx_sys->trx_list)) { fprintf(stderr, - "InnoDB: Starting rollback of uncommitted transactions\n"); +"InnoDB: Starting in background the rollback of uncommitted transactions\n"); } else { - os_thread_exit(i); + goto leave_function; } loop: heap = mem_heap_create(512); @@ -373,7 +381,6 @@ loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); while (trx) { - if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { trx = UT_LIST_GET_NEXT(trx_list, trx); } else if (trx->conc_state == TRX_PREPARED) { @@ -386,17 +393,17 @@ loop: mutex_exit(&kernel_mutex); if (trx == NULL) { + ut_print_timestamp(stderr); fprintf(stderr, - "InnoDB: Rollback of uncommitted transactions completed\n"); + " InnoDB: Rollback of uncommitted transactions completed\n"); mem_heap_free(heap); - - os_thread_exit(i); + + goto leave_function; } trx->sess = trx_dummy_sess; - if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) { fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n", (ulong) ut_dulint_get_high(trx->id), @@ -428,13 +435,15 @@ loop: trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no); trx_roll_progress_printed_pct = 0; rows_to_undo = trx_roll_max_undo_no; + if (rows_to_undo > 1000000000) { rows_to_undo = rows_to_undo / 1000000; unit = "M"; } + ut_print_timestamp(stderr); fprintf(stderr, -"InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to 
undo", +" InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo", (ulong) ut_dulint_get_high(trx->id), (ulong) ut_dulint_get_low(trx->id), (ulong) rows_to_undo, unit); @@ -454,7 +463,7 @@ loop: fprintf(stderr, "InnoDB: Waiting for rollback of trx id %lu to end\n", - (ulong) ut_dulint_get_low(trx->id)); + (ulong) ut_dulint_get_low(trx->id)); os_thread_sleep(100000); mutex_enter(&kernel_mutex); @@ -495,7 +504,19 @@ loop: goto loop; - os_thread_exit(i); /* not reached */ +leave_function: + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + /* The following is dummy code to keep the compiler happy: */ + +#ifndef __WIN__ + return(NULL); +#else + return(0); +#endif } /*********************************************************************** @@ -856,16 +877,17 @@ try_again: ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); /* We print rollback progress info if we are in a crash recovery - and the transaction has at least 1000 row operations to undo */ + and the transaction has at least 1000 row operations to undo. + Transactions in crash recovery have sess == NULL. 
*/ - if (srv_is_being_started && trx_roll_max_undo_no > 1000) { - progress_pct = 100 - (ulint) + if (trx->sess == NULL && trx_roll_max_undo_no > 1000) { + progress_pct = 100 - (ulint) ((ut_conv_dulint_to_longlong(undo_no) * 100) / trx_roll_max_undo_no); if (progress_pct != trx_roll_progress_printed_pct) { if (trx_roll_progress_printed_pct == 0) { fprintf(stderr, - "\nInnoDB: Progress in percents: %lu", (ulong) progress_pct); +"\nInnoDB: Progress in percents: %lu", (ulong) progress_pct); } else { fprintf(stderr, " %lu", (ulong) progress_pct); From 2a072721c33b9af8468f0c223752e3f4df9b1cf3 Mon Sep 17 00:00:00 2001 From: "heikki@hundin.mysql.fi" <> Date: Sat, 18 Dec 2004 18:44:03 +0200 Subject: [PATCH 3/7] trx0roll.c: Fix the rollback progress printout in crash recovery --- innobase/trx/trx0roll.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index ca286997598..ddb1240339d 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -30,9 +30,13 @@ Created 3/26/1996 Heikki Tuuri /* This many pages must be undone before a truncate is tried within rollback */ #define TRX_ROLL_TRUNC_THRESHOLD 1 +/* In crash recovery, the current trx to be rolled back */ +trx_t* trx_roll_crash_recv_trx = NULL; + /* In crash recovery we set this to the undo n:o of the current trx to be rolled back. Then we can print how many % the rollback has progressed. 
*/ ib_longlong trx_roll_max_undo_no; + /* Auxiliary variable which tells the previous progress % we printed */ ulint trx_roll_progress_printed_pct; @@ -432,6 +436,7 @@ loop: ut_a(thr == que_fork_start_command(fork)); + trx_roll_crash_recv_trx = trx; trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no); trx_roll_progress_printed_pct = 0; rows_to_undo = trx_roll_max_undo_no; @@ -443,7 +448,7 @@ loop: ut_print_timestamp(stderr); fprintf(stderr, -" InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo", +" InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo\n", (ulong) ut_dulint_get_high(trx->id), (ulong) ut_dulint_get_low(trx->id), (ulong) rows_to_undo, unit); @@ -502,6 +507,8 @@ loop: (ulong) ut_dulint_get_low(trx->id)); mem_heap_free(heap); + trx_roll_crash_recv_trx = NULL; + goto loop; leave_function: @@ -877,17 +884,17 @@ try_again: ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); /* We print rollback progress info if we are in a crash recovery - and the transaction has at least 1000 row operations to undo. - Transactions in crash recovery have sess == NULL. */ + and the transaction has at least 1000 row operations to undo. 
*/ + + if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) { - if (trx->sess == NULL && trx_roll_max_undo_no > 1000) { progress_pct = 100 - (ulint) ((ut_conv_dulint_to_longlong(undo_no) * 100) / trx_roll_max_undo_no); if (progress_pct != trx_roll_progress_printed_pct) { if (trx_roll_progress_printed_pct == 0) { fprintf(stderr, -"\nInnoDB: Progress in percents: %lu", (ulong) progress_pct); +"\nInnoDB: Progress in percents: %lu\n", (ulong) progress_pct); } else { fprintf(stderr, " %lu", (ulong) progress_pct); From 7d68f2e40d7fd7e7988326eaa97d37c1332ba42a Mon Sep 17 00:00:00 2001 From: "heikki@hundin.mysql.fi" <> Date: Sat, 18 Dec 2004 19:52:43 +0200 Subject: [PATCH 4/7] trx0roll.c: Put a sensible value of thread id and process no to a background rollback transaction --- innobase/trx/trx0roll.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index ddb1240339d..e5cffd2a4f3 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -454,6 +454,10 @@ loop: (ulong) rows_to_undo, unit); mutex_exit(&kernel_mutex); + trx->mysql_thread_id = os_thread_get_curr_id(); + + trx->mysql_process_no = os_proc_get_number(); + if (trx->dict_operation) { row_mysql_lock_data_dictionary(trx); } From 52666fca26e2270010db6d5e6693fe427a00317e Mon Sep 17 00:00:00 2001 From: "petr@mysql.com" <> Date: Sun, 19 Dec 2004 03:07:05 +0300 Subject: [PATCH 5/7] fix for Bug#7386 - IM fails to compile on alpha with Compaq C++ compiler --- server-tools/instance-manager/mysql_connection.cc | 4 +++- server-tools/instance-manager/priv.cc | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/server-tools/instance-manager/mysql_connection.cc b/server-tools/instance-manager/mysql_connection.cc index 9365edc7b91..83b046c1e5b 100644 --- a/server-tools/instance-manager/mysql_connection.cc +++ b/server-tools/instance-manager/mysql_connection.cc @@ -191,9 +191,11 @@ void Mysql_connection_thread::run() int 
Mysql_connection_thread::check_connection() { ulong pkt_len=0; // to hold client reply length + /* maximum size of the version string */ + enum { MAX_VERSION_LENGTH= 80 }; /* buffer for the first packet */ /* packet contains: */ - char buff[mysqlmanager_version_length + 1 + // server version, 0-ended + char buff[MAX_VERSION_LENGTH + 1 + // server version, 0-ended 4 + // connection id SCRAMBLE_LENGTH + 2 + // scramble (in 2 pieces) 18]; // server variables: flags, diff --git a/server-tools/instance-manager/priv.cc b/server-tools/instance-manager/priv.cc index 8112ebd41d8..4b47fe5b593 100644 --- a/server-tools/instance-manager/priv.cc +++ b/server-tools/instance-manager/priv.cc @@ -16,6 +16,10 @@ #include "priv.h" +/* + The following string must be less than 80 characters, as + mysql_connection.cc relies on it +*/ const char mysqlmanager_version[] = "0.2-alpha"; const int mysqlmanager_version_length= sizeof(mysqlmanager_version) - 1; From 48fcadf358cad7e12aa540d4187ffb9733cd2100 Mon Sep 17 00:00:00 2001 From: "dlenev@brandersnatch.localdomain" <> Date: Sun, 19 Dec 2004 12:51:40 +0300 Subject: [PATCH 6/7] Small cleanup of derived tables handling. We should not call free_tmp_table() for derived table in mysql_derived_filling(), since by this moment this table is already registered in THD::derived_tables list and thus free_tmp_table() will be called for it automatically in close_thread_tables(). 
--- sql/sql_derived.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 7cea1c6fcee..1d4b911bb65 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -268,13 +268,8 @@ int mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *orig_table_list) unit->cleanup(); } else - { - free_tmp_table(thd, table); unit->cleanup(); - } lex->current_select= save_current_select; - if (res) - free_tmp_table(thd, table); } return res; } From 8bebebf82688496563902cb957baad5467804ac0 Mon Sep 17 00:00:00 2001 From: "kaa@polly.local" <> Date: Sun, 19 Dec 2004 12:57:34 +0300 Subject: [PATCH 7/7] Fixed cut&paste bug that broke compilation with compile-pentium-valgrind-max --- innobase/os/os0proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c index 98254ae1055..dd2037695b7 100644 --- a/innobase/os/os0proc.c +++ b/innobase/os/os0proc.c @@ -565,7 +565,7 @@ os_mem_alloc_large( if (ptr) { if (set_to_zero) { #ifdef UNIV_SET_MEM_TO_ZERO - memset(ret, '\0', size); + memset(ptr, '\0', size); #endif }