mariadb/log/log0recv.c
marko 509e761f06 branches/innodb+: Merge revisions 4660:5090 from branches/zip:
------------------------------------------------------------------------
  r4670 | vasil | 2009-04-07 09:35:23 +0300 (Tue, 07 Apr 2009) | 11 lines

  branches/zip:

  Fix Bug#43660 SHOW INDEXES/ANALYZE does NOT update cardinality for
  indexes of InnoDB table

  by replacing the pseudo random number generator with a better one (LCG).

  This also fixes Mantis Issue#212.

  Approved by:	Heikki (rb://110)
  ------------------------------------------------------------------------
  r4671 | vasil | 2009-04-07 09:37:31 +0300 (Tue, 07 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4670.
  ------------------------------------------------------------------------
  r4673 | marko | 2009-04-07 15:45:28 +0300 (Tue, 07 Apr 2009) | 4 lines

  branches/zip: Allow in-place updates of UTF-8 CHAR columns
  from or to NULL in ROW_FORMAT=REDUNDANT. (Bug #44032)

  rb://107 approved by Heikki Tuuri.
  ------------------------------------------------------------------------
  r4677 | marko | 2009-04-07 16:19:31 +0300 (Tue, 07 Apr 2009) | 1 line

  branches/zip: Adjust r4673 as in the merge to branches/6.0 -r4676.
  ------------------------------------------------------------------------
  r4678 | inaam | 2009-04-07 18:45:37 +0300 (Tue, 07 Apr 2009) | 12 lines

  branches/zip

  Enable atomics on solaris (using the libc functions as defined in
  atomic.h) if GCC atomic builtins are not present.

  There still remains some work to be done (by Vasil?). This patch
  makes changes to plug.in to check pthread_t size and presence of
  atomic functions when running on solaris. The same has to become
  a part of the generated Makefile.in when we bake our source.

  Reviewed by: Heikki rb://106
  ------------------------------------------------------------------------
  r4687 | vasil | 2009-04-08 13:08:59 +0300 (Wed, 08 Apr 2009) | 4 lines

  branches/zip:

  Whitespace fixup in the ChangeLog
  ------------------------------------------------------------------------
  r4688 | vasil | 2009-04-08 13:11:15 +0300 (Wed, 08 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4678.
  ------------------------------------------------------------------------
  r4689 | marko | 2009-04-08 14:24:49 +0300 (Wed, 08 Apr 2009) | 5 lines

  branches/zip: Hide unnecessarily visible globals.
  dict_ind_redundant, dict_ind_compact: Declare these UNIV_INTERN.
  innodb_hton_ptr: Declare static.  We do not attempt to access the
  built-in InnoDB any more.
  trx_roll_savepoints_free(): Declare UNIV_INTERN.
  ------------------------------------------------------------------------
  r4700 | calvin | 2009-04-11 00:37:10 +0300 (Sat, 11 Apr 2009) | 9 lines

  branches/zip: Rewrite CMakeLists.txt

  CMakeLists.txt is completely rewritten:
  - To replace the one written by mysql
  - Print out some useful information, such as
    system name, directory, generator used,
    win64, Microsoft compiler, etc.
  - Remove one workaround for mysqld.lib location.
    User does not need to specify a build type
  ------------------------------------------------------------------------
  r4702 | calvin | 2009-04-13 18:16:44 +0300 (Mon, 13 Apr 2009) | 3 lines

  branches/zip: delete the original CMakeLists.txt

  A new version will be committed, suggested by Ken.
  ------------------------------------------------------------------------
  r4703 | calvin | 2009-04-13 18:20:45 +0300 (Mon, 13 Apr 2009) | 9 lines

  branches/zip: new CMakeLists.txt

  CMakeLists.txt is completely rewritten with enhancements:
  - Print out useful information, such as
    system name, directory, generator used,
    win64, Microsoft compiler, etc.
  - Remove one workaround for mysqld.lib location.
    User does not need to specify a build type
    when invoking MSVC generator.
  ------------------------------------------------------------------------
  r4706 | vasil | 2009-04-14 14:32:11 +0300 (Tue, 14 Apr 2009) | 5 lines

  branches/zip:

  When using the random function, first take the modulus by the number of pages
  and then typecast to ulint.
  ------------------------------------------------------------------------
  r4707 | calvin | 2009-04-14 17:47:31 +0300 (Tue, 14 Apr 2009) | 13 lines

  branches/zip: remove statically linked libraries from mysql

  To make zlib and strings dynamically linked; mysqld will export
  additional functions required by InnoDB.

  Since the symbols will be resolved dynamically during runtime,
  wdl_load_mapfile() is no longer able to make any function calls
  to ones in mysqld. As the result, strtoull() (from strings.lib)
  is replaced with _strtoui64().

  rb://111

  Approved by: Marko
  ------------------------------------------------------------------------
  r4712 | vasil | 2009-04-15 12:26:32 +0300 (Wed, 15 Apr 2009) | 157 lines

  branches/zip: Merge revisions 4481:4710 from branches/5.1:

  (resolving conflict in r4574, r4575 and skipping r4699 and r4705 because
  analogous changes to r4699 and r4705 were already made to branches/zip)

    ------------------------------------------------------------------------
    r4573 | vasil | 2009-03-30 14:17:13 +0300 (Mon, 30 Mar 2009) | 4 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Fix email address from dev@innodb.com to innodb_dev_ww@oracle.com

    ------------------------------------------------------------------------
    r4574 | vasil | 2009-03-30 14:27:08 +0300 (Mon, 30 Mar 2009) | 38 lines
    Changed paths:
       M /branches/5.1/Makefile.am
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Restore the state of INNODB_THREAD_CONCURRENCY to silence this warning:

      TEST                                      RESULT   TIME (ms)
      ------------------------------------------------------------

      worker[1] Using MTR_BUILD_THREAD 250, with reserved ports 12500..12509
      main.innodb                              [ pass ]   8803

      MTR's internal check of the test case 'main.innodb' failed.
      This means that the test case does not preserve the state that existed
      before the test case was executed.  Most likely the test case did not
      do a proper clean-up.
      This is the diff of the states of the servers before and after the
      test case was executed:
      mysqltest: Logging to '/tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.log'.
      mysqltest: Results saved in '/tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.result'.
      mysqltest: Connecting to server localhost:12500 (socket /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/mysqld.1.sock) as 'root', connection 'default', attempt 0 ...
      mysqltest: ... Connected.
      mysqltest: Start processing test commands from './include/check-testcase.test' ...
      mysqltest: ... Done processing test commands.
      --- /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.result	2009-03-30 14:12:31.000000000 +0300
      +++ /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.reject	2009-03-30 14:12:41.000000000 +0300
      @@ -99,7 +99,7 @@
       INNODB_SUPPORT_XA	ON
       INNODB_SYNC_SPIN_LOOPS	20
       INNODB_TABLE_LOCKS	ON
      -INNODB_THREAD_CONCURRENCY	8
      +INNODB_THREAD_CONCURRENCY	16
       INNODB_THREAD_SLEEP_DELAY	10000
       INSERT_ID	0
       INTERACTIVE_TIMEOUT	28800

      mysqltest: Result content mismatch

      not ok
    ------------------------------------------------------------------------
    r4575 | vasil | 2009-03-30 15:55:31 +0300 (Mon, 30 Mar 2009) | 8 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.result
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Fix Bug#43309 Test main.innodb can't be run twice

    Make the innodb mysql-test more flexible by inspecting how much a
    variable of interest has changed since the start of the test. Do not
    assume the variables have zero values at the start of the test.
    ------------------------------------------------------------------------
    r4576 | vasil | 2009-03-30 16:25:10 +0300 (Mon, 30 Mar 2009) | 4 lines
    Changed paths:
       M /branches/5.1/Makefile.am

    branches/5.1:

    Revert a change to Makefile.am that I committed accidentally in r4574.
    ------------------------------------------------------------------------
    r4659 | vasil | 2009-04-06 15:34:51 +0300 (Mon, 06 Apr 2009) | 6 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Followup to r4575 and the fix of Bug#43309 Test main.innodb can't be run twice:

    Add an explanatory comment, as suggested by Patrick Crews in the bug report.
    ------------------------------------------------------------------------
    r4699 | vasil | 2009-04-09 14:01:52 +0300 (Thu, 09 Apr 2009) | 15 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/include/srv0srv.h
       M /branches/5.1/page/page0cur.c
       M /branches/5.1/srv/srv0srv.c

    branches/5.1:

    Fix Bug#43660 SHOW INDEXES/ANALYZE does NOT update cardinality for indexes
    of InnoDB table

    by replacing the PRNG that is used to pick random pages with a better
    one.

    This is based on r4670 but also adds a new configuration option and
    enables the fix only if this option is changed. Please skip the present
    revision when merging.

    Approved by:	Heikki (via email)
    ------------------------------------------------------------------------
    r4705 | vasil | 2009-04-14 14:30:13 +0300 (Tue, 14 Apr 2009) | 5 lines
    Changed paths:
       M /branches/5.1/page/page0cur.c

    branches/5.1:

    When using the random function, first take the modulus by the number of pages
    and then typecast to ulint.

    ------------------------------------------------------------------------
    r4710 | vasil | 2009-04-15 11:55:18 +0300 (Wed, 15 Apr 2009) | 25 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc

    branches/5.1:

    Merge a change from MySQL (looks like this is against 5.0 but they later
    merged it to 5.1):

      ------------------------------------------------------------
      revno: 1810.3846.1
      committer: Alexey Botchkov <holyfoot@mysql.com>
      branch nick: 31435
      timestamp: Tue 2008-11-11 14:42:32 +0400
      message:
        Bug#31435 ha_innodb.cc:3983: ulint convert_search_mode_to_innobase(ha_rkey_function): Asse 
            I think we don't need to issue an error statement in the convert_search_mode_to_innobase().
            Returning the PAGE_CUR_UNSUPP value is enough as allows to handle this
            case depending on the requirements.

        per-file comments:
          sql/ha_innodb.cc 
        Bug#31435 ha_innodb.cc:3983: ulint convert_search_mode_to_innobase(ha_rkey_function): Asse 
             no error issued in convert_search_mode_to_innobase.
             ha_innobase::records_in_range() returns HA_POS_ERROR if search mode isn't supported.
      modified:
        sql/ha_innodb.cc
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r4713 | vasil | 2009-04-15 12:36:16 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Add missing ChangeLog entries
  ------------------------------------------------------------------------
  r4714 | vasil | 2009-04-15 12:36:57 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Fix typo in the ChangeLog
  ------------------------------------------------------------------------
  r4715 | vasil | 2009-04-15 12:39:04 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Whitespace cleanup in ChangeLog
  ------------------------------------------------------------------------
  r4716 | vasil | 2009-04-15 21:36:06 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4543.
  ------------------------------------------------------------------------
  r4717 | calvin | 2009-04-16 01:22:35 +0300 (Thu, 16 Apr 2009) | 18 lines

  branches/zip: Use the Windows Interlocked functions for atomic memory
  access

  Mapping the atomic operations to Windows Interlocked functions:

  os_compare_and_swap_* to InterlockedCompareExchange(64)
  os_atomic_increment_* to InterlockedExchangeAdd(64)
  os_atomic_test_and_set_byte to InterlockedExchange

  In this patch, the legacy code under UNIV_CAN_USE_X86_ASSEMBLER is
  removed altogether, and HAVE_WINDOWS_ATOMICS and
  INNODB_RW_LOCKS_USE_ATOMICS are added to CMakeLists.txt

  This is to address mantis issue#194.

  rb://113

  Approved by: Marko
  ------------------------------------------------------------------------
  r4720 | vasil | 2009-04-16 09:44:48 +0300 (Thu, 16 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4717.
  ------------------------------------------------------------------------
  r4721 | marko | 2009-04-16 10:32:09 +0300 (Thu, 16 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Initialize prebuilt->index_usable.
  This should have been done in r4631.  Spotted by Michael.
  ------------------------------------------------------------------------
  r4728 | marko | 2009-04-16 16:02:27 +0300 (Thu, 16 Apr 2009) | 3 lines

  branches/zip: univ.i: Define REFMAN as the base URL of the
  MySQL Reference Manual and use it in every string.
  This fixes Issue #221.
  ------------------------------------------------------------------------
  r4733 | calvin | 2009-04-17 08:13:20 +0300 (Fri, 17 Apr 2009) | 6 lines

  branches/zip: minor changes to CMakeLists.txt

  All are non-functional changes:
  - should check for long (not int), spotted by Sunny
  - comment out the project definition, to avoid generating another
    .sln file.
  ------------------------------------------------------------------------
  r4748 | vasil | 2009-04-18 00:50:09 +0300 (Sat, 18 Apr 2009) | 118 lines

  branches/zip: Merge revisions 4710:4746 from branches/5.1:

    ------------------------------------------------------------------------
    r4746 | vasil | 2009-04-18 00:32:08 +0300 (Sat, 18 Apr 2009) | 110 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/include/pars0pars.h

    branches/5.1:

    Merge a change from MySQL:

      ------------------------------------------------------------
      revno: 2728.10.2
      committer: Ignacio Galarza <iggy@mysql.com>
      branch nick: mysql-5.1-bugteam-bug29125
      timestamp: Fri 2009-02-13 11:41:47 -0500
      message:
        Bug#29125 Windows Server X64: so many compiler warnings
        - Remove bothersome warning messages.  This change focuses on the warnings 
        that are covered by the ignore file: support-files/compiler_warnings.supp.
        - Strings are guaranteed to be max uint in length
      modified:
        client/mysql_upgrade.c
        client/mysqladmin.cc
        client/mysqlbinlog.cc
        client/mysqlcheck.c
        client/mysqldump.c
        client/mysqlslap.c
        client/mysqltest.cc
        client/sql_string.cc
        extra/comp_err.c
        extra/yassl/src/buffer.cpp
        extra/yassl/taocrypt/include/block.hpp
        extra/yassl/taocrypt/src/algebra.cpp
        extra/yassl/taocrypt/src/asn.cpp
        include/config-win.h
        libmysql/libmysql.c
        mysys/array.c
        mysys/base64.c
        mysys/charset.c
        mysys/checksum.c
        mysys/default.c
        mysys/default_modify.c
        mysys/hash.c
        mysys/mf_keycache.c
        mysys/mf_tempdir.c
        mysys/my_append.c
        mysys/my_compress.c
        mysys/my_conio.c
        mysys/my_copy.c
        mysys/my_getwd.c
        mysys/my_pread.c
        mysys/my_quick.c
        mysys/my_read.c
        mysys/safemalloc.c
        mysys/string.c
        server-tools/instance-manager/buffer.cc
        server-tools/instance-manager/instance.cc
        server-tools/instance-manager/options.cc
        server-tools/instance-manager/parse.h
        sql-common/client.c
        sql-common/my_user.c
        sql/event_data_objects.cc
        sql/event_parse_data.cc
        sql/events.cc
        sql/gen_lex_hash.cc
        sql/item.h
        sql/item_func.cc
        sql/item_strfunc.cc
        sql/item_timefunc.cc
        sql/lock.cc
        sql/log_event.cc
        sql/log_event.h
        sql/log_event_old.cc
        sql/net_serv.cc
        sql/sp_head.h
        sql/spatial.h
        sql/sql_class.h
        sql/sql_connect.cc
        sql/sql_crypt.cc
        sql/sql_error.cc
        sql/sql_insert.cc
        sql/sql_lex.cc
        sql/sql_lex.h
        sql/sql_load.cc
        sql/sql_prepare.cc
        sql/sql_profile.cc
        sql/sql_repl.cc
        sql/sql_servers.cc
        sql/sql_string.cc
        sql/sql_table.cc
        sql/sql_trigger.cc
        sql/sql_udf.cc
        sql/sql_view.cc
        sql/udf_example.c
        sql/uniques.cc
        storage/archive/azio.c
        storage/archive/azlib.h
        storage/csv/ha_tina.cc
        storage/csv/ha_tina.h
        storage/csv/transparent_file.h
        storage/federated/ha_federated.cc
        storage/federated/ha_federated.h
        storage/heap/hp_write.c
        storage/innobase/handler/ha_innodb.cc
        storage/innobase/include/pars0pars.h
        storage/myisam/ha_myisam.cc
        storage/myisam/mi_check.c
        storage/myisam/mi_packrec.c
        storage/myisam/mi_search.c
        storage/myisam/rt_index.c
        storage/myisammrg/ha_myisammrg.cc
        strings/ctype.c
        strings/my_vsnprintf.c
        tests/bug25714.c
        tests/mysql_client_test.c
  ------------------------------------------------------------------------
  r4749 | vasil | 2009-04-18 00:58:08 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4748.
  ------------------------------------------------------------------------
  r4751 | vasil | 2009-04-18 01:29:16 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Silence warning about unused variables.
  ------------------------------------------------------------------------
  r4752 | vasil | 2009-04-18 01:30:37 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Include the needed header for memset().
  ------------------------------------------------------------------------
  r4753 | vasil | 2009-04-18 01:31:34 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Silence a compiler warning.
  ------------------------------------------------------------------------
  r4756 | vasil | 2009-04-18 02:19:03 +0300 (Sat, 18 Apr 2009) | 5 lines

  branches/zip:

  Rename the aux config program and give it a more specific name because
  more are coming.
  ------------------------------------------------------------------------
  r4757 | vasil | 2009-04-18 02:22:33 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Add comment and copyright notice to the aux config program.
  ------------------------------------------------------------------------
  r4758 | vasil | 2009-04-18 02:40:47 +0300 (Sat, 18 Apr 2009) | 5 lines

  branches/zip:

  Add aux config programs to emulate the newly added checks in plug.in
  (from r4678).
  ------------------------------------------------------------------------
  r4830 | marko | 2009-04-20 16:11:38 +0300 (Mon, 20 Apr 2009) | 6 lines

  branches/zip: Cosmetic fixes.

  row_unlock_for_mysql(): Add a const qualifier to read-only rec_t*.
  Use dict_index_is_clust().

  CMakeLists.txt: svn propset svn:eol-style native.
  ------------------------------------------------------------------------
  r4893 | marko | 2009-04-23 09:32:36 +0300 (Thu, 23 Apr 2009) | 11 lines

  branches/zip: Introduce the logical type names trx_id_t, roll_ptr_t,
  and undo_no_t. Each type is still defined as dulint.

  This is an initial step towards replacing dulint with a 64-bit data type.
  Because modern compilers have no trouble supporting 64-bit arithmetic
  even on 32-bit targets, the dulint struct is a relic that should go.

  The last remaining major use of dulint is dictionary IDs
  (table, index, and row ids).

  rb://114 approved by Sunny Bains
  ------------------------------------------------------------------------
  r4894 | marko | 2009-04-23 10:21:07 +0300 (Thu, 23 Apr 2009) | 1 line

  branches/zip: ChangeLog: Document r4893.
  ------------------------------------------------------------------------
  r4895 | marko | 2009-04-23 10:22:06 +0300 (Thu, 23 Apr 2009) | 1 line

  branches/zip: ChangeLog: Add the missing include/ to two files.
  ------------------------------------------------------------------------
  r4896 | marko | 2009-04-23 10:37:40 +0300 (Thu, 23 Apr 2009) | 4 lines

  branches/zip: row_scan_and_check_index(): Improve the diagnostics, by reporting
  errors from row_search_for_mysql() in the error log.
  The errors will still be ignored by CHECK TABLE.
  This is somewhat related to Issue #211.
  ------------------------------------------------------------------------
  r4897 | marko | 2009-04-23 10:40:34 +0300 (Thu, 23 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Check
  row_merge_is_index_usable() earlier, to make the logic clearer.
  ------------------------------------------------------------------------
  r4898 | marko | 2009-04-23 15:15:07 +0300 (Thu, 23 Apr 2009) | 4 lines

  branches/zip: Correct a misleading comment.  PAGE_MAX_TRX_ID
  will be updated in ibuf_insert_low() and updated from the
  insert buffer tree page to the secondary index tree page
  during the insert buffer merge.
  ------------------------------------------------------------------------
  r4915 | marko | 2009-04-27 13:40:20 +0300 (Mon, 27 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Add some comments on
  prebuilt->index_usable, as suggested by Michael.
  ------------------------------------------------------------------------
  r4921 | marko | 2009-04-29 11:51:25 +0300 (Wed, 29 Apr 2009) | 2 lines

  branches/zip: btr_cur_optimistic_insert(): Remove a redundant condition.
  The insert buffer tree is a clustered index.
  ------------------------------------------------------------------------
  r4922 | marko | 2009-04-29 23:23:27 +0300 (Wed, 29 Apr 2009) | 22 lines

  branches/zip: Distinguish temporary tables in MLOG_FILE_CREATE.
  This addresses Mantis Issue #23 in InnoDB Hot Backup and some
  of MySQL Bug #41609.

  In MLOG_FILE_CREATE, we need to distinguish temporary tables, so that
  InnoDB Hot Backup can work correctly.  It turns out that we can do this
  easily, by using a bit of the previously unused parameter for page number.
  (The page number parameter of MLOG_FILE_CREATE has been written as 0 
  ever since MySQL 4.1, which introduced MLOG_FILE_CREATE.)

  MLOG_FILE_FLAG_TEMP: A flag for indicating a temporary table in
  the page number parameter of MLOG_FILE_ operations.

  fil_op_write_log(): Add the parameter log_flags.

  fil_op_log_parse_or_replay(): Add the parameter log_flags.
  Do not replay MLOG_FILE_CREATE when MLOG_FILE_FLAG_TEMP is set in log_flags.
  This only affects ibbackup --apply-log.  InnoDB itself never replays file
  operations.

  rb://117 approved by Heikki Tuuri
  ------------------------------------------------------------------------
  r4977 | marko | 2009-05-13 15:49:38 +0300 (Wed, 13 May 2009) | 12 lines

  branches/zip: Merge revisions 4746:4976 from branches/5.1:

    ------------------------------------------------------------------------
    r4976 | marko | 2009-05-13 15:44:54 +0300 (Wed, 13 May 2009) | 6 lines

    branches/5.1: Display DB_ROLL_PTR in the COLUMNS section of the
    innodb_table_monitor output.  It was accidentally omitted due to an
    off-by-one loop condition.  (Bug #44320)

    rb://116 approved by Heikki Tuuri
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r4978 | vasil | 2009-05-13 16:21:55 +0300 (Wed, 13 May 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4977.

  ------------------------------------------------------------------------
  r4995 | marko | 2009-05-14 15:31:43 +0300 (Thu, 14 May 2009) | 24 lines

  branches/zip: Merge revisions 4976:4994 from branches/5.1:

    ------------------------------------------------------------------------
    r4994 | marko | 2009-05-14 15:04:55 +0300 (Thu, 14 May 2009) | 18 lines

    branches/5.1: Prevent a race condition in innobase_commit() by ensuring
    that innodb_commit_concurrency>0 remains constant at run time. (Bug #42101)

    srv_commit_concurrency: Make this a static variable in ha_innodb.cc.

    innobase_commit_concurrency_validate(): Check that innodb_commit_concurrency
    is not changed from or to 0 at run time.  This is needed, because
    innobase_commit() assumes that innodb_commit_concurrency>0 remains constant.
    Without this limitation, the checks for innodb_commit_concurrency>0
    in innobase_commit() should be removed and that function would have to
    acquire and release commit_cond_m at least twice per invocation.
    Normally, innodb_commit_concurrency=0, and introducing the mutex operations
    would mean significant overhead.

    innodb_bug42101.test, innodb_bug42101-nonzero.test: Test cases.

    rb://123 approved by Heikki Tuuri
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r5000 | vasil | 2009-05-14 20:13:41 +0300 (Thu, 14 May 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4994.
  ------------------------------------------------------------------------
  r5026 | marko | 2009-05-18 16:29:51 +0300 (Mon, 18 May 2009) | 1 line

  branches/zip: buf_validate(): Add missing out: comment.
  ------------------------------------------------------------------------
  r5027 | marko | 2009-05-18 16:36:10 +0300 (Mon, 18 May 2009) | 1 line

  branches/zip: Add some missing out: comments to buf0buf.h, buf0buf.c.
  ------------------------------------------------------------------------
  r5028 | marko | 2009-05-18 16:40:07 +0300 (Mon, 18 May 2009) | 11 lines

  branches/zip: When executing an optimistic update by delete-and-insert,
  correctly estimate the free space on the compressed page by
  page_zip_available(..., create=TRUE). This was reported as Issue #231.

  btr_cur_update_alloc_zip(): Add the parameter ibool create and pass it
  to page_zip_available(). The parameter was previously passed as 0.

  btr_cur_optimistic_update(): Pass create=TRUE to btr_cur_update_alloc_zip().

  rb://120 approved by Heikki Tuuri
  ------------------------------------------------------------------------
  r5030 | marko | 2009-05-19 10:04:04 +0300 (Tue, 19 May 2009) | 2 lines

  branches/zip: os_thread_get_curr_id(), os_thread_get_curr():
  Add missing out: comments.
  ------------------------------------------------------------------------
  r5031 | marko | 2009-05-19 10:30:02 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Add missing out: comments to nullary functions.
  ------------------------------------------------------------------------
  r5033 | marko | 2009-05-19 11:00:51 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Remove bogus out: comments of functions returning void.
  ------------------------------------------------------------------------
  r5034 | marko | 2009-05-19 12:41:32 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: row_update_prebuilt_trx(): Correct bogus comment.
  ------------------------------------------------------------------------
  r5035 | marko | 2009-05-19 13:04:58 +0300 (Tue, 19 May 2009) | 3 lines

  branches/zip: ut0auxconf_have_solaris_atomics.c: Get the
  function declarations from <atomic.h>.
  Call the functions with proper arguments.
  ------------------------------------------------------------------------
  r5036 | marko | 2009-05-19 13:05:50 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Add proper comments to some file page accessors.
  ------------------------------------------------------------------------
  r5037 | marko | 2009-05-19 13:08:16 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Fix a typo that was introduced in r5036.
  ------------------------------------------------------------------------
  r5038 | marko | 2009-05-19 22:59:07 +0300 (Tue, 19 May 2009) | 30 lines

  branches/zip: Write PAGE_MAX_TRX_ID to the redo log. Otherwise,
  transactions that are started before the rollback of incomplete
  transactions has finished may have an inconsistent view of the
  secondary indexes.

  dict_index_is_sec_or_ibuf(): Auxiliary function for controlling
  updates and checks of PAGE_MAX_TRX_ID: check whether an index is a
  secondary index or the insert buffer tree.

  page_set_max_trx_id(), page_update_max_trx_id(),
  lock_rec_insert_check_and_lock(),
  lock_sec_rec_modify_check_and_lock(), btr_cur_ins_lock_and_undo(),
  btr_cur_upd_lock_and_undo(): Add the parameter mtr.

  page_set_max_trx_id(): Allow mtr to be NULL.  When mtr==NULL, do not
  attempt to write to the redo log.  This only occurs when creating a
  page or reorganizing a compressed page.  In these cases, the
  PAGE_MAX_TRX_ID will be set correctly during the application of redo
  log records, even though there is no explicit log record about it.

  btr_discard_only_page_on_level(): Preserve PAGE_MAX_TRX_ID.  This
  function should be unreachable, though.

  btr_cur_pessimistic_update(): Update PAGE_MAX_TRX_ID.

  Add some assertions for checking that PAGE_MAX_TRX_ID is set on all
  secondary index leaf pages.

  rb://115 tested by Michael, fixes Issue #211
  ------------------------------------------------------------------------
  r5039 | marko | 2009-05-19 23:13:12 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: ib_wqueue_wait(): Add decorative comment.
  ------------------------------------------------------------------------
  r5041 | marko | 2009-05-20 08:42:12 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Add missing function comments.
  ------------------------------------------------------------------------
  r5042 | marko | 2009-05-20 08:46:01 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: sync0rw.ic: Remove an extra ; that was added in r5041.
  ------------------------------------------------------------------------
  r5044 | marko | 2009-05-20 11:11:58 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: mlog_parse_index(): Correct a parameter comment
  and add a const qualifier that was missing.
  ------------------------------------------------------------------------
  r5045 | marko | 2009-05-20 11:37:08 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: fil0fil.c: Correct some comments.
  ------------------------------------------------------------------------
  r5046 | marko | 2009-05-20 12:19:40 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Fix some function comments.
  ------------------------------------------------------------------------
  r5047 | marko | 2009-05-20 12:26:49 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ut_snprintf(): Fix the function comments.
  ------------------------------------------------------------------------
  r5048 | marko | 2009-05-20 12:28:44 +0300 (Wed, 20 May 2009) | 3 lines

  branches/zip: inno_bcmp(): Remove this memcmp replacement.
  srv0start.c does not (any longer) call memcmp.
  srv_parse_megabytes(): Add a function comment.
  ------------------------------------------------------------------------
  r5052 | marko | 2009-05-20 12:32:37 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ib_vector_is_empty(): Fix the function comment.
  ------------------------------------------------------------------------
  r5054 | marko | 2009-05-20 12:35:33 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: page_cur_lcg_prng(): Add missing parameter list.
  ------------------------------------------------------------------------
  r5057 | marko | 2009-05-20 12:45:17 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Remove bogus in: comments from struct members.
  ------------------------------------------------------------------------
  r5058 | marko | 2009-05-20 13:06:03 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Clean up some function comments.
  ------------------------------------------------------------------------
  r5060 | marko | 2009-05-20 14:06:59 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Clean up some comments.
  ------------------------------------------------------------------------
  r5061 | marko | 2009-05-20 14:07:49 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: innodb_export_status(): Remove the return(0),
  now that the function was declared void in r5060.
  ------------------------------------------------------------------------
  r5062 | marko | 2009-05-20 14:45:03 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ha_innodb.cc: Clean up some comments.
  ------------------------------------------------------------------------
  r5063 | marko | 2009-05-20 16:10:17 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ut_dulint_sort(): Write proper comments.
  ------------------------------------------------------------------------
  r5064 | marko | 2009-05-20 16:17:26 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: innobase_end(), innobase_flush_logs():
  Document the function parameters.
  ------------------------------------------------------------------------
  r5065 | marko | 2009-05-20 23:17:43 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ha_innodb.cc: Add some missing function comments.
  ------------------------------------------------------------------------
  r5066 | marko | 2009-05-21 00:51:23 +0300 (Thu, 21 May 2009) | 2 lines

  branches/zip: Fix some function comments.
  ------------------------------------------------------------------------
  r5070 | vasil | 2009-05-21 08:27:00 +0300 (Thu, 21 May 2009) | 4 lines

  branches/zip:

  Whitespace fixup.
  ------------------------------------------------------------------------
2009-05-25 06:20:53 +00:00

3614 lines
92 KiB
C

/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************
Recovery
Created 9/20/1997 Heikki Tuuri
*******************************************************/
#include "log0recv.h"
#ifdef UNIV_NONINL
#include "log0recv.ic"
#endif
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0cur.h"
#include "page0zip.h"
#include "btr0btr.h"
#include "btr0cur.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
#ifndef UNIV_HOTBACKUP
# include "buf0rea.h"
# include "srv0srv.h"
# include "srv0start.h"
# include "trx0roll.h"
# include "row0merge.h"
# include "sync0sync.h"
#else /* !UNIV_HOTBACKUP */
/* This is set to FALSE if the backup was originally taken with the
ibbackup --include regexp option: then we do not want to create tables in
directories which were not included */
UNIV_INTERN ibool recv_replay_file_ops = TRUE;
#endif /* !UNIV_HOTBACKUP */
/* Log records are stored in the hash table in chunks of at most this size;
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool.
Each chunk is preceded in memory by a recv_data_t header, which is why the
header size is subtracted here. */
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
/* Read-ahead area in applying log records to file pages */
#define RECV_READ_AHEAD_AREA 32
/* The recovery system object: NULL until recv_sys_create() allocates it */
UNIV_INTERN recv_sys_t* recv_sys = NULL;
UNIV_INTERN ibool recv_recovery_on = FALSE;
#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN ibool recv_recovery_from_backup_on = FALSE;
#endif /* UNIV_LOG_ARCHIVE */
#ifndef UNIV_HOTBACKUP
UNIV_INTERN ibool recv_needed_recovery = FALSE;
UNIV_INTERN ibool recv_lsn_checks_on = FALSE;
/* There are two conditions under which we scan the logs, the first
is normal startup and the second is when we do a recovery from an
archive.
This flag is set if we are doing a scan from the last checkpoint during
startup. If we find log entries that were written after the last checkpoint
we know that the server was not cleanly shutdown. We must then initialize
the crash recovery environment before attempting to store these entries in
the log hash table. */
UNIV_INTERN ibool recv_log_scan_is_startup_type = FALSE;
/* If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state */
/* TRUE also means that recovery is running and no operations on the log
files are allowed yet: the variable name is misleading */
UNIV_INTERN ibool recv_no_ibuf_operations = FALSE;
/* In the server build these two are compile-time constants */
# define recv_is_making_a_backup FALSE
# define recv_is_from_backup FALSE
#else /* !UNIV_HOTBACKUP */
/* In the hot backup build recv_needed_recovery is a compile-time constant */
# define recv_needed_recovery FALSE
UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
/* Set to TRUE by recv_sys_init() in the hot backup build */
UNIV_INTERN ibool recv_is_from_backup = FALSE;
/* The hot backup build defines the buffer pool size query as a fixed
5 MB constant */
# define buf_pool_get_curr_size() (5 * 1024 * 1024)
#endif /* !UNIV_HOTBACKUP */
/* The following counter is used to decide when to print info on
log scan */
UNIV_INTERN ulint recv_scan_print_counter = 0;
UNIV_INTERN ulint recv_previous_parsed_rec_type = 999999;
UNIV_INTERN ulint recv_previous_parsed_rec_offset = 0;
UNIV_INTERN ulint recv_previous_parsed_rec_is_multi = 0;
/* Reported by recv_sys_empty_hash() as the maximum page number with log
records on it when unprocessed pages are left over */
UNIV_INTERN ulint recv_max_parsed_page_no = 0;
/* This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database.
This is the default value. If the actual size of the buffer pool is
at least 10 MB, recv_sys_init() sets this value to 512. */
UNIV_INTERN ulint recv_n_pool_free_frames = 256;
/* The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt.
recv_sys_init() resets it to 0. */
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
/* prototypes */
#ifndef UNIV_HOTBACKUP
/***********************************************************
Forward declaration; the function is static, so its definition has to
appear in this translation unit.
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE. */
static
void
recv_init_crash_recovery(void);
/*===========================*/
#endif /* !UNIV_HOTBACKUP */
/************************************************************
Allocates the global recovery system object and creates its mutex.
Does nothing when recv_sys has already been allocated. The heap and the
address hash table are left NULL here; recv_sys_init() sets them up. */
UNIV_INTERN
void
recv_sys_create(void)
/*=================*/
{
	if (recv_sys == NULL) {

		recv_sys = mem_alloc(sizeof(recv_sys_t));

		/* Nothing is initialized for an actual recovery yet */
		recv_sys->addr_hash = NULL;
		recv_sys->heap = NULL;

		mutex_create(&recv_sys->mutex, SYNC_RECV);
	}
}
/************************************************************
Prepares the recovery system for one recovery operation: creates the
memory heap, the parsing buffer, the page address hash table and the
aligned buffer for the last log block. A non-NULL recv_sys->heap is
taken to mean that this has already been done, and the call returns
without doing anything. */
UNIV_INTERN
void
recv_sys_init(
/*==========*/
	ulint	available_memory)	/* in: available memory in bytes */
{
	if (recv_sys->heap != NULL) {
		/* Already initialized */

		return;
	}

#ifndef UNIV_HOTBACKUP
	/* The red-black tree speeds up insertions into the flush_list
	during recovery. Setting it up is done while holding the buffer
	pool mutex, so it has to happen before recv_sys->mutex is taken. */
	buf_flush_init_flush_rbt();
#endif /* !UNIV_HOTBACKUP */

	mutex_enter(&(recv_sys->mutex));

#ifdef UNIV_HOTBACKUP
	recv_sys->heap = mem_heap_create(256);
	recv_is_from_backup = TRUE;
#else /* UNIV_HOTBACKUP */
	recv_sys->heap = mem_heap_create_in_buffer(256);
#endif /* UNIV_HOTBACKUP */

	/* A buffer pool of at least 10 MB can afford to keep more frames
	free for reading in pages; otherwise the default value stays. */
	if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
		recv_n_pool_free_frames = 512;
	}

	/* Parsing buffer, initially empty */
	recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
	recv_sys->recovered_offset = 0;
	recv_sys->len = 0;

	/* Hash table of page addresses, initially empty */
	recv_sys->addr_hash = hash_create(available_memory / 64);
	recv_sys->n_addrs = 0;

	recv_sys->apply_batch_on = FALSE;
	recv_sys->apply_log_recs = FALSE;

	/* Allocate two log blocks' worth of memory so that a whole block
	aligned to the log block size fits inside the allocation */
	recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
	recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
					OS_FILE_LOG_BLOCK_SIZE);

	recv_sys->found_corrupt_log = FALSE;

	recv_max_page_lsn = 0;

	mutex_exit(&(recv_sys->mutex));
}
/************************************************************
Empties the hash table when it has been fully processed. */
static
void
recv_sys_empty_hash(void)
/*=====================*/
{
ut_ad(mutex_own(&(recv_sys->mutex)));
if (recv_sys->n_addrs != 0) {
fprintf(stderr,
"InnoDB: Error: %lu pages with log records"
" were left unprocessed!\n"
"InnoDB: Maximum page number with"
" log records on it %lu\n",
(ulong) recv_sys->n_addrs,
(ulong) recv_max_parsed_page_no);
ut_error;
}
hash_table_free(recv_sys->addr_hash);
mem_heap_empty(recv_sys->heap);
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
}
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
/************************************************************
Releases everything recv_sys_init() allocated: the address hash table,
the heap, the parsing buffer and the last log block buffer. The hash
table and heap pointers are reset to NULL, so that recv_sys_init() can
be run again. The flush list red-black tree is freed last, after
recv_sys->mutex has been released. */
static
void
recv_sys_free(void)
/*===============*/
{
	mutex_enter(&(recv_sys->mutex));

	hash_table_free(recv_sys->addr_hash);
	recv_sys->addr_hash = NULL;

	mem_heap_free(recv_sys->heap);
	recv_sys->heap = NULL;

	ut_free(recv_sys->buf);
	mem_free(recv_sys->last_block_buf_start);

	mutex_exit(&(recv_sys->mutex));

	/* Free up the flush_rbt. */
	buf_flush_free_flush_rbt();
}
# endif /* UNIV_LOG_DEBUG */
/************************************************************
Truncates possibly corrupted or extra records from a log group by
overwriting the log with zero-filled blocks, starting from the block
that contains recovered_lsn. If recovered_lsn falls inside a block, that
block is written back from recv_sys->last_block with its data length
cut to end exactly at recovered_lsn. log_sys->buf is used as the
scratch buffer for the writes. */
static
void
recv_truncate_group(
/*================*/
	log_group_t*	group,		/* in: log group */
	ib_uint64_t	recovered_lsn,	/* in: recovery succeeded up to this
					lsn */
	ib_uint64_t	limit_lsn,	/* in: this was the limit for
					recovery */
	ib_uint64_t	checkpoint_lsn,	/* in: recovery was started from this
					checkpoint */
	ib_uint64_t	archived_lsn)	/* in: the log has been archived up to
					this lsn */
{
	ib_uint64_t	base_lsn;
	ib_uint64_t	capacity_end;
	ib_uint64_t	log_buf_end;
	ib_uint64_t	erase_end;
	ib_uint64_t	chunk_start;
	ib_uint64_t	chunk_end;
	ulint		pos;

	/* A checkpoint taken in the NOARCHIVELOG mode has no archived lsn:
	use the checkpoint lsn in its place */
	base_lsn = (archived_lsn == IB_ULONGLONG_MAX)
		? checkpoint_lsn
		: archived_lsn;

	capacity_end = ut_uint64_align_down(base_lsn, OS_FILE_LOG_BLOCK_SIZE)
		+ log_group_get_capacity(group);

	log_buf_end = ut_uint64_align_up(recovered_lsn,
					 OS_FILE_LOG_BLOCK_SIZE)
		+ recv_sys->last_log_buf_size;

	/* With a recovery limit in force we do not know how far log
	records should be erased: erase as much as possible. Without one
	it is enough to erase the length of the log buffer, though never
	beyond the capacity bound. */
	erase_end = capacity_end;

	if (limit_lsn == IB_ULONGLONG_MAX && log_buf_end <= capacity_end) {
		erase_end = log_buf_end;
	}

	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);

	/* Write the log buffer full of zeros */
	for (pos = 0; pos < RECV_SCAN_SIZE; pos++) {
		log_sys->buf[pos] = '\0';
	}

	chunk_start = ut_uint64_align_down(recovered_lsn,
					   OS_FILE_LOG_BLOCK_SIZE);

	if (chunk_start != recovered_lsn) {
		/* Copy the last incomplete log block to the log buffer and
		edit its data length: */
		ut_memcpy(log_sys->buf, recv_sys->last_block,
			  OS_FILE_LOG_BLOCK_SIZE);
		log_block_set_data_len(log_sys->buf,
				       (ulint) (recovered_lsn - chunk_start));
	}

	while (chunk_start < erase_end) {

		chunk_end = chunk_start + RECV_SCAN_SIZE;

		if (chunk_end > erase_end) {
			chunk_end = erase_end;
		}

		log_group_write_buf(group, log_sys->buf,
				    (ulint) (chunk_end - chunk_start),
				    chunk_start, 0);

		if (chunk_end >= erase_end) {
			/* That was the last chunk */
			break;
		}

		/* Only the first chunk may carry the copied block: every
		later one is written as pure zeros */
		for (pos = 0; pos < RECV_SCAN_SIZE; pos++) {
			log_sys->buf[pos] = '\0';
		}

		chunk_start = chunk_end;
	}
}
/************************************************************
Brings a log group up to date by copying the log between
group->scanned_lsn and recovered_lsn into it from the most up-to-date
log group. The data is moved through log_sys->buf in pieces of at most
RECV_SCAN_SIZE bytes. Nothing is copied when the group has already been
scanned up to recovered_lsn. */
static
void
recv_copy_group(
/*============*/
	log_group_t*	up_to_date_group,	/* in: the most up-to-date log
						group */
	log_group_t*	group,			/* in: copy to this log
						group */
	ib_uint64_t	recovered_lsn)		/* in: recovery succeeded up
						to this lsn */
{
	ib_uint64_t	seg_start;
	ib_uint64_t	seg_end;

	if (group->scanned_lsn >= recovered_lsn) {
		/* The group already has everything */

		return;
	}

	ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);

	for (seg_start = ut_uint64_align_down(group->scanned_lsn,
					      OS_FILE_LOG_BLOCK_SIZE);
	     ;
	     seg_start = seg_end) {

		seg_end = seg_start + RECV_SCAN_SIZE;

		if (seg_end > recovered_lsn) {
			/* Final piece: stop at the end of the block that
			contains recovered_lsn */
			seg_end = ut_uint64_align_up(recovered_lsn,
						     OS_FILE_LOG_BLOCK_SIZE);
		}

		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
				       up_to_date_group, seg_start, seg_end);

		log_group_write_buf(group, log_sys->buf,
				    (ulint) (seg_end - seg_start),
				    seg_start, 0);

		if (seg_end >= recovered_lsn) {
			break;
		}
	}
}
/************************************************************
Copies a log segment from the most up-to-date log group to the other log
groups, so that they all contain the latest log data. Also writes the info
about the latest checkpoint to the groups, and inits the fields in the group
memory structs to up-to-date values.
The function releases log_sys->mutex while it waits for the checkpoint
write to complete and re-acquires it before returning, so the caller is
expected to hold that mutex on entry. */
static
void
recv_synchronize_groups(
/*====================*/
	log_group_t*	up_to_date_group)	/* in: the most up-to-date
						log group */
{
	log_group_t*	group;
	ib_uint64_t	start_lsn;
	ib_uint64_t	end_lsn;
	ib_uint64_t	recovered_lsn;

	recovered_lsn = recv_sys->recovered_lsn;

	/* Read the last recovered log block to the recovery system buffer:
	the block is always incomplete */

	start_lsn = ut_uint64_align_down(recovered_lsn,
					 OS_FILE_LOG_BLOCK_SIZE);
	end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);

	ut_a(start_lsn != end_lsn);

	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
			       up_to_date_group, start_lsn, end_lsn);

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	while (group) {
		if (group != up_to_date_group) {

			/* Copy log data if needed. recv_copy_group() takes
			the source (most up-to-date) group first and the
			destination group second. */

			recv_copy_group(up_to_date_group, group,
					recovered_lsn);
		}

		/* Update the fields in the group struct to correspond to
		recovered_lsn */

		log_group_set_fields(group, recovered_lsn);

		group = UT_LIST_GET_NEXT(log_groups, group);
	}

	/* Copy the checkpoint info to the groups; remember that we have
	incremented checkpoint_no by one, and the info will not be written
	over the max checkpoint info, thus making the preservation of max
	checkpoint info on disk certain */

	log_groups_write_checkpoint_info();

	mutex_exit(&(log_sys->mutex));

	/* Wait for the checkpoint write to complete: taking and releasing
	the checkpoint lock in shared mode is used purely as the wait */

	rw_lock_s_lock(&(log_sys->checkpoint_lock));
	rw_lock_s_unlock(&(log_sys->checkpoint_lock));

	mutex_enter(&(log_sys->mutex));
}
#endif /* !UNIV_HOTBACKUP */
/***************************************************************************
Verifies both checksums stored in a checkpoint info buffer. The first
covers the bytes from the buffer start up to LOG_CHECKPOINT_CHECKSUM_1,
the second those from LOG_CHECKPOINT_LSN up to LOG_CHECKPOINT_CHECKSUM_2.
Only the low 32 bits of each computed fold value are compared with the
4-byte stored value. */
static
ibool
recv_check_cp_is_consistent(
/*========================*/
				/* out: TRUE if ok */
	const byte*	buf)	/* in: buffer containing checkpoint info */
{
	if ((ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1) & 0xFFFFFFFFUL)
	    == mach_read_from_4(buf + LOG_CHECKPOINT_CHECKSUM_1)
	    && (ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
			       LOG_CHECKPOINT_CHECKSUM_2
			       - LOG_CHECKPOINT_LSN) & 0xFFFFFFFFUL)
	    == mach_read_from_4(buf + LOG_CHECKPOINT_CHECKSUM_2)) {

		return(TRUE);
	}

	return(FALSE);
}
#ifndef UNIV_HOTBACKUP
/************************************************************
Reads both checkpoint fields of every log group and reports the group
and field holding the consistent checkpoint with the highest checkpoint
number. As a side effect each group is marked LOG_GROUP_OK if at least
one of its checkpoint fields is consistent and LOG_GROUP_CORRUPTED
otherwise, and group->lsn and group->lsn_offset are set from the
consistent field that was read last. */
static
ulint
recv_find_max_checkpoint(
/*=====================*/
					/* out: error code or DB_SUCCESS */
	log_group_t**	max_group,	/* out: max group */
	ulint*		max_field)	/* out: LOG_CHECKPOINT_1 or
					LOG_CHECKPOINT_2 */
{
	log_group_t*	group;
	ib_uint64_t	best_no = 0;
	ib_uint64_t	this_no;
	ulint		field;
	byte*		buf = log_sys->checkpoint_buf;

	*max_group = NULL;
	*max_field = 0;

	for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
	     group != NULL;
	     group = UT_LIST_GET_NEXT(log_groups, group)) {

		/* Assume the worst until a consistent field turns up */
		group->state = LOG_GROUP_CORRUPTED;

		for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
		     field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {

			log_group_read_checkpoint_info(group, field);

			if (!recv_check_cp_is_consistent(buf)) {
#ifdef UNIV_DEBUG
				if (log_debug_writes) {
					fprintf(stderr,
						"InnoDB: Checkpoint in group"
						" %lu at %lu invalid, %lu\n",
						(ulong) group->id,
						(ulong) field,
						(ulong) mach_read_from_4(
							buf
							+ LOG_CHECKPOINT_CHECKSUM_1));
				}
#endif /* UNIV_DEBUG */
				/* Try the next field */
				continue;
			}

			group->state = LOG_GROUP_OK;

			group->lsn = mach_read_ull(
				buf + LOG_CHECKPOINT_LSN);
			group->lsn_offset = mach_read_from_4(
				buf + LOG_CHECKPOINT_OFFSET);
			this_no = mach_read_ull(
				buf + LOG_CHECKPOINT_NO);

#ifdef UNIV_DEBUG
			if (log_debug_writes) {
				fprintf(stderr,
					"InnoDB: Checkpoint number %lu"
					" found in group %lu\n",
					(ulong) this_no,
					(ulong) group->id);
			}
#endif /* UNIV_DEBUG */

			/* On a tie the checkpoint examined later wins */
			if (this_no >= best_no) {
				best_no = this_no;
				*max_field = field;
				*max_group = group;
			}
		}
	}

	if (*max_group != NULL) {

		return(DB_SUCCESS);
	}

	fprintf(stderr,
		"InnoDB: No valid checkpoint found.\n"
		"InnoDB: If this error appears when you are"
		" creating an InnoDB database,\n"
		"InnoDB: the problem may be that during"
		" an earlier attempt you managed\n"
		"InnoDB: to create the InnoDB data files,"
		" but log file creation failed.\n"
		"InnoDB: If that is the case, please refer to\n"
		"InnoDB: " REFMAN "error-creating-innodb.html\n");

	return(DB_ERROR);
}
#else /* !UNIV_HOTBACKUP */
/***********************************************************************
Reads the checkpoint info needed in hot backup. Of the two checkpoint
fields in the log group header, the consistent one with the higher
checkpoint number is used; if only one of them is consistent, that one is
used whatever its checkpoint number is. */
UNIV_INTERN
ibool
recv_read_cp_info_for_backup(
/*=========================*/
				/* out: TRUE if success; FALSE if neither
				checkpoint field is consistent, in which
				case no out parameter is written */
	const byte*	hdr,	/* in: buffer containing the log group
				header */
	ib_uint64_t*	lsn,	/* out: checkpoint lsn */
	ulint*		offset,	/* out: checkpoint offset in the log group */
	ulint*		fsp_limit,/* out: fsp limit of space 0,
				1000000000 if the database is running
				with < version 3.23.50 of InnoDB */
	ib_uint64_t*	cp_no,	/* out: checkpoint number */
	ib_uint64_t*	first_header_lsn)
				/* out: lsn of the start of the
				first log file */
{
	ulint		max_cp		= 0;	/* 0 = none found yet */
	ib_uint64_t	max_cp_no	= 0;
	const byte*	cp_buf;

	cp_buf = hdr + LOG_CHECKPOINT_1;

	if (recv_check_cp_is_consistent(cp_buf)) {
		max_cp_no = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);
		max_cp = LOG_CHECKPOINT_1;
	}

	cp_buf = hdr + LOG_CHECKPOINT_2;

	if (recv_check_cp_is_consistent(cp_buf)) {
		ib_uint64_t	cp_no_2;

		cp_no_2 = mach_read_ull(cp_buf + LOG_CHECKPOINT_NO);

		/* Take the second checkpoint when it is the only consistent
		one, even if its number is 0, or when it is newer than the
		first. */
		if (max_cp == 0 || cp_no_2 > max_cp_no) {
			max_cp = LOG_CHECKPOINT_2;
			max_cp_no = cp_no_2;
		}
	}

	if (max_cp == 0) {
		/* Neither checkpoint field passed the consistency check */

		return(FALSE);
	}

	cp_buf = hdr + max_cp;

	*lsn = mach_read_ull(cp_buf + LOG_CHECKPOINT_LSN);
	*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);

	/* If the user is running a pre-3.23.50 version of InnoDB, its
	checkpoint data does not contain the fsp limit info */
	if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
	    == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {

		*fsp_limit = mach_read_from_4(
			cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);

		if (*fsp_limit == 0) {
			/* A stored limit of 0 is treated like a missing
			one */
			*fsp_limit = 1000000000;
		}
	} else {
		*fsp_limit = 1000000000;
	}

	/* max_cp_no holds the checkpoint number of the chosen field */
	*cp_no = max_cp_no;

	*first_header_lsn = mach_read_ull(hdr + LOG_FILE_START_LSN);

	return(TRUE);
}
#endif /* !UNIV_HOTBACKUP */
/**********************************************************
Compares the checksum calculated over a log block with the 4-byte
checksum stored in the block. A block whose stored checksum equals its
header block number is accepted as well: that is how blocks in the old
format < InnoDB-3.23.52 look, where the checksum field contains the log
block number. When UNIV_LOG_DEBUG is defined every block is accepted. */
static
ibool
log_block_checksum_is_ok_or_old_format(
/*===================================*/
				/* out: TRUE if ok, or if the log
				block may be in the format of InnoDB
				version < 3.23.52 */
	const byte*	block)	/* in: pointer to a log block */
{
#ifdef UNIV_LOG_DEBUG
	return(TRUE);
#endif /* UNIV_LOG_DEBUG */

	if (log_block_calc_checksum(block) != log_block_get_checksum(block)) {

		if (log_block_get_hdr_no(block)
		    != log_block_get_checksum(block)) {
			/* Neither a valid checksum nor the old format */

			return(FALSE);
		}

		/* We assume the log block is in the format of
		InnoDB version < 3.23.52 and the block is ok */
#if 0
		fprintf(stderr,
			"InnoDB: Scanned old format < InnoDB-3.23.52"
			" log block number %lu\n",
			log_block_get_hdr_no(block));
#endif
	}

	return(TRUE);
}
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Walks through the log blocks in a buffer and adds up the amount of valid
log data found. The scan stops at the first block whose header number
does not match the scanned lsn or whose checksum is wrong, at a block
whose checkpoint number lies more than 0x80000000 below the one scanned
before it, or after a block that is not completely filled with data. */
UNIV_INTERN
void
recv_scan_log_seg_for_backup(
/*=========================*/
	byte*		buf,		/* in: buffer containing log data */
	ulint		buf_len,	/* in: data length in that buffer */
	ib_uint64_t*	scanned_lsn,	/* in/out: lsn of buffer start,
					we return scanned lsn */
	ulint*		scanned_checkpoint_no,
					/* in/out: 4 lowest bytes of the
					highest scanned checkpoint number so
					far */
	ulint*		n_bytes_scanned)/* out: how much we were able to
					scan, smaller than buf_len if log
					data ended here */
{
	byte*	blk = buf;
	ulint	hdr_no;
	ulint	n_data;

	*n_bytes_scanned = 0;

	while (blk < buf + buf_len) {

		hdr_no = log_block_get_hdr_no(blk);

#if 0
		fprintf(stderr, "Log block header no %lu\n", hdr_no);
#endif

		if (hdr_no != log_block_convert_lsn_to_no(*scanned_lsn)
		    || !log_block_checksum_is_ok_or_old_format(blk)) {
			/* Garbage or an incompletely written log block */
#if 0
			fprintf(stderr,
				"Log block n:o %lu, scanned lsn n:o %lu\n",
				hdr_no,
				log_block_convert_lsn_to_no(*scanned_lsn));

			blk += OS_FILE_LOG_BLOCK_SIZE;

			fprintf(stderr,
				"Next log block n:o %lu\n",
				log_block_get_hdr_no(blk));
#endif
			break;
		}

		if (*scanned_checkpoint_no > 0
		    && log_block_get_checkpoint_no(blk)
		    < *scanned_checkpoint_no
		    && *scanned_checkpoint_no
		    - log_block_get_checkpoint_no(blk)
		    > 0x80000000UL) {
			/* Garbage from a log buffer flush which was made
			before the most recent database recovery */
#if 0
			fprintf(stderr,
				"Scanned cp n:o %lu, block cp n:o %lu\n",
				*scanned_checkpoint_no,
				log_block_get_checkpoint_no(blk));
#endif
			break;
		}

		/* The block is good: account for its data */
		n_data = log_block_get_data_len(blk);

		*scanned_checkpoint_no = log_block_get_checkpoint_no(blk);
		*scanned_lsn += n_data;
		*n_bytes_scanned += n_data;

		if (n_data < OS_FILE_LOG_BLOCK_SIZE) {
			/* Log data ends here */
#if 0
			fprintf(stderr, "Log block data len %lu\n",
				n_data);
#endif
			break;
		}

		blk += OS_FILE_LOG_BLOCK_SIZE;
	}
}
#endif /* UNIV_HOTBACKUP */
/***********************************************************************
Tries to parse a single log record body and also applies it to a page if
specified. File ops are parsed, but not applied in this function.
The record type selects the parser that does the work. When block is
NULL, the page and page_zip arguments handed to the parsers are NULL as
well. A record type that is not recognized sets
recv_sys->found_corrupt_log and makes the function return NULL. */
static
byte*
recv_parse_or_apply_log_rec_body(
/*=============================*/
/* out: log record end, NULL if not a
complete record */
byte type, /* in: type */
byte* ptr, /* in: pointer to a buffer */
byte* end_ptr,/* in: pointer to the buffer end */
buf_block_t* block, /* in/out: buffer block or NULL; if
not NULL, then the log record is
applied to the page, and the log
record should be complete then */
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL
if and only if block is non-NULL */
{
/* Set by mlog_parse_index() in the cases that need an index; whatever
it leaves here is freed before returning */
dict_index_t* index = NULL;
page_t* page;
page_zip_des_t* page_zip;
#ifdef UNIV_DEBUG
/* Page type, used only by the debug assertions below */
ulint page_type;
#endif /* UNIV_DEBUG */
ut_ad(!block == !mtr);
if (block) {
page = block->frame;
page_zip = buf_block_get_page_zip(block);
ut_d(page_type = fil_page_get_type(page));
} else {
/* Parse only: there is no page to apply the record to */
page = NULL;
page_zip = NULL;
ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
}
switch (type) {
case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
#ifdef UNIV_DEBUG
if (page && page_type == FIL_PAGE_TYPE_ALLOCATED
&& end_ptr >= ptr + 2) {
/* It is OK to set FIL_PAGE_TYPE and certain
list node fields on an empty page. Any other
write is not OK. */
/* NOTE: There may be bogus assertion failures for
dict_hdr_create(), trx_rseg_header_create(),
trx_sys_create_doublewrite_buf(), and
trx_sysf_create().
These are only called during database creation. */
/* The first two bytes of the record body are read as the
offset that is checked below */
ulint offs = mach_read_from_2(ptr);
switch (type) {
default:
/* MLOG_1BYTE and MLOG_8BYTES end up here */
ut_error;
case MLOG_2BYTES:
/* Note that this can fail when the
redo log been written with something
older than InnoDB Plugin 1.0.4. */
ut_ad(offs == FIL_PAGE_TYPE
|| offs == IBUF_TREE_SEG_HEADER
+ IBUF_HEADER + FSEG_HDR_OFFSET
|| offs == PAGE_BTR_IBUF_FREE_LIST
+ PAGE_HEADER + FIL_ADDR_BYTE
|| offs == PAGE_BTR_IBUF_FREE_LIST
+ PAGE_HEADER + FIL_ADDR_BYTE
+ FIL_ADDR_SIZE
|| offs == PAGE_BTR_SEG_LEAF
+ PAGE_HEADER + FSEG_HDR_OFFSET
|| offs == PAGE_BTR_SEG_TOP
+ PAGE_HEADER + FSEG_HDR_OFFSET
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ PAGE_HEADER + FIL_ADDR_BYTE
+ 0 /*FLST_PREV*/
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ PAGE_HEADER + FIL_ADDR_BYTE
+ FIL_ADDR_SIZE /*FLST_NEXT*/);
break;
case MLOG_4BYTES:
/* Note that this can fail when the
redo log been written with something
older than InnoDB Plugin 1.0.4. */
ut_ad(0
|| offs == IBUF_TREE_SEG_HEADER
+ IBUF_HEADER + FSEG_HDR_SPACE
|| offs == IBUF_TREE_SEG_HEADER
+ IBUF_HEADER + FSEG_HDR_PAGE_NO
|| offs == PAGE_BTR_IBUF_FREE_LIST
+ PAGE_HEADER/* flst_init */
|| offs == PAGE_BTR_IBUF_FREE_LIST
+ PAGE_HEADER + FIL_ADDR_PAGE
|| offs == PAGE_BTR_IBUF_FREE_LIST
+ PAGE_HEADER + FIL_ADDR_PAGE
+ FIL_ADDR_SIZE
|| offs == PAGE_BTR_SEG_LEAF
+ PAGE_HEADER + FSEG_HDR_PAGE_NO
|| offs == PAGE_BTR_SEG_LEAF
+ PAGE_HEADER + FSEG_HDR_SPACE
|| offs == PAGE_BTR_SEG_TOP
+ PAGE_HEADER + FSEG_HDR_PAGE_NO
|| offs == PAGE_BTR_SEG_TOP
+ PAGE_HEADER + FSEG_HDR_SPACE
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ PAGE_HEADER + FIL_ADDR_PAGE
+ 0 /*FLST_PREV*/
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ PAGE_HEADER + FIL_ADDR_PAGE
+ FIL_ADDR_SIZE /*FLST_NEXT*/);
break;
}
}
#endif /* UNIV_DEBUG */
ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
break;
/* The index record cases below share one pattern: mlog_parse_index()
parses the index info first; if it returns non-NULL, the compact flag of
the page (when there is a page) must agree with that of the index's
table, and the record-specific parser is called. */
case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_INSERT,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
block, index, mtr);
}
break;
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_CLUST_DELETE_MARK,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = btr_cur_parse_del_mark_set_clust_rec(
ptr, end_ptr, page, page_zip, index);
}
break;
case MLOG_COMP_REC_SEC_DELETE_MARK:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
/* This log record type is obsolete, but we process it for
backward compatibility with MySQL 5.0.3 and 5.0.4. */
ut_a(!page || page_is_comp(page));
ut_a(!page_zip);
/* The index info only has to be skipped over here: the parser
called after the fall through takes no index argument */
ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
if (!ptr) {
break;
}
/* Fall through */
case MLOG_REC_SEC_DELETE_MARK:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
page, page_zip);
break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_UPDATE_IN_PLACE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
page_zip, index);
}
break;
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_LIST_END_DELETE
|| type == MLOG_COMP_LIST_START_DELETE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
block, index, mtr);
}
break;
case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_LIST_END_COPY_CREATED,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_parse_copy_rec_list_to_created_page(
ptr, end_ptr, block, index, mtr);
}
break;
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_PAGE_REORGANIZE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
block, mtr);
}
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
/* Allow anything in page_type when creating a page. */
ut_a(!page_zip);
ptr = page_parse_create(ptr, end_ptr,
type == MLOG_COMP_PAGE_CREATE,
block, mtr);
break;
/* Undo log page records */
case MLOG_UNDO_INSERT:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
break;
case MLOG_UNDO_ERASE_END:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_INIT:
/* Allow anything in page_type when creating a page. */
ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_HDR_DISCARD:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
break;
case MLOG_UNDO_HDR_CREATE:
case MLOG_UNDO_HDR_REUSE:
ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
page, mtr);
break;
case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
/* On a compressed page, MLOG_COMP_REC_MIN_MARK
will be followed by MLOG_COMP_REC_DELETE
or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
in the same mini-transaction. */
ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
ptr = btr_parse_set_min_rec_mark(
ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
page, mtr);
break;
case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
type == MLOG_COMP_REC_DELETE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
block, index, mtr);
}
break;
case MLOG_IBUF_BITMAP_INIT:
/* Allow anything in page_type when creating a page. */
ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
break;
case MLOG_INIT_FILE_PAGE:
/* Allow anything in page_type when creating a page. */
ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
break;
case MLOG_WRITE_STRING:
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
/* File operations: per the function header these are only parsed
here, not applied. NOTE(review): the two trailing 0 arguments are
presumably what selects parse-only mode; confirm against
fil_op_log_parse_or_replay(). */
case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
break;
/* Compressed page records */
case MLOG_ZIP_WRITE_NODE_PTR:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_WRITE_BLOB_PTR:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_WRITE_HEADER:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
ptr = page_zip_parse_write_header(ptr, end_ptr,
page, page_zip);
break;
case MLOG_ZIP_PAGE_COMPRESS:
/* Allow anything in page_type when creating a page. */
ptr = page_zip_parse_compress(ptr, end_ptr,
page, page_zip);
break;
default:
/* Unknown record type: report failure to the caller and flag
the log as corrupt */
ptr = NULL;
recv_sys->found_corrupt_log = TRUE;
}
if (index) {
/* Free the index that mlog_parse_index() stored in index, together
with its table; the table pointer is saved first because the index
is freed before it */
dict_table_t* table = index->table;
dict_mem_index_free(index);
dict_mem_table_free(table);
}
return(ptr);
}
/*************************************************************************
Folds a page file address, i.e. a (space id, page number) pair, into a
single value. The fold is used when a log record is inserted into or
searched from the hash table. */
UNIV_INLINE
ulint
recv_fold(
/*======*/
			/* out: folded value */
	ulint	space,	/* in: space */
	ulint	page_no)/* in: page number */
{
	ulint	fold;

	fold = ut_fold_ulint_pair(space, page_no);

	return(fold);
}
/*************************************************************************
Calculates the hash value of a page file address in
recv_sys->addr_hash: the fold of the (space id, page number) pair is
mapped onto that hash table. Used when a log record is inserted into or
searched from the hash table. */
UNIV_INLINE
ulint
recv_hash(
/*======*/
			/* out: hash value */
	ulint	space,	/* in: space */
	ulint	page_no)/* in: page number */
{
	ulint	fold;

	fold = recv_fold(space, page_no);

	return(hash_calc_hash(fold, recv_sys->addr_hash));
}
/*************************************************************************
Looks up the hashed file address struct of a page by walking the hash
chain of the cell the page address hashes to. */
static
recv_addr_t*
recv_get_fil_addr_struct(
/*=====================*/
			/* out: file address struct, NULL if not found from
			the hash table */
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	recv_addr_t*	node;

	for (node = HASH_GET_FIRST(recv_sys->addr_hash,
				   recv_hash(space, page_no));
	     node != NULL;
	     node = HASH_GET_NEXT(addr_hash, node)) {

		if (node->space == space && node->page_no == page_no) {

			return(node);
		}
	}

	/* The chain ended without a match */
	return(NULL);
}
/***********************************************************************
Stores a log record in the hash table of log records: the record is
appended to the record list of its page address, and a new address
entry is created first if the page has none yet. The record body is
copied into recv_sys->heap as a chain of chunks. Records for a
tablespace that is deleted or being deleted are dropped. */
static
void
recv_add_to_hash_table(
/*===================*/
	byte		type,		/* in: log record type */
	ulint		space,		/* in: space id */
	ulint		page_no,	/* in: page number */
	byte*		body,		/* in: log record body */
	byte*		rec_end,	/* in: log record end */
	ib_uint64_t	start_lsn,	/* in: start lsn of the mtr */
	ib_uint64_t	end_lsn)	/* in: end lsn of the mtr */
{
	recv_t*		rec;
	recv_addr_t*	addr;
	recv_data_t*	piece;
	recv_data_t**	link;
	byte*		src;
	ulint		piece_len;

	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
		/* The tablespace does not exist any more: do not store the
		log record */

		return;
	}

	rec = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));

	rec->type = type;
	rec->len = rec_end - body;
	rec->start_lsn = start_lsn;
	rec->end_lsn = end_lsn;

	addr = recv_get_fil_addr_struct(space, page_no);

	if (addr == NULL) {
		/* First record for this page: create its address entry */
		addr = mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t));

		addr->space = space;
		addr->page_no = page_no;
		addr->state = RECV_NOT_PROCESSED;

		UT_LIST_INIT(addr->rec_list);

		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
			    recv_fold(space, page_no), addr);
		recv_sys->n_addrs++;
#if 0
		fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
			space, page_no);
#endif
	}

	UT_LIST_ADD_LAST(rec_list, addr->rec_list, rec);

	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
	recv_sys->heap grows into the buffer pool, and bigger chunks could not
	be allocated. link always points to the pointer field that the next
	chunk has to be hooked into. */

	link = &(rec->data);

	for (src = body; src < rec_end; src += piece_len) {

		piece_len = rec_end - src;

		if (piece_len > RECV_DATA_BLOCK_SIZE) {
			piece_len = RECV_DATA_BLOCK_SIZE;
		}

		piece = mem_heap_alloc(recv_sys->heap,
				       sizeof(recv_data_t) + piece_len);
		*link = piece;

		/* The payload follows right after the chunk header */
		ut_memcpy(((byte*) piece) + sizeof(recv_data_t),
			  src, piece_len);

		link = &(piece->next);
	}

	/* Terminate the chain */
	*link = NULL;
}
/*************************************************************************
Gathers the body of a log record, which is stored as a chain of chunks of at
most RECV_DATA_BLOCK_SIZE bytes each, into one contiguous buffer. */
static
void
recv_data_copy_to_buf(
/*==================*/
	byte*	buf,	/* in: buffer of length at least recv->len */
	recv_t*	recv)	/* in: log record */
{
	recv_data_t*	chunk;
	ulint		remaining;

	for (chunk = recv->data, remaining = recv->len;
	     remaining > 0;
	     chunk = chunk->next) {

		/* Every chunk but the last one is completely full */

		const ulint	n_bytes = (remaining > RECV_DATA_BLOCK_SIZE)
			? RECV_DATA_BLOCK_SIZE
			: remaining;

		ut_memcpy(buf, ((byte*) chunk) + sizeof(recv_data_t),
			  n_bytes);

		buf += n_bytes;
		remaining -= n_bytes;
	}
}
/****************************************************************************
Applies the hashed log records to the page, if the page lsn is less than the
lsn of a log record. This can be called when a buffer page has just been
read in, or also for a page already in the buffer pool.

The function returns without touching the page if log records are not to be
applied now (recv_sys->apply_log_recs is FALSE), if no records are hashed for
the page, or if the address struct of the page is already in state
RECV_BEING_PROCESSED or RECV_PROCESSED. Otherwise the state is moved to
RECV_BEING_PROCESSED, every stored record whose start_lsn is >= the page lsn
is applied in list order, the state is moved to RECV_PROCESSED, and
recv_sys->n_addrs is decremented. */
UNIV_INTERN
void
recv_recover_page_func(
/*===================*/
#ifndef UNIV_HOTBACKUP
ibool just_read_in,
/* in: TRUE if the i/o-handler calls this for
a freshly read page */
#endif /* !UNIV_HOTBACKUP */
buf_block_t* block) /* in: buffer block */
{
page_t* page;
recv_addr_t* recv_addr;
recv_t* recv;
byte* buf;
ib_uint64_t start_lsn;
ib_uint64_t end_lsn;
ib_uint64_t page_lsn;
ib_uint64_t page_newest_lsn;
ibool modification_to_page;
#ifndef UNIV_HOTBACKUP
ibool success;
#endif /* !UNIV_HOTBACKUP */
mtr_t mtr;
/* The state of the address struct is inspected and changed only while
holding recv_sys->mutex */
mutex_enter(&(recv_sys->mutex));
if (recv_sys->apply_log_recs == FALSE) {
/* Log records should not be applied now */
mutex_exit(&(recv_sys->mutex));
return;
}
recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
buf_block_get_page_no(block));
/* Nothing to do if there are no records for this page, or if the page
is being processed or has already been processed */
if ((recv_addr == NULL)
|| (recv_addr->state == RECV_BEING_PROCESSED)
|| (recv_addr->state == RECV_PROCESSED)) {
mutex_exit(&(recv_sys->mutex));
return;
}
#if 0
fprintf(stderr, "Recovering space %lu, page %lu\n",
buf_block_get_space(block), buf_block_get_page_no(block));
#endif
/* Claim the page before releasing the mutex, so that a concurrent call
for the same page returns in the check above */
recv_addr->state = RECV_BEING_PROCESSED;
mutex_exit(&(recv_sys->mutex));
mtr_start(&mtr);
/* NOTE(review): MTR_LOG_NONE presumably keeps this mini-transaction
from generating redo log of its own -- confirm in mtr0mtr */
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
page = block->frame;
#ifndef UNIV_HOTBACKUP
if (just_read_in) {
/* Move the ownership of the x-latch on the page to
this OS thread, so that we can acquire a second
x-latch on it. This is needed for the operations to
the page to pass the debug checks. */
rw_lock_x_lock_move_ownership(&block->lock);
}
success = buf_page_get_known_nowait(RW_X_LATCH, block,
BUF_KEEP_OLD,
__FILE__, __LINE__,
&mtr);
ut_a(success);
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* !UNIV_HOTBACKUP */
/* Read the newest modification lsn from the page */
page_lsn = mach_read_ull(page + FIL_PAGE_LSN);
#ifndef UNIV_HOTBACKUP
/* It may be that the page has been modified in the buffer
pool: read the newest modification lsn there */
page_newest_lsn = buf_page_get_newest_modification(&block->page);
if (page_newest_lsn) {
page_lsn = page_newest_lsn;
}
#else /* !UNIV_HOTBACKUP */
/* In recovery from a backup we do not really use the buffer pool */
page_newest_lsn = 0;
#endif /* !UNIV_HOTBACKUP */
/* start_lsn will be the start lsn of the first record that is actually
applied; end_lsn is overwritten for every record visited, so after the
loop it holds the end_lsn of the last record in the list */
modification_to_page = FALSE;
start_lsn = end_lsn = 0;
recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
while (recv) {
end_lsn = recv->end_lsn;
if (recv->len > RECV_DATA_BLOCK_SIZE) {
/* We have to copy the record body to a separate
buffer */
buf = mem_alloc(recv->len);
recv_data_copy_to_buf(buf, recv);
} else {
/* The whole body is in the first data chunk, right after the
chunk header: use it in place */
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
}
if (recv->type == MLOG_INIT_FILE_PAGE) {
/* The page is (re)initialized by this record: fall back to the
buffer pool modification lsn and zero the lsn fields stored in the
page frame */
page_lsn = page_newest_lsn;
mach_write_ull(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM, 0);
mach_write_ull(page + FIL_PAGE_LSN, 0);
}
/* Apply the record only if the page does not yet contain it */
if (recv->start_lsn >= page_lsn) {
if (!modification_to_page) {
modification_to_page = TRUE;
start_lsn = recv->start_lsn;
}
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Applying log rec"
" type %lu len %lu"
" to space %lu page no %lu\n",
(ulong) recv->type, (ulong) recv->len,
(ulong) recv_addr->space,
(ulong) recv_addr->page_no);
}
#endif /* UNIV_DEBUG */
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
block, &mtr);
/* Stamp both lsn fields of the page frame with
recv->start_lsn + recv->len */
mach_write_ull(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM,
recv->start_lsn + recv->len);
mach_write_ull(page + FIL_PAGE_LSN,
recv->start_lsn + recv->len);
}
/* Free the temporary buffer; the condition mirrors the one under
which it was allocated above */
if (recv->len > RECV_DATA_BLOCK_SIZE) {
mem_free(buf);
}
recv = UT_LIST_GET_NEXT(rec_list, recv);
}
#ifdef UNIV_ZIP_DEBUG
if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
if (page_zip) {
ut_a(page_zip_validate_low(page_zip, page, FALSE));
}
}
#endif /* UNIV_ZIP_DEBUG */
/* Publish the result under the mutex: recv_apply_hashed_log_recs()
waits for recv_sys->n_addrs to drop to zero */
mutex_enter(&(recv_sys->mutex));
if (recv_max_page_lsn < page_lsn) {
recv_max_page_lsn = page_lsn;
}
recv_addr->state = RECV_PROCESSED;
ut_a(recv_sys->n_addrs);
recv_sys->n_addrs--;
mutex_exit(&(recv_sys->mutex));
#ifndef UNIV_HOTBACKUP
if (modification_to_page) {
ut_a(block);
/* Tell the flush code about the lsn range of the applied changes */
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
}
#endif /* !UNIV_HOTBACKUP */
/* Make sure that committing mtr does not change the modification
lsn values of page */
mtr.modifications = FALSE;
mtr_commit(&mtr);
}
#ifndef UNIV_HOTBACKUP
/***********************************************************************
Requests reads of the pages that have hashed log records and lie in the
RECV_READ_AHEAD_AREA-aligned area containing the given page number. Only
pages that are not in the buffer pool and whose address struct is still in
state RECV_NOT_PROCESSED are requested; their state is changed to
RECV_BEING_READ. */
static
ulint
recv_read_in_area(
/*==============*/
			/* out: number of pages found */
	ulint	space,	/* in: space */
	ulint	zip_size,/* in: compressed page size in bytes, or 0 */
	ulint	page_no)/* in: page number */
{
	ulint		page_nos[RECV_READ_AHEAD_AREA];
	ulint		n_found = 0;
	const ulint	area_start
		= page_no - (page_no % RECV_READ_AHEAD_AREA);
	const ulint	area_end = area_start + RECV_READ_AHEAD_AREA;
	ulint		cur;

	for (cur = area_start; cur < area_end; cur++) {

		recv_addr_t*	addr = recv_get_fil_addr_struct(space, cur);

		if (addr == NULL || buf_page_peek(space, cur)) {
			/* No log records for the page, or the page is
			already in the buffer pool */

			continue;
		}

		mutex_enter(&(recv_sys->mutex));

		if (addr->state == RECV_NOT_PROCESSED) {
			addr->state = RECV_BEING_READ;

			page_nos[n_found] = cur;
			n_found++;
		}

		mutex_exit(&(recv_sys->mutex));
	}

	buf_read_recv_pages(FALSE, space, zip_size, page_nos, n_found);

	return(n_found);
}
/***********************************************************************
Empties the hash table of stored log records, applying them to appropriate
pages. Only one apply batch runs at a time: if recv_sys->apply_batch_on is
already set, the caller sleeps and retries. The function returns only after
recv_sys->n_addrs has dropped to zero and the hash table has been emptied. */
UNIV_INTERN
void
recv_apply_hashed_log_recs(
/*=======================*/
ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
allowed during the application; if FALSE,
no ibuf operations are allowed, and after
the application all file pages are flushed to
disk and invalidated in buffer pool: this
alternative means that no new log records
can be generated during the application;
the caller must in this case own the log
mutex */
{
recv_addr_t* recv_addr;
ulint i;
ulint n_pages;
ibool has_printed = FALSE;
mtr_t mtr;
loop:
mutex_enter(&(recv_sys->mutex));
if (recv_sys->apply_batch_on) {
/* Another batch is in progress: release the mutex, sleep, and
check again */
mutex_exit(&(recv_sys->mutex));
os_thread_sleep(500000);
goto loop;
}
/* The log mutex must be owned exactly when ibuf operations are not
allowed */
ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
if (!allow_ibuf) {
recv_no_ibuf_operations = TRUE;
}
recv_sys->apply_log_recs = TRUE;
recv_sys->apply_batch_on = TRUE;
/* Visit every cell of the address hash table and every address struct
chained to the cell */
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
while (recv_addr) {
ulint space = recv_addr->space;
ulint zip_size = fil_space_get_zip_size(space);
ulint page_no = recv_addr->page_no;
if (recv_addr->state == RECV_NOT_PROCESSED) {
if (!has_printed) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Starting an"
" apply batch of log records"
" to the database...\n"
"InnoDB: Progress in percents: ",
stderr);
has_printed = TRUE;
}
/* recv_sys->mutex is released while the page is processed:
recv_recover_page_func() and recv_read_in_area() acquire it
themselves */
mutex_exit(&(recv_sys->mutex));
if (buf_page_peek(space, page_no)) {
/* The page is in the buffer pool: x-latch it and apply the
records right away */
buf_block_t* block;
mtr_start(&mtr);
block = buf_page_get(
space, zip_size, page_no,
RW_X_LATCH, &mtr);
buf_block_dbg_add_level(
block, SYNC_NO_ORDER_CHECK);
recv_recover_page(FALSE, block);
mtr_commit(&mtr);
} else {
/* The page is not in the buffer pool: request a read of it
and of its neighbours that have log records */
recv_read_in_area(space, zip_size,
page_no);
}
mutex_enter(&(recv_sys->mutex));
}
recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
}
/* Print the progress whenever the integer percentage changes between
cell i and cell i + 1 */
if (has_printed
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
!= ((i + 1) * 100)
/ hash_get_n_cells(recv_sys->addr_hash)) {
fprintf(stderr, "%lu ", (ulong)
((i * 100)
/ hash_get_n_cells(recv_sys->addr_hash)));
}
}
/* Wait until all the pages have been processed */
while (recv_sys->n_addrs != 0) {
mutex_exit(&(recv_sys->mutex));
os_thread_sleep(500000);
mutex_enter(&(recv_sys->mutex));
}
if (has_printed) {
fprintf(stderr, "\n");
}
if (!allow_ibuf) {
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
/* Both mutexes are released for the duration of the flush and are
re-acquired below, log_sys->mutex first and recv_sys->mutex second */
mutex_exit(&(recv_sys->mutex));
mutex_exit(&(log_sys->mutex));
n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
IB_ULONGLONG_MAX);
ut_a(n_pages != ULINT_UNDEFINED);
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
buf_pool_invalidate();
mutex_enter(&(log_sys->mutex));
mutex_enter(&(recv_sys->mutex));
recv_no_ibuf_operations = FALSE;
}
/* The batch is over: clear the flags and empty the hash table */
recv_sys->apply_log_recs = FALSE;
recv_sys->apply_batch_on = FALSE;
recv_sys_empty_hash();
if (has_printed) {
fprintf(stderr, "InnoDB: Apply batch completed\n");
}
mutex_exit(&(recv_sys->mutex));
}
#else /* !UNIV_HOTBACKUP */
/***********************************************************************
Applies log records in the hash table to a backup. For every hashed page
address the page is read from its tablespace file into back_block1, the
stored log records are applied to it with recv_recover_page(), and the page
is written back to the file. Addresses whose tablespace is not known
(fil_space_get_zip_size() returns ULINT_UNDEFINED) are marked processed and
skipped. The process exits with status 1 if a tablespace cannot be extended,
if a page cannot be read, or if a compressed page cannot be decompressed.
Finally the hash table is emptied. */
UNIV_INTERN
void
recv_apply_log_recs_for_backup(void)
/*================================*/
{
	recv_addr_t*	recv_addr;
	ulint		n_hash_cells;
	buf_block_t*	block;
	ulint		actual_size;
	ibool		success;
	ulint		error;
	ulint		i;

	recv_sys->apply_log_recs = TRUE;
	recv_sys->apply_batch_on = TRUE;

	/* One block is reused for every page */
	block = back_block1;

	fputs("InnoDB: Starting an apply batch of log records"
	      " to the database...\n"
	      "InnoDB: Progress in percents: ", stderr);

	n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);

	for (i = 0; i < n_hash_cells; i++) {
		/* The address hash table is externally chained */
		recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;

		while (recv_addr != NULL) {

			ulint	zip_size
				= fil_space_get_zip_size(recv_addr->space);

			if (zip_size == ULINT_UNDEFINED) {
#if 0
				fprintf(stderr,
					"InnoDB: Warning: cannot apply"
					" log record to"
					" tablespace %lu page %lu,\n"
					"InnoDB: because tablespace with"
					" that id does not exist.\n",
					recv_addr->space, recv_addr->page_no);
#endif
				recv_addr->state = RECV_PROCESSED;

				ut_a(recv_sys->n_addrs);
				recv_sys->n_addrs--;

				goto skip_this_recv_addr;
			}

			/* We simulate a page read made by the buffer pool, to
			make sure the recovery apparatus works ok. We must init
			the block. */

			buf_page_init_for_backup_restore(
				recv_addr->space, recv_addr->page_no,
				zip_size, block);

			/* Extend the tablespace's last file if the page_no
			does not fall inside its bounds; we assume the last
			file is auto-extending, and ibbackup copied the file
			when it still was smaller */

			success = fil_extend_space_to_desired_size(
				&actual_size,
				recv_addr->space, recv_addr->page_no + 1);
			if (!success) {
				/* ulint may be wider than unsigned long:
				cast explicitly to match %lu, as the other
				messages of this function do */
				fprintf(stderr,
					"InnoDB: Fatal error: cannot extend"
					" tablespace %lu to hold %lu pages\n",
					(ulong) recv_addr->space,
					(ulong) recv_addr->page_no);

				exit(1);
			}

			/* Read the page from the tablespace file using the
			fil0fil.c routines */

			if (zip_size) {
				error = fil_io(OS_FILE_READ, TRUE,
					       recv_addr->space, zip_size,
					       recv_addr->page_no, 0, zip_size,
					       block->page.zip.data, NULL);
				if (error == DB_SUCCESS
				    && !buf_zip_decompress(block, TRUE)) {
					exit(1);
				}
			} else {
				error = fil_io(OS_FILE_READ, TRUE,
					       recv_addr->space, 0,
					       recv_addr->page_no, 0,
					       UNIV_PAGE_SIZE,
					       block->frame, NULL);
			}

			if (error != DB_SUCCESS) {
				fprintf(stderr,
					"InnoDB: Fatal error: cannot read"
					" from tablespace"
					" %lu page number %lu\n",
					(ulong) recv_addr->space,
					(ulong) recv_addr->page_no);

				exit(1);
			}

			/* Apply the log records to this page */
			recv_recover_page(FALSE, block);

			/* Write the page back to the tablespace file using the
			fil0fil.c routines */

			buf_flush_init_for_writing(
				block->frame, buf_block_get_page_zip(block),
				mach_read_ull(block->frame + FIL_PAGE_LSN));

			/* NOTE(review): the value returned by the write is
			stored in error but never examined; confirm whether a
			failed write should be fatal like a failed read */

			if (zip_size) {
				error = fil_io(OS_FILE_WRITE, TRUE,
					       recv_addr->space, zip_size,
					       recv_addr->page_no, 0,
					       zip_size,
					       block->page.zip.data, NULL);
			} else {
				error = fil_io(OS_FILE_WRITE, TRUE,
					       recv_addr->space, 0,
					       recv_addr->page_no, 0,
					       UNIV_PAGE_SIZE,
					       block->frame, NULL);
			}
skip_this_recv_addr:
			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
		}

		/* Print the progress whenever the integer percentage changes
		between cell i and cell i + 1 */

		if ((100 * i) / n_hash_cells
		    != (100 * (i + 1)) / n_hash_cells) {
			fprintf(stderr, "%lu ",
				(ulong) ((100 * i) / n_hash_cells));
			fflush(stderr);
		}
	}

	recv_sys_empty_hash();
}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************
Tries to parse one log record starting at ptr and returns its length. The
one-byte records MLOG_MULTI_REC_END and MLOG_DUMMY_RECORD are recognized
directly; for them *body stays NULL and *page_no is not written. For any
other record the initial part and then the body are parsed. A page number
above 0x8FFFFFFF sets recv_sys->found_corrupt_log. */
static
ulint
recv_parse_log_rec(
/*===============*/
			/* out: length of the record, or 0 if the record was
			not complete */
	byte*	ptr,	/* in: pointer to a buffer */
	byte*	end_ptr,/* in: pointer to the buffer end */
	byte*	type,	/* out: type */
	ulint*	space,	/* out: space id */
	ulint*	page_no,/* out: page number */
	byte**	body)	/* out: log record body start */
{
	byte*	rec_end;

	*body = NULL;

	if (ptr == end_ptr) {
		/* Empty buffer: nothing to parse */

		return(0);
	}

	switch (*ptr) {
	case MLOG_MULTI_REC_END:
		*type = *ptr;

		return(1);

	case MLOG_DUMMY_RECORD:
		*type = *ptr;
		*space = ULINT_UNDEFINED - 1; /* For debugging */

		return(1);
	}

	/* Parse the type, the space id and the page number */

	*body = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
					      page_no);

	if (UNIV_UNLIKELY(*body == NULL)) {

		return(0);
	}

	/* Check that page_no is sensible */

	if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
		recv_sys->found_corrupt_log = TRUE;

		return(0);
	}

	/* Parse the body without applying it: no block and no mtr are
	passed */

	rec_end = recv_parse_or_apply_log_rec_body(*type, *body, end_ptr,
						   NULL, NULL);

	if (UNIV_UNLIKELY(rec_end == NULL)) {

		return(0);
	}

	/* Keep track of the biggest page number parsed so far */

	if (recv_max_parsed_page_no < *page_no) {
		recv_max_parsed_page_no = *page_no;
	}

	return(rec_end - ptr);
}
/***********************************************************
Calculates the new value for lsn when more data is added to the log. The lsn
advances by the length of the data plus the combined size of a log block
header and trailer for every time the data fills up the payload area of a log
block. */
static
ib_uint64_t
recv_calc_lsn_on_data_add(
/*======================*/
	ib_uint64_t	lsn,	/* in: old lsn */
	ib_uint64_t	len)	/* in: this many bytes of data is
				added, log block headers not included */
{
	/* Bytes of log data that fit in one block, and the bytes of header
	plus trailer that surround them */
	const ulint	payload_size = OS_FILE_LOG_BLOCK_SIZE
		- LOG_BLOCK_HDR_SIZE - LOG_BLOCK_TRL_SIZE;
	const ulint	framing_size = LOG_BLOCK_HDR_SIZE
		+ LOG_BLOCK_TRL_SIZE;
	const ulint	n_bytes = (ulint) len;
	ulint		used_in_block;
	ulint		n_filled;

	/* Payload bytes already used in the block that lsn points to */
	used_in_block = (((ulint) lsn) % OS_FILE_LOG_BLOCK_SIZE)
		- LOG_BLOCK_HDR_SIZE;

	ut_ad(used_in_block < payload_size);

	/* Number of times the new data fills up a payload area */
	n_filled = (n_bytes + used_in_block) / payload_size;

	return(lsn + (n_bytes + n_filled * framing_size));
}
#ifdef UNIV_LOG_DEBUG
/***********************************************************
Debug check: asserts that the parser reports every proper prefix of a
complete log record, including the empty one, as incomplete, i.e. that
recv_parse_log_rec() returns 0 for it. */
static
void
recv_check_incomplete_log_recs(
/*===========================*/
	byte*	ptr,	/* in: pointer to a complete log record */
	ulint	len)	/* in: length of the log record */
{
	byte	type;
	ulint	space;
	ulint	page_no;
	byte*	body;
	ulint	prefix_len;

	for (prefix_len = 0; prefix_len < len; prefix_len++) {

		const ulint	parsed_len = recv_parse_log_rec(
			ptr, ptr + prefix_len,
			&type, &space, &page_no, &body);

		ut_a(0 == parsed_len);
	}
}
#endif /* UNIV_LOG_DEBUG */
/***********************************************************
Prints diagnostic info of corrupt log to stderr: the type, space id and page
number parsed from the corrupt record, how far parsing had succeeded, the
previously parsed record, and, when the distance between the two records is
reasonable, a hex dump of the parsing buffer around them. */
static
void
recv_report_corrupt_log(
/*====================*/
	byte*	ptr,	/* in: pointer to corrupt log record */
	byte	type,	/* in: type of the record */
	ulint	space,	/* in: space id, this may also be garbage */
	ulint	page_no)/* in: page number, this may also be garbage */
{
	/* Offset of the corrupt record in the parsing buffer, and the
	offset where the hex dump ends: 100 bytes after the start of the
	corrupt record */
	const ulint	corrupt_offset = (ulint) (ptr - recv_sys->buf);
	const ulint	dump_end = corrupt_offset + 100;

	fprintf(stderr,
		"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
		"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
		"InnoDB: Log parsing proceeded successfully up to %llu\n"
		"InnoDB: Previous log record type %lu, is multi %lu\n"
		"InnoDB: Recv offset %lu, prev %lu\n",
		(ulong) type, (ulong) space, (ulong) page_no,
		recv_sys->recovered_lsn,
		(ulong) recv_previous_parsed_rec_type,
		(ulong) recv_previous_parsed_rec_is_multi,
		(ulong) corrupt_offset,
		(ulong) recv_previous_parsed_rec_offset);

	/* Dump only if the previous record lies before the end of the dump
	and the dump would not become unreasonably long */

	if (dump_end > recv_previous_parsed_rec_offset
	    && dump_end - recv_previous_parsed_rec_offset < 200000) {

		/* Start 100 bytes before the previous record, but never
		before the start of the parsing buffer: the previous record
		may begin less than 100 bytes into the buffer, and reading
		in front of recv_sys->buf would be out of bounds */

		const ulint	dump_start
			= (recv_previous_parsed_rec_offset > 100)
			? recv_previous_parsed_rec_offset - 100
			: 0;

		fputs("InnoDB: Hex dump of corrupt log starting"
		      " 100 bytes before the start\n"
		      "InnoDB: of the previous log rec,\n"
		      "InnoDB: and ending 100 bytes after the start"
		      " of the corrupt rec:\n",
		      stderr);

		ut_print_buf(stderr,
			     recv_sys->buf + dump_start,
			     dump_end - dump_start);
		putc('\n', stderr);
	}

	fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
	      "InnoDB: is possible that the log scan did not proceed\n"
	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
	      "InnoDB: on your InnoDB tables to check that they are ok!\n"
	      "InnoDB: If mysqld crashes after this recovery, look at\n"
	      "InnoDB: " REFMAN "forcing-recovery.html\n"
	      "InnoDB: about forcing recovery.\n", stderr);

	fflush(stderr);
}
/***********************************************************
Parses log records from a buffer and stores them to a hash table to wait
merging to file pages. Parsing starts at recv_sys->recovered_offset in
recv_sys->buf and is repeated until the buffer is exhausted, an incomplete or
corrupt record is met, or a record ends beyond recv_sys->scanned_lsn.
recv_sys->recovered_offset and recv_sys->recovered_lsn are advanced past
every record that is accepted. */
static
ibool
recv_parse_log_recs(
/*================*/
/* out: currently always returns FALSE */
ibool store_to_hash) /* in: TRUE if the records should be stored
to the hash table; this is set to FALSE if just
debug checking is needed */
{
byte* ptr;
byte* end_ptr;
ulint single_rec;
ulint len;
ulint total_len;
ib_uint64_t new_recovered_lsn;
ib_uint64_t old_lsn;
byte type;
ulint space;
ulint page_no;
byte* body;
ulint n_recs;
ut_ad(mutex_own(&(log_sys->mutex)));
ut_ad(recv_sys->parse_start_lsn != 0);
loop:
/* ptr is the first unparsed byte, end_ptr the end of the data in the
parsing buffer */
ptr = recv_sys->buf + recv_sys->recovered_offset;
end_ptr = recv_sys->buf + recv_sys->len;
if (ptr == end_ptr) {
return(FALSE);
}
/* The first byte tells whether the record stands alone or belongs to a
group of records terminated by MLOG_MULTI_REC_END */
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
/* The mtr only modified a single page, or this is a file op */
old_lsn = recv_sys->recovered_lsn;
/* Try to parse a log record, fetching its type, space id,
page no, and a pointer to the body of the log record */
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
&page_no, &body);
if (len == 0 || recv_sys->found_corrupt_log) {
if (recv_sys->found_corrupt_log) {
recv_report_corrupt_log(ptr,
type, space, page_no);
}
return(FALSE);
}
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
if (new_recovered_lsn > recv_sys->scanned_lsn) {
/* The log record filled a log block, and we require
that also the next log block should have been scanned
in */
return(FALSE);
}
/* Remember this record for the diagnostics printed by
recv_report_corrupt_log() */
recv_previous_parsed_rec_type = (ulint)type;
recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
recv_previous_parsed_rec_is_multi = 0;
/* Accept the record: advance the parsing position */
recv_sys->recovered_offset += len;
recv_sys->recovered_lsn = new_recovered_lsn;
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Parsed a single log rec"
" type %lu len %lu space %lu page no %lu\n",
(ulong) type, (ulong) len, (ulong) space,
(ulong) page_no);
}
#endif /* UNIV_DEBUG */
if (type == MLOG_DUMMY_RECORD) {
/* Do nothing */
} else if (!store_to_hash) {
/* In debug checking, update a replicate page
according to the log record, and check that it
becomes identical with the original page */
#ifdef UNIV_LOG_DEBUG
recv_check_incomplete_log_recs(ptr, len);
#endif/* UNIV_LOG_DEBUG */
} else if (type == MLOG_FILE_CREATE
|| type == MLOG_FILE_CREATE2
|| type == MLOG_FILE_RENAME
|| type == MLOG_FILE_DELETE) {
/* File operation records are never added to the hash table */
ut_a(space);
#ifdef UNIV_HOTBACKUP
if (recv_replay_file_ops) {
/* In ibbackup --apply-log, replay an .ibd file
operation, if possible; note that
fil_path_to_mysql_datadir is set in ibbackup to
point to the datadir we should use there */
if (NULL == fil_op_log_parse_or_replay(
body, end_ptr, type,
space, page_no)) {
fprintf(stderr,
"InnoDB: Error: file op"
" log record of type %lu"
" space %lu not complete in\n"
"InnoDB: the replay phase."
" Path %s\n",
(ulint)type, space,
(char*)(body + 2));
ut_error;
}
}
#endif
/* In normal mysqld crash recovery we do not try to
replay file operations */
} else {
recv_add_to_hash_table(type, space, page_no, body,
ptr + len, old_lsn,
recv_sys->recovered_lsn);
}
} else {
/* Check that all the records associated with the single mtr
are included within the buffer */
/* First pass: parse up to and including MLOG_MULTI_REC_END without
advancing recv_sys->recovered_offset or recv_sys->recovered_lsn */
total_len = 0;
n_recs = 0;
for (;;) {
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
&page_no, &body);
if (len == 0 || recv_sys->found_corrupt_log) {
if (recv_sys->found_corrupt_log) {
recv_report_corrupt_log(
ptr, type, space, page_no);
}
return(FALSE);
}
recv_previous_parsed_rec_type = (ulint)type;
recv_previous_parsed_rec_offset
= recv_sys->recovered_offset + total_len;
recv_previous_parsed_rec_is_multi = 1;
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
#ifdef UNIV_LOG_DEBUG
recv_check_incomplete_log_recs(ptr, len);
#endif /* UNIV_LOG_DEBUG */
}
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Parsed a multi log rec"
" type %lu len %lu"
" space %lu page no %lu\n",
(ulong) type, (ulong) len,
(ulong) space, (ulong) page_no);
}
#endif /* UNIV_DEBUG */
total_len += len;
n_recs++;
ptr += len;
if (type == MLOG_MULTI_REC_END) {
/* Found the end mark for the records */
break;
}
}
/* new_recovered_lsn is the lsn at the end of the whole group */
new_recovered_lsn = recv_calc_lsn_on_data_add(
recv_sys->recovered_lsn, total_len);
if (new_recovered_lsn > recv_sys->scanned_lsn) {
/* The log record filled a log block, and we require
that also the next log block should have been scanned
in */
return(FALSE);
}
/* Add all the records to the hash table */
/* Second pass: parse the same records again from the start of the
group, this time advancing the parsing position record by record */
ptr = recv_sys->buf + recv_sys->recovered_offset;
for (;;) {
old_lsn = recv_sys->recovered_lsn;
len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
&page_no, &body);
if (recv_sys->found_corrupt_log) {
recv_report_corrupt_log(ptr,
type, space, page_no);
}
ut_a(len != 0);
ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
recv_sys->recovered_offset += len;
recv_sys->recovered_lsn
= recv_calc_lsn_on_data_add(old_lsn, len);
if (type == MLOG_MULTI_REC_END) {
/* Found the end mark for the records */
break;
}
if (store_to_hash) {
/* The start lsn is that of this record, while the end lsn passed
is that of the whole group */
recv_add_to_hash_table(type, space, page_no,
body, ptr + len,
old_lsn,
new_recovered_lsn);
}
ptr += len;
}
}
/* Go on with the next record or group of records */
goto loop;
}
/***********************************************************
Adds data from a new log block to the parsing buffer of recv_sys if
recv_sys->parse_start_lsn is non-zero. Only the part of the block data that
lies after both recv_sys->parse_start_lsn and recv_sys->scanned_lsn is
appended, and the block header and trailer are never copied. */
static
ibool
recv_sys_add_to_parsing_buf(
/*========================*/
					/* out: TRUE if more data added */
	const byte*	log_block,	/* in: log block */
	ib_uint64_t	scanned_lsn)	/* in: lsn of how far we were able
					to find data in this log block */
{
	ulint	more_len;
	ulint	data_len;
	ulint	start_offset;
	ulint	end_offset;

	ut_ad(scanned_lsn >= recv_sys->scanned_lsn);

	if (!recv_sys->parse_start_lsn) {
		/* Cannot start parsing yet because no start point for
		it found */

		return(FALSE);
	}

	data_len = log_block_get_data_len(log_block);

	/* more_len is the number of bytes at the end of the block data
	that have not been seen before */

	if (recv_sys->parse_start_lsn >= scanned_lsn) {

		return(FALSE);

	} else if (recv_sys->scanned_lsn >= scanned_lsn) {

		return(FALSE);

	} else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
		more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
	} else {
		more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
	}

	if (more_len == 0) {

		return(FALSE);
	}

	ut_ad(data_len >= more_len);

	/* Clip the range to be copied to the payload area of the block */

	start_offset = data_len - more_len;

	if (start_offset < LOG_BLOCK_HDR_SIZE) {
		start_offset = LOG_BLOCK_HDR_SIZE;
	}

	end_offset = data_len;

	if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
		end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
	}

	ut_ad(start_offset <= end_offset);

	if (start_offset < end_offset) {
		const ulint	copy_len = end_offset - start_offset;

		/* Verify that the data fits in the parsing buffer BEFORE
		copying it: checking only after the copy would detect an
		overflow when memory past the buffer has already been
		overwritten */

		ut_a(recv_sys->len + copy_len <= RECV_PARSING_BUF_SIZE);

		ut_memcpy(recv_sys->buf + recv_sys->len,
			  log_block + start_offset, copy_len);

		recv_sys->len += copy_len;
	}

	return(TRUE);
}
/***********************************************************
Moves the parsing buffer data left to the buffer start. */
static
void
recv_sys_justify_left_parsing_buf(void)
/*===================================*/
{
ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
recv_sys->len - recv_sys->recovered_offset);
recv_sys->len -= recv_sys->recovered_offset;
recv_sys->recovered_offset = 0;
}
/***********************************************************
Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found. Unless
UNIV_HOTBACKUP is defined, this function will apply log records
automatically when the hash table becomes full.

The buffer is processed one log block (OS_FILE_LOG_BLOCK_SIZE bytes) at a
time. Scanning stops at the first block whose header number or checksum does
not match, at a block that looks like a leftover from before the most recent
recovery, or at a block that is not completely filled with data. */
UNIV_INTERN
ibool
recv_scan_log_recs(
/*===============*/
/* out: TRUE if limit_lsn has been
reached, or not able to scan any more
in this log group */
ulint available_memory,/* in: we let the hash table of recs
to grow to this size, at the maximum */
ibool store_to_hash, /* in: TRUE if the records should be
stored to the hash table; this is set
to FALSE if just debug checking is
needed */
const byte* buf, /* in: buffer containing a log
segment or garbage */
ulint len, /* in: buffer length */
ib_uint64_t start_lsn, /* in: buffer start lsn */
ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log
groups contain contiguous log data up
to this lsn */
ib_uint64_t* group_scanned_lsn)/* out: scanning succeeded up to
this lsn */
{
const byte* log_block;
ulint no;
ib_uint64_t scanned_lsn;
ibool finished;
ulint data_len;
ibool more_data;
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len > 0);
ut_a(store_to_hash <= TRUE);
finished = FALSE;
log_block = buf;
scanned_lsn = start_lsn;
more_data = FALSE;
while (log_block < buf + len && !finished) {
/* The block number stored in the header must be the one that
corresponds to the lsn we have scanned up to */
no = log_block_get_hdr_no(log_block);
/*
fprintf(stderr, "Log block header no %lu\n", no);
fprintf(stderr, "Scanned lsn no %lu\n",
log_block_convert_lsn_to_no(scanned_lsn));
*/
if (no != log_block_convert_lsn_to_no(scanned_lsn)
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
/* Report only the case where the header number is right but the
checksum is not */
if (no == log_block_convert_lsn_to_no(scanned_lsn)
&& !log_block_checksum_is_ok_or_old_format(
log_block)) {
fprintf(stderr,
"InnoDB: Log block no %lu at"
" lsn %llu has\n"
"InnoDB: ok header, but checksum field"
" contains %lu, should be %lu\n",
(ulong) no,
scanned_lsn,
(ulong) log_block_get_checksum(
log_block),
(ulong) log_block_calc_checksum(
log_block));
}
/* Garbage or an incompletely written log block */
finished = TRUE;
break;
}
if (log_block_get_flush_bit(log_block)) {
/* This block was a start of a log flush operation:
we know that the previous flush operation must have
been completed for all log groups before this block
can have been flushed to any of the groups. Therefore,
we know that log data is contiguous up to scanned_lsn
in all non-corrupt log groups. */
if (scanned_lsn > *contiguous_lsn) {
*contiguous_lsn = scanned_lsn;
}
}
data_len = log_block_get_data_len(log_block);
/* Stop if the block would bring new data but its checkpoint number
is more than 0x80000000 smaller than the checkpoint number of the
latest block accepted */
if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
&& scanned_lsn + data_len > recv_sys->scanned_lsn
&& (recv_sys->scanned_checkpoint_no > 0)
&& (log_block_get_checkpoint_no(log_block)
< recv_sys->scanned_checkpoint_no)
&& (recv_sys->scanned_checkpoint_no
- log_block_get_checkpoint_no(log_block)
> 0x80000000UL)) {
/* Garbage from a log buffer flush which was made
before the most recent database recovery */
finished = TRUE;
#ifdef UNIV_LOG_DEBUG
/* This is not really an error, but currently
we stop here in the debug version: */
ut_error;
#endif
break;
}
if (!recv_sys->parse_start_lsn
&& (log_block_get_first_rec_group(log_block) > 0)) {
/* We found a point from which to start the parsing
of log records */
recv_sys->parse_start_lsn = scanned_lsn
+ log_block_get_first_rec_group(log_block);
recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
}
scanned_lsn += data_len;
if (scanned_lsn > recv_sys->scanned_lsn) {
/* We have found more entries. If this scan is
of startup type, we must initiate crash recovery
environment before parsing these log records. */
#ifndef UNIV_HOTBACKUP
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
fprintf(stderr,
"InnoDB: Log scan progressed"
" past the checkpoint lsn %llu\n",
recv_sys->scanned_lsn);
recv_init_crash_recovery();
}
#endif /* !UNIV_HOTBACKUP */
/* We were able to find more log data: add it to the
parsing buffer if parse_start_lsn is already
non-zero */
/* Leave a margin of four log blocks at the end of the parsing
buffer; if the margin is gone, the log is flagged as corrupt
instead of copying more data */
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
>= RECV_PARSING_BUF_SIZE) {
fprintf(stderr,
"InnoDB: Error: log parsing"
" buffer overflow."
" Recovery may have failed!\n");
recv_sys->found_corrupt_log = TRUE;
} else if (!recv_sys->found_corrupt_log) {
more_data = recv_sys_add_to_parsing_buf(
log_block, scanned_lsn);
}
recv_sys->scanned_lsn = scanned_lsn;
recv_sys->scanned_checkpoint_no
= log_block_get_checkpoint_no(log_block);
}
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
/* Log data for this group ends here */
finished = TRUE;
} else {
log_block += OS_FILE_LOG_BLOCK_SIZE;
}
}
*group_scanned_lsn = scanned_lsn;
/* Print progress when scanning has finished, and on every 80th call
in between */
if (recv_needed_recovery
|| (recv_is_from_backup && !recv_is_making_a_backup)) {
recv_scan_print_counter++;
if (finished || (recv_scan_print_counter % 80 == 0)) {
fprintf(stderr,
"InnoDB: Doing recovery: scanned up to"
" log sequence number %llu\n",
*group_scanned_lsn);
}
}
if (more_data && !recv_sys->found_corrupt_log) {
/* Try to parse more log records */
recv_parse_log_recs(store_to_hash);
#ifndef UNIV_HOTBACKUP
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
> available_memory) {
/* Hash table of log records has grown too big:
empty it; FALSE means no ibuf operations
allowed, as we cannot add new records to the
log yet: they would be produced by ibuf
operations */
recv_apply_hashed_log_recs(FALSE);
}
#endif /* !UNIV_HOTBACKUP */
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
/* Move parsing buffer data to the buffer start */
recv_sys_justify_left_parsing_buf();
}
}
return(finished);
}
#ifndef UNIV_HOTBACKUP
/***********************************************************
Scans the log of one log group, starting from *contiguous_lsn. The log is
read RECV_SCAN_SIZE bytes at a time to log_sys->buf and handed to
recv_scan_log_recs(), until that function reports that scanning has
finished. */
static
void
recv_group_scan_log_recs(
/*=====================*/
	log_group_t*	group,		/* in: log group */
	ib_uint64_t*	contiguous_lsn,	/* in/out: it is known that all log
					groups contain contiguous log data up
					to this lsn */
	ib_uint64_t*	group_scanned_lsn)/* out: scanning succeeded up to
					this lsn */
{
	ib_uint64_t	seg_start = *contiguous_lsn;
	ibool		done;

	do {
		const ib_uint64_t	seg_end = seg_start + RECV_SCAN_SIZE;

		log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
				       group, seg_start, seg_end);

		/* The hash table of log records may use the memory of the
		buffer pool frames that are not reserved to stay free */

		done = recv_scan_log_recs(
			(buf_pool->curr_size - recv_n_pool_free_frames)
			* UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
			seg_start, contiguous_lsn, group_scanned_lsn);

		seg_start = seg_end;
	} while (!done);

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"InnoDB: Scanned group %lu up to"
			" log sequence number %llu\n",
			(ulong) group->id,
			*group_scanned_lsn);
	}
#endif /* UNIV_DEBUG */
}
/***********************************************************
Initialize crash recovery environment. Can be called iff
recv_needed_recovery == FALSE. Sets recv_needed_recovery, loads the
single-table tablespaces and, unless srv_force_recovery is at least
SRV_FORCE_NO_LOG_REDO, restores pages from the doublewrite buffer. */
static
void
recv_init_crash_recovery(void)
/*==========================*/
{
	ut_a(!recv_needed_recovery);

	recv_needed_recovery = TRUE;

	ut_print_timestamp(stderr);

	fputs(" InnoDB: Database was not"
	      " shut down normally!\n"
	      "InnoDB: Starting crash recovery.\n", stderr);

	fputs("InnoDB: Reading tablespace information"
	      " from the .ibd files...\n", stderr);

	fil_load_single_table_tablespaces();

	if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
		/* Log redo is skipped: leave the data pages alone */

		return;
	}

	/* If we are using the doublewrite method, we will
	check if there are half-written pages in data files,
	and restore them from the doublewrite buffer if
	possible */

	fputs("InnoDB: Restoring possible"
	      " half-written data pages from"
	      " the doublewrite\n"
	      "InnoDB: buffer...\n", stderr);

	trx_sys_doublewrite_init_or_restore_pages(TRUE);
}
/************************************************************
Recovers from a checkpoint. When this function returns, the database is able
to start processing of new user transactions, but the function
recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.

The function finds the latest checkpoint, scans the log groups starting
from the checkpoint lsn (aligned down to a log block boundary), starts
crash recovery, unless it was started already, if the checkpoint lsn
differs from the flushed lsns passed in by the caller, and finally
positions log_sys at the recovered lsn. If srv_force_recovery is at least
SRV_FORCE_NO_LOG_REDO, it returns DB_SUCCESS without reading the log. */
UNIV_INTERN
ulint
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
/* out: error code or DB_SUCCESS */
#ifdef UNIV_LOG_ARCHIVE
ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
ib_uint64_t limit_lsn, /* in: recover up to this lsn
if possible */
#endif /* UNIV_LOG_ARCHIVE */
ib_uint64_t min_flushed_lsn,/* in: min flushed lsn from
data files */
ib_uint64_t max_flushed_lsn)/* in: max flushed lsn from
data files */
{
log_group_t* group;
log_group_t* max_cp_group;
log_group_t* up_to_date_group;
ulint max_cp_field;
ib_uint64_t checkpoint_lsn;
ib_uint64_t checkpoint_no;
ib_uint64_t old_scanned_lsn;
ib_uint64_t group_scanned_lsn;
ib_uint64_t contiguous_lsn;
ib_uint64_t archived_lsn;
byte* buf;
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
ulint err;
/* TYPE_CHECKPOINT and LIMIT_LSN stand in for the two parameters that
exist only in UNIV_LOG_ARCHIVE builds; without archiving they are the
constants 1 and IB_ULONGLONG_MAX. Both macros are #undef'ed at the end
of this function. */
#ifdef UNIV_LOG_ARCHIVE
ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
# define LIMIT_LSN limit_lsn
#else /* UNIV_LOG_ARCHIVE */
# define TYPE_CHECKPOINT 1
# define LIMIT_LSN IB_ULONGLONG_MAX
#endif /* UNIV_LOG_ARCHIVE */
/* Only a checkpoint recovery creates the recovery system here;
recv_recovery_from_archive_start() creates it itself before calling
this function */
if (TYPE_CHECKPOINT) {
recv_sys_create();
recv_sys_init(buf_pool_get_curr_size());
}
/* Forced recovery without log redo: return before recv_recovery_on is
set and before log_sys->mutex is acquired */
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
fprintf(stderr,
"InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
fprintf(stderr,
"InnoDB: Skipping log redo\n");
return(DB_SUCCESS);
}
recv_recovery_on = TRUE;
recv_sys->limit_lsn = LIMIT_LSN;
/* log_sys->mutex is held from here on; every return statement below
releases it first */
mutex_enter(&(log_sys->mutex));
/* Look for the latest checkpoint from any of the log groups */
err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
if (err != DB_SUCCESS) {
mutex_exit(&(log_sys->mutex));
return(err);
}
/* The checkpoint fields are decoded from log_sys->checkpoint_buf after
the read below */
log_group_read_checkpoint_info(max_cp_group, max_cp_field);
buf = log_sys->checkpoint_buf;
checkpoint_lsn = mach_read_ull(buf + LOG_CHECKPOINT_LSN);
checkpoint_no = mach_read_ull(buf + LOG_CHECKPOINT_NO);
archived_lsn = mach_read_ull(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
/* Read the first log file header to print a note if this is
a recovery from a restored InnoDB Hot Backup */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
log_hdr_buf, max_cp_group);
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
/* This log file was created by ibbackup (the message below says
--apply-log): print a note to the user about it.
NOTE(review): the label is printed with %s straight from the header
buffer; this presumably relies on the label being NUL-terminated
inside the header - confirm against the code that writes it. */
fprintf(stderr,
"InnoDB: The log file was created by"
" ibbackup --apply-log at\n"
"InnoDB: %s\n",
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
fprintf(stderr,
"InnoDB: NOTE: the following crash recovery"
" is part of a normal restore.\n");
/* Wipe over the label now. Only its first 4 bytes are replaced with
spaces, which is enough for the 8-byte "ibbackup" comparison above to
fail once the header has been written back. */
memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
' ', 4);
/* Write to the log file to wipe over the label; the first
OS_FILE_LOG_BLOCK_SIZE bytes of the header buffer are written */
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
log_hdr_buf, max_cp_group);
}
#ifdef UNIV_LOG_ARCHIVE
/* Load the archive position of every log group from the checkpoint
buffer */
group = UT_LIST_GET_FIRST(log_sys->log_groups);
while (group) {
log_checkpoint_get_nth_group_info(buf, group->id,
&(group->archived_file_no),
&(group->archived_offset));
group = UT_LIST_GET_NEXT(log_groups, group);
}
#endif /* UNIV_LOG_ARCHIVE */
if (TYPE_CHECKPOINT) {
/* Start reading the log groups from the checkpoint lsn up. The
variable contiguous_lsn contains an lsn up to which the log is
known to be contiguously written to all log groups. */
recv_sys->parse_start_lsn = checkpoint_lsn;
recv_sys->scanned_lsn = checkpoint_lsn;
recv_sys->scanned_checkpoint_no = 0;
recv_sys->recovered_lsn = checkpoint_lsn;
srv_start_lsn = checkpoint_lsn;
}
/* Scanning starts at the log block boundary at or below
recv_sys->scanned_lsn */
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE);
if (TYPE_CHECKPOINT) {
up_to_date_group = max_cp_group;
#ifdef UNIV_LOG_ARCHIVE
} else {
ulint capacity;
/* Try to recover the remaining part from logs: first from
the logs of the archived group */
group = recv_sys->archive_group;
capacity = log_group_get_capacity(group);
if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
|| checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
mutex_exit(&(log_sys->mutex));
/* The group does not contain enough log: probably
an archived log file was missing or corrupt */
return(DB_ERROR);
}
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
if (recv_sys->scanned_lsn < checkpoint_lsn) {
mutex_exit(&(log_sys->mutex));
/* The group did not contain enough log: an archived
log file was missing or invalid, or the log group
was corrupt */
return(DB_ERROR);
}
group->scanned_lsn = group_scanned_lsn;
up_to_date_group = group;
#endif /* UNIV_LOG_ARCHIVE */
}
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
group = UT_LIST_GET_FIRST(log_sys->log_groups);
#ifdef UNIV_LOG_ARCHIVE
/* In archive recovery the archive group was scanned above already, so
it is skipped if it is the first group in the list */
if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
}
#endif /* UNIV_LOG_ARCHIVE */
/* Set the flag to publish that we are doing startup scan. */
recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
/* Scan each log group in turn. A group whose scan ends beyond the
value recv_sys->scanned_lsn had before the scan becomes the
up-to-date group. */
while (group) {
old_scanned_lsn = recv_sys->scanned_lsn;
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
group->scanned_lsn = group_scanned_lsn;
if (old_scanned_lsn < group_scanned_lsn) {
/* We found a more up-to-date group */
up_to_date_group = group;
}
#ifdef UNIV_LOG_ARCHIVE
/* NOTE(review): this test looks at the group that was just scanned,
not at the next one. If the archive group is not the first group, it
is scanned here and its successor is skipped; if it is the last group,
the unconditional UT_LIST_GET_NEXT below is applied to a NULL pointer.
Presumably the intent was to skip the archive group itself - confirm
before enabling UNIV_LOG_ARCHIVE. */
if ((type == LOG_ARCHIVE)
&& (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
}
#endif /* UNIV_LOG_ARCHIVE */
group = UT_LIST_GET_NEXT(log_groups, group);
}
/* Done with startup scan. Clear the flag. */
recv_log_scan_is_startup_type = FALSE;
if (TYPE_CHECKPOINT) {
/* NOTE: we always do a 'recovery' at startup, but only if
there is something wrong we will print a message to the
user about recovery: */
if (checkpoint_lsn != max_flushed_lsn
|| checkpoint_lsn != min_flushed_lsn) {
if (checkpoint_lsn < max_flushed_lsn) {
fprintf(stderr,
"InnoDB: #########################"
"#################################\n"
"InnoDB: "
"WARNING!\n"
"InnoDB: The log sequence number"
" in ibdata files is higher\n"
"InnoDB: than the log sequence number"
" in the ib_logfiles! Are you sure\n"
"InnoDB: you are using the right"
" ib_logfiles to start up"
" the database?\n"
"InnoDB: Log sequence number in"
" ib_logfiles is %llu, log\n"
"InnoDB: sequence numbers stamped"
" to ibdata file headers are between\n"
"InnoDB: %llu and %llu.\n"
"InnoDB: #########################"
"#################################\n",
checkpoint_lsn,
min_flushed_lsn,
max_flushed_lsn);
}
/* Crash recovery is initialized here only if it has not been
flagged as needed already */
if (!recv_needed_recovery) {
fprintf(stderr,
"InnoDB: The log sequence number"
" in ibdata files does not match\n"
"InnoDB: the log sequence number"
" in the ib_logfiles!\n");
recv_init_crash_recovery();
}
}
if (!recv_needed_recovery) {
/* Init the doublewrite buffer memory structure */
trx_sys_doublewrite_init_or_restore_pages(FALSE);
}
}
/* We currently have only one log group. group_scanned_lsn holds the
value stored by the last recv_group_scan_log_recs() call above. */
if (group_scanned_lsn < checkpoint_lsn) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log"
" up to\n"
"InnoDB: %llu, but a checkpoint was at %llu.\n"
"InnoDB: It is possible that"
" the database is now corrupt!\n",
group_scanned_lsn,
checkpoint_lsn);
}
/* NOTE(review): the message below reads "a database page a had an
lsn"; the stray "a" is runtime output and is left untouched here. */
if (group_scanned_lsn < recv_max_page_lsn) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log"
" up to %llu\n"
"InnoDB: but a database page a had an lsn %llu."
" It is possible that the\n"
"InnoDB: database is now corrupt!\n",
group_scanned_lsn,
recv_max_page_lsn);
}
/* Recovery stopped before the checkpoint: this is accepted only when
the requested limit lsn was reached; anything else is a fatal error.
NOTE(review): the return after ut_error is presumably unreachable. */
if (recv_sys->recovered_lsn < checkpoint_lsn) {
mutex_exit(&(log_sys->mutex));
if (recv_sys->recovered_lsn >= LIMIT_LSN) {
return(DB_SUCCESS);
}
ut_error;
return(DB_ERROR);
}
/* Synchronize the uncorrupted log groups to the most up-to-date log
group; we also copy checkpoint info to groups */
log_sys->next_checkpoint_lsn = checkpoint_lsn;
log_sys->next_checkpoint_no = checkpoint_no + 1;
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = archived_lsn;
#endif /* UNIV_LOG_ARCHIVE */
recv_synchronize_groups(up_to_date_group);
if (!recv_needed_recovery) {
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
} else {
srv_start_lsn = recv_sys->recovered_lsn;
}
/* Position the log system at the recovered lsn: the last scanned block
becomes the first block of the log buffer, and buf_free is the offset
of the recovered lsn within that block */
log_sys->lsn = recv_sys->recovered_lsn;
ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
log_sys->buf_next_to_write = log_sys->buf_free;
log_sys->written_to_some_lsn = log_sys->lsn;
log_sys->written_to_all_lsn = log_sys->lsn;
log_sys->last_checkpoint_lsn = checkpoint_lsn;
/* NOTE(review): next_checkpoint_no was assigned the same value before
recv_synchronize_groups(); presumably this repeats it in case that call
changed the field - confirm. */
log_sys->next_checkpoint_no = checkpoint_no + 1;
#ifdef UNIV_LOG_ARCHIVE
if (archived_lsn == IB_ULONGLONG_MAX) {
log_sys->archiving_state = LOG_ARCH_OFF;
}
#endif /* UNIV_LOG_ARCHIVE */
/* apply_log_recs is switched on under recv_sys->mutex, while
log_sys->mutex is still held */
mutex_enter(&(recv_sys->mutex));
recv_sys->apply_log_recs = TRUE;
mutex_exit(&(recv_sys->mutex));
mutex_exit(&(log_sys->mutex));
recv_lsn_checks_on = TRUE;
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */
return(DB_SUCCESS);
#undef TYPE_CHECKPOINT
#undef LIMIT_LSN
}
/************************************************************
Completes recovery from a checkpoint. Applies the hashed log records
(unless srv_force_recovery disables log redo), warns if the log was found
corrupt, frees the recovery system, drops partially created indexes and
finally, unless srv_force_recovery disables transaction undo, starts a
thread that rolls back or cleans up the recovered transactions. */
UNIV_INTERN
void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
{
	/* Apply the hashed log records to the respective file pages */

	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {

		recv_apply_hashed_log_recs(TRUE);
	}

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"InnoDB: Log records applied to the database\n");
	}
#endif /* UNIV_DEBUG */

	if (recv_needed_recovery) {
		trx_sys_print_mysql_master_log_pos();
		trx_sys_print_mysql_binlog_offset();
	}

	/* The corruption flag is inspected before the recovery system
	is freed below */
	if (recv_sys->found_corrupt_log) {

		fprintf(stderr,
			"InnoDB: WARNING: the log file may have been"
			" corrupt and it\n"
			"InnoDB: is possible that the log scan or parsing"
			" did not proceed\n"
			"InnoDB: far enough in recovery. Please run"
			" CHECK TABLE\n"
			"InnoDB: on your InnoDB tables to check that"
			" they are ok!\n"
			"InnoDB: It may be safest to recover your"
			" InnoDB database from\n"
			"InnoDB: a backup!\n");
	}

	/* Free the resources of the recovery system */

	recv_recovery_on = FALSE;

#ifndef UNIV_LOG_DEBUG
	recv_sys_free();
#endif
	/* Drop partially created indexes. */
	row_merge_drop_temp_indexes();

#ifdef UNIV_SYNC_DEBUG
	/* Wait for a while so that created threads have time to suspend
	themselves before we switch the latching order checks on */
	os_thread_sleep(1000000);

	/* Switch latching order checks on in sync0sync.c */
	sync_order_checks_on = TRUE;
#endif
	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
		/* Rollback the uncommitted transactions which have no user
		session. The thread is given no argument: it may still be
		running after this function has returned, so it must not be
		handed the address of one of our local variables (the old
		code passed a pointer to an uninitialized automatic int,
		which nothing could validly read). */

		os_thread_create(trx_rollback_or_clean_all_recovered,
				 NULL, NULL);
	}
}
/**********************************************************
Resets the logs. The contents of log files will be lost!
The caller must hold log_sys->mutex. The mutex is released while the two
checkpoints at the end are made and is held again when the function
returns. */
UNIV_INTERN
void
recv_reset_logs(
/*============*/
	ib_uint64_t	lsn,		/* in: reset to this lsn
					rounded up to be divisible by
					OS_FILE_LOG_BLOCK_SIZE, after
					which we add
					LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
	ulint		arch_log_no,	/* in: next archived log file number */
#endif /* UNIV_LOG_ARCHIVE */
	ibool		new_logs_created)/* in: TRUE if resetting logs
					is done at the log creation;
					FALSE if it is done after
					archive recovery */
{
	log_group_t*	grp;

	ut_ad(mutex_own(&(log_sys->mutex)));

	log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);

	/* Point every log group at the new start lsn, right after the
	log file header */
	for (grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	     grp != NULL;
	     grp = UT_LIST_GET_NEXT(log_groups, grp)) {

		grp->lsn = log_sys->lsn;
		grp->lsn_offset = LOG_FILE_HDR_SIZE;
#ifdef UNIV_LOG_ARCHIVE
		grp->archived_file_no = arch_log_no;
		grp->archived_offset = 0;
#endif /* UNIV_LOG_ARCHIVE */

		if (new_logs_created) {
			/* Freshly created logs are not truncated */
			continue;
		}

		recv_truncate_group(grp, grp->lsn, grp->lsn,
				    grp->lsn, grp->lsn);
	}

	/* Reset the log system bookkeeping */
	log_sys->buf_next_to_write = 0;
	log_sys->written_to_some_lsn = log_sys->lsn;
	log_sys->written_to_all_lsn = log_sys->lsn;

	log_sys->next_checkpoint_no = 0;
	log_sys->last_checkpoint_lsn = 0;

#ifdef UNIV_LOG_ARCHIVE
	log_sys->archived_lsn = log_sys->lsn;
#endif /* UNIV_LOG_ARCHIVE */

	/* Start the log buffer with an initialized block whose first
	record group begins right after the block header; the lsn then
	moves past that header */
	log_block_init(log_sys->buf, log_sys->lsn);
	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);

	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
	log_sys->lsn += LOG_BLOCK_HDR_SIZE;

	mutex_exit(&(log_sys->mutex));

	/* Reset the checkpoint fields in logs: the checkpoint is made
	twice, outside the mutex */
	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);

	mutex_enter(&(log_sys->mutex));
}
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_HOTBACKUP
/**********************************************************
Creates new log files after a backup has been restored. Each of the
n_log_files files is created, set to log_file_size, flushed and closed.
After that the start of the first file is overwritten with a log file
header that carries a checkpoint, followed by one initialized log block
for lsn. Every failure is reported on stderr and ends the process with
exit(1). */
UNIV_INTERN
void
recv_reset_log_files_for_backup(
/*============================*/
	const char*	log_dir,	/* in: log file directory path;
					the file name is appended to it
					as is, without a separator */
	ulint		n_log_files,	/* in: number of log files */
	ulint		log_file_size,	/* in: log file size */
	ib_uint64_t	lsn)		/* in: new start lsn, must be
					divisible by OS_FILE_LOG_BLOCK_SIZE */
{
	os_file_t	log_file;
	ibool		success;
	byte*		buf;
	ulint		i;
	ulint		log_dir_len;
	char		name[5000];
	static const char ib_logfile_basename[] = "ib_logfile";

	log_dir_len = strlen(log_dir);

	/* full path name of ib_logfile consists of log dir path + basename
	+ number. This must fit in the name buffer.
	*/
	ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));

	/* Zero-filled image of the log file header plus one log block */
	buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
	memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);

	for (i = 0; i < n_log_files; i++) {

		sprintf(name, "%s%s%lu", log_dir,
			ib_logfile_basename, (ulong)i);

		log_file = os_file_create_simple(name, OS_FILE_CREATE,
						 OS_FILE_READ_WRITE, &success);
		if (!success) {
			fprintf(stderr,
				"InnoDB: Cannot create %s. Check that"
				" the file does not exist yet.\n", name);

			exit(1);
		}

		/* The size is printed and passed on as its high and low
		32-bit halves */
		fprintf(stderr,
			"Setting log file size to %lu %lu\n",
			(ulong) ut_get_high32(log_file_size),
			(ulong) (log_file_size & 0xFFFFFFFFUL));

		success = os_file_set_size(name, log_file,
					   log_file_size & 0xFFFFFFFFUL,
					   ut_get_high32(log_file_size));

		if (!success) {
			fprintf(stderr,
				"InnoDB: Cannot set %s size to %lu %lu\n",
				name, (ulong) ut_get_high32(log_file_size),
				(ulong) (log_file_size & 0xFFFFFFFFUL));
			exit(1);
		}

		os_file_flush(log_file);
		os_file_close(log_file);
	}

	/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */

	log_reset_first_header_and_checkpoint(buf, lsn);

	log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
	log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
				      LOG_BLOCK_HDR_SIZE);

	/* Reopen the first log file and write the prepared image to
	its start */
	sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);

	log_file = os_file_create_simple(name, OS_FILE_OPEN,
					 OS_FILE_READ_WRITE, &success);
	if (!success) {
		fprintf(stderr, "InnoDB: Cannot open %s.\n", name);

		exit(1);
	}

	/* A failed header write must not pass for a successful reset: it
	is handled like the other failures above.
	NOTE(review): relies on os_file_write() returning TRUE on success,
	in the same way as os_file_set_size() above - confirm in
	os0file.h. */
	success = os_file_write(name, log_file, buf, 0, 0,
				LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
	if (!success) {
		fprintf(stderr,
			"InnoDB: Cannot write the log file header to %s.\n",
			name);

		exit(1);
	}

	os_file_flush(log_file);
	os_file_close(log_file);

	ut_free(buf);
}
#endif /* UNIV_HOTBACKUP */
#ifdef UNIV_LOG_ARCHIVE
/**********************************************************
Reads from the archive of a log group and performs recovery. The archived
file group->archived_file_no of the group is opened, its header is
validated, and its contents are scanned in pieces of at most
RECV_SCAN_SIZE bytes.
NOTE(review): the body starts with ut_a(0), so this code presumably is
disabled on purpose; see the review notes inside before re-enabling it. */
static
ibool
log_group_recover_from_archive_file(
/*================================*/
/* out: TRUE if no more complete
consistent archive files */
log_group_t* group) /* in: log group */
{
os_file_t file_handle;
ib_uint64_t start_lsn;
ib_uint64_t file_end_lsn;
ib_uint64_t dummy_lsn;
ib_uint64_t scanned_lsn;
ulint len;
ibool ret;
byte* buf;
ulint read_offset;
ulint file_size;
ulint file_size_high;
int input_char;
char name[10000];
/* Unconditional assertion: nothing below is expected to run */
ut_a(0);
try_open_again:
buf = log_sys->buf;
/* Add the file to the archive file space; open the file */
log_archived_file_name_gen(name, group->id, group->archived_file_no);
file_handle = os_file_create(name, OS_FILE_OPEN,
OS_FILE_LOG, OS_FILE_AIO, &ret);
if (ret == FALSE) {
/* The file could not be opened: ask the user on the terminal whether
more archived files will be supplied */
ask_again:
fprintf(stderr,
"InnoDB: Do you want to copy additional"
" archived log files\n"
"InnoDB: to the directory\n");
fprintf(stderr,
"InnoDB: or were these all the files needed"
" in recovery?\n");
fprintf(stderr,
"InnoDB: (Y == copy more files; N == this is all)?");
input_char = getchar();
/* 'N' ends the archive recovery, 'Y' retries the open, and every
other character asks again.
NOTE(review): an EOF from getchar() also takes the last branch, so a
closed stdin would keep this prompt looping. */
if (input_char == (int) 'N') {
return(TRUE);
} else if (input_char == (int) 'Y') {
goto try_open_again;
} else {
goto ask_again;
}
}
ret = os_file_get_size(file_handle, &file_size, &file_size_high);
ut_a(ret);
/* Only files whose size fits in the low word are handled */
ut_a(file_size_high == 0);
fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
/* The handle was needed only for the size; the result of the close is
asserted after the header size check below */
ret = os_file_close(file_handle);
if (file_size < LOG_FILE_HDR_SIZE) {
fprintf(stderr,
"InnoDB: Archive file header incomplete %s\n", name);
return(TRUE);
}
ut_a(ret);
/* Add the archive file as a node to the space */
fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
group->archive_space_id, FALSE);
#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
#endif
/* Read the archive file header.
NOTE(review): fil_io() is called with 8 arguments here and in the scan
loop below, whereas recv_recovery_from_checkpoint_start_func() in this
file passes 9; this call presumably predates a signature change and
would need an update before UNIV_LOG_ARCHIVE can be built. */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
LOG_FILE_HDR_SIZE, buf, NULL);
/* Check if the archive file header is consistent */
if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
|| mach_read_from_4(buf + LOG_FILE_NO)
!= group->archived_file_no) {
fprintf(stderr,
"InnoDB: Archive file header inconsistent %s\n", name);
return(TRUE);
}
if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
fprintf(stderr,
"InnoDB: Archive file not completely written %s\n",
name);
return(TRUE);
}
start_lsn = mach_read_ull(buf + LOG_FILE_START_LSN);
file_end_lsn = mach_read_ull(buf + LOG_FILE_END_LSN);
/* A zero scanned_lsn means that no archive file has been scanned yet:
the first file must start at or before the lsn parsing starts from */
if (!recv_sys->scanned_lsn) {
if (recv_sys->parse_start_lsn < start_lsn) {
fprintf(stderr,
"InnoDB: Archive log file %s"
" starts from too big a lsn\n",
name);
return(TRUE);
}
recv_sys->scanned_lsn = start_lsn;
}
/* The file must continue exactly where the scan stopped */
if (recv_sys->scanned_lsn != start_lsn) {
fprintf(stderr,
"InnoDB: Archive log file %s starts from"
" a wrong lsn\n",
name);
return(TRUE);
}
read_offset = LOG_FILE_HDR_SIZE;
/* Scan the file body piece by piece. The last piece is cut down to a
whole number of log blocks; the loop ends when nothing is left. */
for (;;) {
len = RECV_SCAN_SIZE;
if (read_offset + len > file_size) {
len = ut_calc_align_down(file_size - read_offset,
OS_FILE_LOG_BLOCK_SIZE);
}
if (len == 0) {
break;
}
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr,
"InnoDB: Archive read starting at"
" lsn %llu, len %lu from file %s\n",
start_lsn,
(ulong) len, name);
}
#endif /* UNIV_DEBUG */
fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
/* NOTE(review): the memory limit is computed from buf_pool->n_frames
here, while recv_group_scan_log_recs() in this file uses
buf_pool->curr_size for the same argument - confirm which field
exists. */
ret = recv_scan_log_recs(
(buf_pool->n_frames - recv_n_pool_free_frames)
* UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
&dummy_lsn, &scanned_lsn);
/* Reaching the end lsn recorded in the header means that the whole
file was scanned */
if (scanned_lsn == file_end_lsn) {
return(FALSE);
}
/* The scan reported that it finished before the end lsn of the file
was reached */
if (ret) {
fprintf(stderr,
"InnoDB: Archive log file %s"
" does not scan right\n",
name);
return(TRUE);
}
read_offset += len;
start_lsn += len;
ut_ad(start_lsn == scanned_lsn);
}
return(FALSE);
}
/************************************************************
Recovers from archived log files, and also from log files, if they exist.
The archived files of the log group with id 0 are processed one after
another, starting from first_log_no; if limit_lsn has not been reached
after that, recovery continues from the ordinary log files. When a limit
lsn was given, the hashed log records are applied and the logs are reset
to the recovered lsn. */
UNIV_INTERN
ulint
recv_recovery_from_archive_start(
/*=============================*/
					/* out: error code or DB_SUCCESS */
	ib_uint64_t	min_flushed_lsn,/* in: min flushed lsn field from the
					data files */
	ib_uint64_t	limit_lsn,	/* in: recover up to this lsn if
					possible */
	ulint		first_log_no)	/* in: number of the first archived
					log file to use in the recovery; the
					file will be searched from
					INNOBASE_LOG_ARCH_DIR specified in
					server config file */
{
	log_group_t*	grp;
	ulint		wanted_id;
	ulint		trunc_len;
	ibool		no_more_files;
	ulint		err;

	ut_a(0);

	recv_sys_create();
	recv_sys_init(buf_pool_get_curr_size());

	recv_recovery_on = TRUE;
	recv_recovery_from_backup_on = TRUE;

	recv_sys->limit_lsn = limit_lsn;

	/* Look up the log group whose archive is used */
	wanted_id = 0;

	for (grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	     grp != NULL && grp->id != wanted_id;
	     grp = UT_LIST_GET_NEXT(log_groups, grp)) {
		/* not this one: keep looking */
	}

	if (grp == NULL) {
		fprintf(stderr,
			"InnoDB: There is no log group defined with id %lu!\n",
			(ulong) wanted_id);

		return(DB_ERROR);
	}

	grp->archived_file_no = first_log_no;

	recv_sys->parse_start_lsn = min_flushed_lsn;

	recv_sys->scanned_lsn = 0;
	recv_sys->scanned_checkpoint_no = 0;
	recv_sys->recovered_lsn = recv_sys->parse_start_lsn;

	recv_sys->archive_group = grp;

	mutex_enter(&(log_sys->mutex));

	/* Process archive files until one of them reports that there
	are no more complete consistent files */
	do {
		no_more_files = log_group_recover_from_archive_file(grp);

		/* Close and truncate a possible processed archive file
		from the file space */

		trunc_len = UNIV_PAGE_SIZE
			* fil_space_get_size(grp->archive_space_id);

		if (trunc_len > 0) {
			fil_space_truncate_start(grp->archive_space_id,
						 trunc_len);
		}

		grp->archived_file_no++;
	} while (!no_more_files);

	if (recv_sys->recovered_lsn < limit_lsn) {
		/* The archive did not take us to the limit: continue
		from the log files. The mutex is released around the
		call. */

		if (recv_sys->scanned_lsn == 0) {

			recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
		}

		mutex_exit(&(log_sys->mutex));

		err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
							  limit_lsn,
							  IB_ULONGLONG_MAX,
							  IB_ULONGLONG_MAX);
		if (err != DB_SUCCESS) {

			return(err);
		}

		mutex_enter(&(log_sys->mutex));
	}

	if (limit_lsn != IB_ULONGLONG_MAX) {
		/* Recovery to a given lsn: apply what was collected and
		restart the logs from the recovered lsn */

		recv_apply_hashed_log_recs(FALSE);

		recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
	}

	mutex_exit(&(log_sys->mutex));

	return(DB_SUCCESS);
}
/************************************************************
Completes recovery from archive. Runs the same completion as after a
checkpoint recovery and then clears recv_recovery_from_backup_on, the flag
that recv_recovery_from_archive_start() sets to TRUE. */
UNIV_INTERN
void
recv_recovery_from_archive_finish(void)
/*===================================*/
{
/* Same completion steps as for a recovery from a checkpoint */
recv_recovery_from_checkpoint_finish();
/* The flag is cleared only after the completion above has returned */
recv_recovery_from_backup_on = FALSE;
}
#endif /* UNIV_LOG_ARCHIVE */