mariadb/buf/buf0buf.c
marko e76b873f24 branches/innodb+: Merge revisions 4660:5090 from branches/zip:
------------------------------------------------------------------------
  r4670 | vasil | 2009-04-07 09:35:23 +0300 (Tue, 07 Apr 2009) | 11 lines

  branches/zip:

  Fix Bug#43660 SHOW INDEXES/ANALYZE does NOT update cardinality for
  indexes of InnoDB table

  by replacing the pseudo random number generator with a better one (LCG).

  This also fixes Mantis Issue#212.

  Approved by:	Heikki (rb://110)
  ------------------------------------------------------------------------
  r4671 | vasil | 2009-04-07 09:37:31 +0300 (Tue, 07 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4670.
  ------------------------------------------------------------------------
  r4673 | marko | 2009-04-07 15:45:28 +0300 (Tue, 07 Apr 2009) | 4 lines

  branches/zip: Allow in-place updates of UTF-8 CHAR columns
  from or to NULL in ROW_FORMAT=REDUNDANT. (Bug #44032)

  rb://107 approved by Heikki Tuuri.
  ------------------------------------------------------------------------
  r4677 | marko | 2009-04-07 16:19:31 +0300 (Tue, 07 Apr 2009) | 1 line

  branches/zip: Adjust r4673 as in the merge to branches/6.0 -r4676.
  ------------------------------------------------------------------------
  r4678 | inaam | 2009-04-07 18:45:37 +0300 (Tue, 07 Apr 2009) | 12 lines

  branches/zip

  Enable atomics on solaris (using the libc functions as defined in
  atomic.h) if GCC atomic builtins are not present.

  There still remains some work to be done (by Vasil?). This patch
  makes changes to plug.in to check pthread_t size and presence of
  atomic functions when running on solaris. The same has to become
  a part of the generated Makefile.in when we bake our source.

  Reviewed by: Heikki rb://106
  ------------------------------------------------------------------------
  r4687 | vasil | 2009-04-08 13:08:59 +0300 (Wed, 08 Apr 2009) | 4 lines

  branches/zip:

  Whitespace fixup in the ChangeLog
  ------------------------------------------------------------------------
  r4688 | vasil | 2009-04-08 13:11:15 +0300 (Wed, 08 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4678.
  ------------------------------------------------------------------------
  r4689 | marko | 2009-04-08 14:24:49 +0300 (Wed, 08 Apr 2009) | 5 lines

  branches/zip: Hide unnecessarily visible globals.
  dict_ind_redundant, dict_ind_compact: Declare these UNIV_INTERN.
  innodb_hton_ptr: Declare static.  We do not attempt to access the
  built-in InnoDB any more.
  trx_roll_savepoints_free(): Declare UNIV_INTERN.
  ------------------------------------------------------------------------
  r4700 | calvin | 2009-04-11 00:37:10 +0300 (Sat, 11 Apr 2009) | 9 lines

  branches/zip: Rewrite CMakeLists.txt

  CMakeLists.txt is completely rewritten:
  - To replace the one written by mysql
  - Print out some useful information, such as
    system name, directory, generator used,
    win64, Microsoft compiler, etc.
  - Remove one workaround for mysqld.lib location.
    User does not need to specify a build type
  ------------------------------------------------------------------------
  r4702 | calvin | 2009-04-13 18:16:44 +0300 (Mon, 13 Apr 2009) | 3 lines

  branches/zip: delete the original CMakeLists.txt

  A new version will be committed, suggested by Ken.
  ------------------------------------------------------------------------
  r4703 | calvin | 2009-04-13 18:20:45 +0300 (Mon, 13 Apr 2009) | 9 lines

  branches/zip: new CMakeLists.txt

  CMakeLists.txt is completely rewritten with enhancements:
  - Print out useful information, such as
    system name, directory, generator used,
    win64, Microsoft compiler, etc.
  - Remove one workaround for mysqld.lib location.
    User does not need to specify a build type
    when invoking MSVC generator.
  ------------------------------------------------------------------------
  r4706 | vasil | 2009-04-14 14:32:11 +0300 (Tue, 14 Apr 2009) | 5 lines

  branches/zip:

  When using the random function, first take the modulus by the number of pages
  and then typecast to ulint.
  ------------------------------------------------------------------------
  r4707 | calvin | 2009-04-14 17:47:31 +0300 (Tue, 14 Apr 2009) | 13 lines

  branches/zip: remove statically linked libraries from mysql

  To make zlib and strings dynamically linked; mysqld will export
  additional functions required by InnoDB.

  Since the symbols will be resolved dynamically during runtime,
  wdl_load_mapfile() is no longer able to make any function calls
  to ones in mysqld. As a result, strtoull() (from strings.lib)
  is replaced with _strtoui64().

  rb://111

  Approved by: Marko
  ------------------------------------------------------------------------
  r4712 | vasil | 2009-04-15 12:26:32 +0300 (Wed, 15 Apr 2009) | 157 lines

  branches/zip: Merge revisions 4481:4710 from branches/5.1:

  (resolving conflict in r4574, r4575 and skipping r4699 and r4705 because
  analogous changes to r4699 and r4705 were already made to branches/zip)

    ------------------------------------------------------------------------
    r4573 | vasil | 2009-03-30 14:17:13 +0300 (Mon, 30 Mar 2009) | 4 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Fix email address from dev@innodb.com to innodb_dev_ww@oracle.com

    ------------------------------------------------------------------------
    r4574 | vasil | 2009-03-30 14:27:08 +0300 (Mon, 30 Mar 2009) | 38 lines
    Changed paths:
       M /branches/5.1/Makefile.am
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Restore the state of INNODB_THREAD_CONCURRENCY to silence this warning:

      TEST                                      RESULT   TIME (ms)
      ------------------------------------------------------------

      worker[1] Using MTR_BUILD_THREAD 250, with reserved ports 12500..12509
      main.innodb                              [ pass ]   8803

      MTR's internal check of the test case 'main.innodb' failed.
      This means that the test case does not preserve the state that existed
      before the test case was executed.  Most likely the test case did not
      do a proper clean-up.
      This is the diff of the states of the servers before and after the
      test case was executed:
      mysqltest: Logging to '/tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.log'.
      mysqltest: Results saved in '/tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.result'.
      mysqltest: Connecting to server localhost:12500 (socket /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/mysqld.1.sock) as 'root', connection 'default', attempt 0 ...
      mysqltest: ... Connected.
      mysqltest: Start processing test commands from './include/check-testcase.test' ...
      mysqltest: ... Done processing test commands.
      --- /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.result	2009-03-30 14:12:31.000000000 +0300
      +++ /tmp/autotest.sh-20090330_033000-5.1.5Hg8CY/mysql-5.1/mysql-test/var/tmp/check-mysqld_1.reject	2009-03-30 14:12:41.000000000 +0300
      @@ -99,7 +99,7 @@
       INNODB_SUPPORT_XA	ON
       INNODB_SYNC_SPIN_LOOPS	20
       INNODB_TABLE_LOCKS	ON
      -INNODB_THREAD_CONCURRENCY	8
      +INNODB_THREAD_CONCURRENCY	16
       INNODB_THREAD_SLEEP_DELAY	10000
       INSERT_ID	0
       INTERACTIVE_TIMEOUT	28800

      mysqltest: Result content mismatch

      not ok
    ------------------------------------------------------------------------
    r4575 | vasil | 2009-03-30 15:55:31 +0300 (Mon, 30 Mar 2009) | 8 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.result
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Fix Bug#43309 Test main.innodb can't be run twice

    Make the innodb mysql-test more flexible by inspecting how much a
    variable of interest has changed since the start of the test. Do not
    assume the variables have zero values at the start of the test.
    ------------------------------------------------------------------------
    r4576 | vasil | 2009-03-30 16:25:10 +0300 (Mon, 30 Mar 2009) | 4 lines
    Changed paths:
       M /branches/5.1/Makefile.am

    branches/5.1:

    Revert a change to Makefile.am that I committed accidentally in r4574.
    ------------------------------------------------------------------------
    r4659 | vasil | 2009-04-06 15:34:51 +0300 (Mon, 06 Apr 2009) | 6 lines
    Changed paths:
       M /branches/5.1/mysql-test/innodb.test

    branches/5.1:

    Followup to r4575 and the fix of Bug#43309 Test main.innodb can't be run twice:

    Add an explanatory comment, as suggested by Patrick Crews in the bug report.
    ------------------------------------------------------------------------
    r4699 | vasil | 2009-04-09 14:01:52 +0300 (Thu, 09 Apr 2009) | 15 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/include/srv0srv.h
       M /branches/5.1/page/page0cur.c
       M /branches/5.1/srv/srv0srv.c

    branches/5.1:

    Fix Bug#43660 SHOW INDEXES/ANALYZE does NOT update cardinality for indexes
    of InnoDB table

    by replacing the PRNG that is used to pick random pages with a better
    one.

    This is based on r4670 but also adds a new configuration option and
    enables the fix only if this option is changed. Please skip the present
    revision when merging.

    Approved by:	Heikki (via email)
    ------------------------------------------------------------------------
    r4705 | vasil | 2009-04-14 14:30:13 +0300 (Tue, 14 Apr 2009) | 5 lines
    Changed paths:
       M /branches/5.1/page/page0cur.c

    branches/5.1:

    When using the random function, first take the modulus by the number of pages
    and then typecast to ulint.

    ------------------------------------------------------------------------
    r4710 | vasil | 2009-04-15 11:55:18 +0300 (Wed, 15 Apr 2009) | 25 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc

    branches/5.1:

    Merge a change from MySQL (looks like this is against 5.0 but they later
    merged it to 5.1):

      ------------------------------------------------------------
      revno: 1810.3846.1
      committer: Alexey Botchkov <holyfoot@mysql.com>
      branch nick: 31435
      timestamp: Tue 2008-11-11 14:42:32 +0400
      message:
        Bug#31435 ha_innodb.cc:3983: ulint convert_search_mode_to_innobase(ha_rkey_function): Asse 
            I think we don't need to issue an error statement in the convert_search_mode_to_innobase().
            Returning the PAGE_CUR_UNSUPP value is enough, as it allows the caller to handle this
            case depending on the requirements.

        per-file comments:
          sql/ha_innodb.cc 
        Bug#31435 ha_innodb.cc:3983: ulint convert_search_mode_to_innobase(ha_rkey_function): Asse 
             no error issued in convert_search_mode_to_innobase.
             ha_innobase::records_in_range() returns HA_POS_ERROR if search mode isn't supported.
      modified:
        sql/ha_innodb.cc
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r4713 | vasil | 2009-04-15 12:36:16 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Add missing ChangeLog entries
  ------------------------------------------------------------------------
  r4714 | vasil | 2009-04-15 12:36:57 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Fix typo in the ChangeLog
  ------------------------------------------------------------------------
  r4715 | vasil | 2009-04-15 12:39:04 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Whitespace cleanup in ChangeLog
  ------------------------------------------------------------------------
  r4716 | vasil | 2009-04-15 21:36:06 +0300 (Wed, 15 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4543.
  ------------------------------------------------------------------------
  r4717 | calvin | 2009-04-16 01:22:35 +0300 (Thu, 16 Apr 2009) | 18 lines

  branches/zip: Use the Windows Interlocked functions for atomic memory
  access

  Mapping the atomic operations to Windows Interlocked functions:

  os_compare_and_swap_* to InterlockedCompareExchange(64)
  os_atomic_increment_* to InterlockedExchangeAdd(64)
  os_atomic_test_and_set_byte to InterlockedExchange

  In this patch, the legacy code under UNIV_CAN_USE_X86_ASSEMBLER is
  removed altogether, and HAVE_WINDOWS_ATOMICS and
  INNODB_RW_LOCKS_USE_ATOMICS are added to CMakeLists.txt.

  This is to address mantis issue#194.

  rb://113

  Approved by: Marko
  ------------------------------------------------------------------------
  r4720 | vasil | 2009-04-16 09:44:48 +0300 (Thu, 16 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4717.
  ------------------------------------------------------------------------
  r4721 | marko | 2009-04-16 10:32:09 +0300 (Thu, 16 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Initialize prebuilt->index_usable.
  This should have been done in r4631.  Spotted by Michael.
  ------------------------------------------------------------------------
  r4728 | marko | 2009-04-16 16:02:27 +0300 (Thu, 16 Apr 2009) | 3 lines

  branches/zip: univ.i: Define REFMAN as the base URL of the
  MySQL Reference Manual and use it in every string.
  This fixes Issue #221.
  ------------------------------------------------------------------------
  r4733 | calvin | 2009-04-17 08:13:20 +0300 (Fri, 17 Apr 2009) | 6 lines

  branches/zip: minor changes to CMakeLists.txt

  All are non-functional changes:
  - should check for long (not int), spotted by Sunny
  - comment out the project definition, to avoid generating another
    .sln file.
  ------------------------------------------------------------------------
  r4748 | vasil | 2009-04-18 00:50:09 +0300 (Sat, 18 Apr 2009) | 118 lines

  branches/zip: Merge revisions 4710:4746 from branches/5.1:

    ------------------------------------------------------------------------
    r4746 | vasil | 2009-04-18 00:32:08 +0300 (Sat, 18 Apr 2009) | 110 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/include/pars0pars.h

    branches/5.1:

    Merge a change from MySQL:

      ------------------------------------------------------------
      revno: 2728.10.2
      committer: Ignacio Galarza <iggy@mysql.com>
      branch nick: mysql-5.1-bugteam-bug29125
      timestamp: Fri 2009-02-13 11:41:47 -0500
      message:
        Bug#29125 Windows Server X64: so many compiler warnings
        - Remove bothersome warning messages.  This change focuses on the warnings 
        that are covered by the ignore file: support-files/compiler_warnings.supp.
        - Strings are guaranteed to be max uint in length
      modified:
        client/mysql_upgrade.c
        client/mysqladmin.cc
        client/mysqlbinlog.cc
        client/mysqlcheck.c
        client/mysqldump.c
        client/mysqlslap.c
        client/mysqltest.cc
        client/sql_string.cc
        extra/comp_err.c
        extra/yassl/src/buffer.cpp
        extra/yassl/taocrypt/include/block.hpp
        extra/yassl/taocrypt/src/algebra.cpp
        extra/yassl/taocrypt/src/asn.cpp
        include/config-win.h
        libmysql/libmysql.c
        mysys/array.c
        mysys/base64.c
        mysys/charset.c
        mysys/checksum.c
        mysys/default.c
        mysys/default_modify.c
        mysys/hash.c
        mysys/mf_keycache.c
        mysys/mf_tempdir.c
        mysys/my_append.c
        mysys/my_compress.c
        mysys/my_conio.c
        mysys/my_copy.c
        mysys/my_getwd.c
        mysys/my_pread.c
        mysys/my_quick.c
        mysys/my_read.c
        mysys/safemalloc.c
        mysys/string.c
        server-tools/instance-manager/buffer.cc
        server-tools/instance-manager/instance.cc
        server-tools/instance-manager/options.cc
        server-tools/instance-manager/parse.h
        sql-common/client.c
        sql-common/my_user.c
        sql/event_data_objects.cc
        sql/event_parse_data.cc
        sql/events.cc
        sql/gen_lex_hash.cc
        sql/item.h
        sql/item_func.cc
        sql/item_strfunc.cc
        sql/item_timefunc.cc
        sql/lock.cc
        sql/log_event.cc
        sql/log_event.h
        sql/log_event_old.cc
        sql/net_serv.cc
        sql/sp_head.h
        sql/spatial.h
        sql/sql_class.h
        sql/sql_connect.cc
        sql/sql_crypt.cc
        sql/sql_error.cc
        sql/sql_insert.cc
        sql/sql_lex.cc
        sql/sql_lex.h
        sql/sql_load.cc
        sql/sql_prepare.cc
        sql/sql_profile.cc
        sql/sql_repl.cc
        sql/sql_servers.cc
        sql/sql_string.cc
        sql/sql_table.cc
        sql/sql_trigger.cc
        sql/sql_udf.cc
        sql/sql_view.cc
        sql/udf_example.c
        sql/uniques.cc
        storage/archive/azio.c
        storage/archive/azlib.h
        storage/csv/ha_tina.cc
        storage/csv/ha_tina.h
        storage/csv/transparent_file.h
        storage/federated/ha_federated.cc
        storage/federated/ha_federated.h
        storage/heap/hp_write.c
        storage/innobase/handler/ha_innodb.cc
        storage/innobase/include/pars0pars.h
        storage/myisam/ha_myisam.cc
        storage/myisam/mi_check.c
        storage/myisam/mi_packrec.c
        storage/myisam/mi_search.c
        storage/myisam/rt_index.c
        storage/myisammrg/ha_myisammrg.cc
        strings/ctype.c
        strings/my_vsnprintf.c
        tests/bug25714.c
        tests/mysql_client_test.c
  ------------------------------------------------------------------------
  r4749 | vasil | 2009-04-18 00:58:08 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4748.
  ------------------------------------------------------------------------
  r4751 | vasil | 2009-04-18 01:29:16 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Silence warning about unused variables.
  ------------------------------------------------------------------------
  r4752 | vasil | 2009-04-18 01:30:37 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Include the needed header for memset().
  ------------------------------------------------------------------------
  r4753 | vasil | 2009-04-18 01:31:34 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Silence a compiler warning.
  ------------------------------------------------------------------------
  r4756 | vasil | 2009-04-18 02:19:03 +0300 (Sat, 18 Apr 2009) | 5 lines

  branches/zip:

  Rename the aux config program and give it a more specific name because
  more are coming.
  ------------------------------------------------------------------------
  r4757 | vasil | 2009-04-18 02:22:33 +0300 (Sat, 18 Apr 2009) | 4 lines

  branches/zip:

  Add comment and copyright notice to the aux config program.
  ------------------------------------------------------------------------
  r4758 | vasil | 2009-04-18 02:40:47 +0300 (Sat, 18 Apr 2009) | 5 lines

  branches/zip:

  Add aux config programs to emulate the newly added checks in plug.in
  (from r4678).
  ------------------------------------------------------------------------
  r4830 | marko | 2009-04-20 16:11:38 +0300 (Mon, 20 Apr 2009) | 6 lines

  branches/zip: Cosmetic fixes.

  row_unlock_for_mysql(): Add a const qualifier to read-only rec_t*.
  Use dict_index_is_clust().

  CMakeLists.txt: svn propset svn:eol-style native.
  ------------------------------------------------------------------------
  r4893 | marko | 2009-04-23 09:32:36 +0300 (Thu, 23 Apr 2009) | 11 lines

  branches/zip: Introduce the logical type names trx_id_t, roll_ptr_t,
  and undo_no_t. Each type is still defined as dulint.

  This is an initial step towards replacing dulint with a 64-bit data type.
  Because modern compilers have no trouble supporting 64-bit arithmetic
  even on 32-bit targets, the dulint struct is a relic that should go.

  The last remaining major use of dulint is dictionary IDs
  (table, index, and row ids).

  rb://114 approved by Sunny Bains
  ------------------------------------------------------------------------
  r4894 | marko | 2009-04-23 10:21:07 +0300 (Thu, 23 Apr 2009) | 1 line

  branches/zip: ChangeLog: Document r4893.
  ------------------------------------------------------------------------
  r4895 | marko | 2009-04-23 10:22:06 +0300 (Thu, 23 Apr 2009) | 1 line

  branches/zip: ChangeLog: Add the missing include/ to two files.
  ------------------------------------------------------------------------
  r4896 | marko | 2009-04-23 10:37:40 +0300 (Thu, 23 Apr 2009) | 4 lines

  branches/zip: row_scan_and_check_index(): Improve the diagnostics, by reporting
  errors from row_search_for_mysql() in the error log.
  The errors will still be ignored by CHECK TABLE.
  This is somewhat related to Issue #211.
  ------------------------------------------------------------------------
  r4897 | marko | 2009-04-23 10:40:34 +0300 (Thu, 23 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Check
  row_merge_is_index_usable() earlier, to make the logic clearer.
  ------------------------------------------------------------------------
  r4898 | marko | 2009-04-23 15:15:07 +0300 (Thu, 23 Apr 2009) | 4 lines

  branches/zip: Correct a misleading comment.  PAGE_MAX_TRX_ID
  will be updated in ibuf_insert_low() and updated from the
  insert buffer tree page to the secondary index tree page
  during the insert buffer merge.
  ------------------------------------------------------------------------
  r4915 | marko | 2009-04-27 13:40:20 +0300 (Mon, 27 Apr 2009) | 2 lines

  branches/zip: row_scan_and_check_index(): Add some comments on
  prebuilt->index_usable, as suggested by Michael.
  ------------------------------------------------------------------------
  r4921 | marko | 2009-04-29 11:51:25 +0300 (Wed, 29 Apr 2009) | 2 lines

  branches/zip: btr_cur_optimistic_insert(): Remove a redundant condition.
  The insert buffer tree is a clustered index.
  ------------------------------------------------------------------------
  r4922 | marko | 2009-04-29 23:23:27 +0300 (Wed, 29 Apr 2009) | 22 lines

  branches/zip: Distinguish temporary tables in MLOG_FILE_CREATE.
  This addresses Mantis Issue #23 in InnoDB Hot Backup and some
  of MySQL Bug #41609.

  In MLOG_FILE_CREATE, we need to distinguish temporary tables, so that
  InnoDB Hot Backup can work correctly.  It turns out that we can do this
  easily, by using a bit of the previously unused parameter for page number.
  (The page number parameter of MLOG_FILE_CREATE has been written as 0 
  ever since MySQL 4.1, which introduced MLOG_FILE_CREATE.)

  MLOG_FILE_FLAG_TEMP: A flag for indicating a temporary table in
  the page number parameter of MLOG_FILE_ operations.

  fil_op_write_log(): Add the parameter log_flags.

  fil_op_log_parse_or_replay(): Add the parameter log_flags.
  Do not replay MLOG_FILE_CREATE when MLOG_FILE_FLAG_TEMP is set in log_flags.
  This only affects ibbackup --apply-log.  InnoDB itself never replays file
  operations.

  rb://117 approved by Heikki Tuuri
  ------------------------------------------------------------------------
  r4977 | marko | 2009-05-13 15:49:38 +0300 (Wed, 13 May 2009) | 12 lines

  branches/zip: Merge revisions 4746:4976 from branches/5.1:

    ------------------------------------------------------------------------
    r4976 | marko | 2009-05-13 15:44:54 +0300 (Wed, 13 May 2009) | 6 lines

    branches/5.1: Display DB_ROLL_PTR in the COLUMNS section of the
    innodb_table_monitor output.  It was accidentally omitted due to an
    off-by-one loop condition.  (Bug #44320)

    rb://116 approved by Heikki Tuuri
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r4978 | vasil | 2009-05-13 16:21:55 +0300 (Wed, 13 May 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4977.

  ------------------------------------------------------------------------
  r4995 | marko | 2009-05-14 15:31:43 +0300 (Thu, 14 May 2009) | 24 lines

  branches/zip: Merge revisions 4976:4994 from branches/5.1:

    ------------------------------------------------------------------------
    r4994 | marko | 2009-05-14 15:04:55 +0300 (Thu, 14 May 2009) | 18 lines

    branches/5.1: Prevent a race condition in innobase_commit() by ensuring
    that innodb_commit_concurrency>0 remains constant at run time. (Bug #42101)

    srv_commit_concurrency: Make this a static variable in ha_innodb.cc.

    innobase_commit_concurrency_validate(): Check that innodb_commit_concurrency
    is not changed from or to 0 at run time.  This is needed, because
    innobase_commit() assumes that innodb_commit_concurrency>0 remains constant.
    Without this limitation, the checks for innodb_commit_concurrency>0
    in innobase_commit() should be removed and that function would have to
    acquire and release commit_cond_m at least twice per invocation.
    Normally, innodb_commit_concurrency=0, and introducing the mutex operations
    would mean significant overhead.

    innodb_bug42101.test, innodb_bug42101-nonzero.test: Test cases.

    rb://123 approved by Heikki Tuuri
    ------------------------------------------------------------------------
  ------------------------------------------------------------------------
  r5000 | vasil | 2009-05-14 20:13:41 +0300 (Thu, 14 May 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r4994.
  ------------------------------------------------------------------------
  r5026 | marko | 2009-05-18 16:29:51 +0300 (Mon, 18 May 2009) | 1 line

  branches/zip: buf_validate(): Add missing out: comment.
  ------------------------------------------------------------------------
  r5027 | marko | 2009-05-18 16:36:10 +0300 (Mon, 18 May 2009) | 1 line

  branches/zip: Add some missing out: comments to buf0buf.h, buf0buf.c.
  ------------------------------------------------------------------------
  r5028 | marko | 2009-05-18 16:40:07 +0300 (Mon, 18 May 2009) | 11 lines

  branches/zip: When executing an optimistic update by delete-and-insert,
  correctly estimate the free space on the compressed page by
  page_zip_available(..., create=TRUE). This was reported as Issue #231.

  btr_cur_update_alloc_zip(): Add the parameter ibool create and pass it
  to page_zip_available(). The parameter was previously passed as 0.

  btr_cur_optimistic_update(): Pass create=TRUE to btr_cur_update_alloc_zip().

  rb://120 approved by Heikki Tuuri
  ------------------------------------------------------------------------
  r5030 | marko | 2009-05-19 10:04:04 +0300 (Tue, 19 May 2009) | 2 lines

  branches/zip: os_thread_get_curr_id(), os_thread_get_curr():
  Add missing out: comments.
  ------------------------------------------------------------------------
  r5031 | marko | 2009-05-19 10:30:02 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Add missing out: comments to nullary functions.
  ------------------------------------------------------------------------
  r5033 | marko | 2009-05-19 11:00:51 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Remove bogus out: comments of functions returning void.
  ------------------------------------------------------------------------
  r5034 | marko | 2009-05-19 12:41:32 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: row_update_prebuilt_trx(): Correct bogus comment.
  ------------------------------------------------------------------------
  r5035 | marko | 2009-05-19 13:04:58 +0300 (Tue, 19 May 2009) | 3 lines

  branches/zip: ut0auxconf_have_solaris_atomics.c: Get the
  function declarations from <atomic.h>.
  Call the functions with proper arguments.
  ------------------------------------------------------------------------
  r5036 | marko | 2009-05-19 13:05:50 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Add proper comments to some file page accessors.
  ------------------------------------------------------------------------
  r5037 | marko | 2009-05-19 13:08:16 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: Fix a typo that was introduced in r5036.
  ------------------------------------------------------------------------
  r5038 | marko | 2009-05-19 22:59:07 +0300 (Tue, 19 May 2009) | 30 lines

  branches/zip: Write PAGE_MAX_TRX_ID to the redo log. Otherwise,
  transactions that are started before the rollback of incomplete
  transactions has finished may have an inconsistent view of the
  secondary indexes.

  dict_index_is_sec_or_ibuf(): Auxiliary function for controlling
  updates and checks of PAGE_MAX_TRX_ID: check whether an index is a
  secondary index or the insert buffer tree.

  page_set_max_trx_id(), page_update_max_trx_id(),
  lock_rec_insert_check_and_lock(),
  lock_sec_rec_modify_check_and_lock(), btr_cur_ins_lock_and_undo(),
  btr_cur_upd_lock_and_undo(): Add the parameter mtr.

  page_set_max_trx_id(): Allow mtr to be NULL.  When mtr==NULL, do not
  attempt to write to the redo log.  This only occurs when creating a
  page or reorganizing a compressed page.  In these cases, the
  PAGE_MAX_TRX_ID will be set correctly during the application of redo
  log records, even though there is no explicit log record about it.

  btr_discard_only_page_on_level(): Preserve PAGE_MAX_TRX_ID.  This
  function should be unreachable, though.

  btr_cur_pessimistic_update(): Update PAGE_MAX_TRX_ID.

  Add some assertions for checking that PAGE_MAX_TRX_ID is set on all
  secondary index leaf pages.

  rb://115 tested by Michael, fixes Issue #211
  ------------------------------------------------------------------------
  r5039 | marko | 2009-05-19 23:13:12 +0300 (Tue, 19 May 2009) | 1 line

  branches/zip: ib_wqueue_wait(): Add decorative comment.
  ------------------------------------------------------------------------
  r5041 | marko | 2009-05-20 08:42:12 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Add missing function comments.
  ------------------------------------------------------------------------
  r5042 | marko | 2009-05-20 08:46:01 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: sync0rw.ic: Remove an extra ; that was added in r5041.
  ------------------------------------------------------------------------
  r5044 | marko | 2009-05-20 11:11:58 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: mlog_parse_index(): Correct a parameter comment
  and add a const qualifier that was missing.
  ------------------------------------------------------------------------
  r5045 | marko | 2009-05-20 11:37:08 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: fil0fil.c: Correct some comments.
  ------------------------------------------------------------------------
  r5046 | marko | 2009-05-20 12:19:40 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Fix some function comments.
  ------------------------------------------------------------------------
  r5047 | marko | 2009-05-20 12:26:49 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ut_snprintf(): Fix the function comments.
  ------------------------------------------------------------------------
  r5048 | marko | 2009-05-20 12:28:44 +0300 (Wed, 20 May 2009) | 3 lines

  branches/zip: inno_bcmp(): Remove this memcmp replacement.
  srv0start.c does not (any longer) call memcmp.
  srv_parse_megabytes(): Add a function comment.
  ------------------------------------------------------------------------
  r5052 | marko | 2009-05-20 12:32:37 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ib_vector_is_empty(): Fix the function comment.
  ------------------------------------------------------------------------
  r5054 | marko | 2009-05-20 12:35:33 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: page_cur_lcg_prng(): Add missing parameter list.
  ------------------------------------------------------------------------
  r5057 | marko | 2009-05-20 12:45:17 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Remove bogus in: comments from struct members.
  ------------------------------------------------------------------------
  r5058 | marko | 2009-05-20 13:06:03 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Clean up some function comments.
  ------------------------------------------------------------------------
  r5060 | marko | 2009-05-20 14:06:59 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: Clean up some comments.
  ------------------------------------------------------------------------
  r5061 | marko | 2009-05-20 14:07:49 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: innodb_export_status(): Remove the return(0),
  now that the function was declared void in r5060.
  ------------------------------------------------------------------------
  r5062 | marko | 2009-05-20 14:45:03 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ha_innodb.cc: Clean up some comments.
  ------------------------------------------------------------------------
  r5063 | marko | 2009-05-20 16:10:17 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ut_dulint_sort(): Write proper comments.
  ------------------------------------------------------------------------
  r5064 | marko | 2009-05-20 16:17:26 +0300 (Wed, 20 May 2009) | 2 lines

  branches/zip: innobase_end(), innobase_flush_logs():
  Document the function parameters.
  ------------------------------------------------------------------------
  r5065 | marko | 2009-05-20 23:17:43 +0300 (Wed, 20 May 2009) | 1 line

  branches/zip: ha_innodb.cc: Add some missing function comments.
  ------------------------------------------------------------------------
  r5066 | marko | 2009-05-21 00:51:23 +0300 (Thu, 21 May 2009) | 2 lines

  branches/zip: Fix some function comments.
  ------------------------------------------------------------------------
  r5070 | vasil | 2009-05-21 08:27:00 +0300 (Thu, 21 May 2009) | 4 lines

  branches/zip:

  Whitespace fixup.
  ------------------------------------------------------------------------
2009-05-25 06:20:53 +00:00

4034 lines
107 KiB
C

/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************
The database buffer buf_pool
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0buf.h"
#ifdef UNIV_NONINL
#include "buf0buf.ic"
#endif
#include "mem0mem.h"
#include "btr0btr.h"
#include "fil0fil.h"
#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "log0log.h"
#endif /* !UNIV_HOTBACKUP */
#include "srv0srv.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "page0zip.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
=================================
Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.
A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.
SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.
Buffer frames and blocks
------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page, resides in the control block.
Buffer pool struct
------------------
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool mutex.
The buf_pool mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution to have mutexes
for each control block, for instance, because it seemed to be
complicated.
A solution to reduce mutex contention of the buf_pool mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool mutex hold time.
Control blocks
--------------
The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.
The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of two.
We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.
The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.
Lists of blocks
---------------
There are several lists of control blocks.
The free list (buf_pool->free) contains blocks which are currently not
used.
The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the
end of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].
The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.
Loading a file page
-------------------
First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.
A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.
Read-ahead
----------
The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information if a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.
Let us first explain the read-ahead mechanism when the leafs
of a B-tree are scanned in an ascending or descending order.
When a page that has been read is referenced for the first time in the buf_pool,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.
A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
#ifndef UNIV_HOTBACKUP
/* Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/* The buffer buf_pool of the database. NULL until buf_pool_init() has
allocated it. */
UNIV_INTERN buf_pool_t* buf_pool = NULL;
/* mutex protecting the buffer pool struct and control blocks, except the
read-write lock in them */
UNIV_INTERN mutex_t buf_pool_mutex;
/* mutex protecting the control blocks of compressed-only pages
(of type buf_page_t, not buf_block_t) */
UNIV_INTERN mutex_t buf_pool_zip_mutex;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint buf_dbg_counter = 0; /* This is used to insert validation
operations in execution in the
debug version */
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint buf_pool_mutex_exit_forbidden = 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/* If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
/* A chunk of buffers. The buffer pool is allocated in chunks.
A chunk is a single large allocation (mem). buf_chunk_init() places the
array of control blocks at the start of that allocation and the page
frames, aligned to UNIV_PAGE_SIZE, after the control blocks. */
struct buf_chunk_struct{
ulint mem_size; /* allocated size of the chunk, in bytes;
this is the size passed to
os_mem_free_large() */
ulint size; /* size of frames[] and blocks[], i.e.
the number of control blocks (and of
page frames) in this chunk */
void* mem; /* pointer to the memory area which
was allocated for the frames */
buf_block_t* blocks; /* array of buffer control blocks;
points to the start of mem */
};
#endif /* !UNIV_HOTBACKUP */
/************************************************************************
Computes the "new formula" checksum of a page, i.e. the value that is
stored in the page when the page is written to a file. The result is masked
to 32 bits so that 32-bit and 64-bit architectures compute the same value. */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
				/* out: checksum */
	const byte*	page)	/* in: buffer page */
{
	ulint	header_fold;
	ulint	body_fold;

	/* Not everything on the page may take part in the checksum:

	- FIL_PAGE_FILE_FLUSH_LSN (and, in versions <= 4.1.x,
	..._ARCH_LOG_NO) is written outside the buffer pool to the first
	pages of data files;
	- FIL_PAGE_SPACE_OR_CHKSUM is where this checksum itself is stored;
	- the last 8 bytes of the page hold the old formula checksum.

	Hence we fold two separate byte ranges and add the results. */

	/* Range 1: from FIL_PAGE_OFFSET up to FIL_PAGE_FILE_FLUSH_LSN. */
	header_fold = ut_fold_binary(page + FIL_PAGE_OFFSET,
				     FIL_PAGE_FILE_FLUSH_LSN
				     - FIL_PAGE_OFFSET);

	/* Range 2: from FIL_PAGE_DATA up to the page trailer. */
	body_fold = ut_fold_binary(page + FIL_PAGE_DATA,
				   UNIV_PAGE_SIZE - FIL_PAGE_DATA
				   - FIL_PAGE_END_LSN_OLD_CHKSUM);

	return((header_fold + body_fold) & 0xFFFFFFFFUL);
}
/************************************************************************
Computes the "old formula" checksum of a page. In versions < 4.0.14 and
< 4.1.1 there was a bug that the checksum only looked at the first few
bytes of the page; this function reproduces that calculation.

NOTE: the new formula checksum must already have been stored to
FIL_PAGE_SPACE_OR_CHKSUM before this function is called to calculate the
value to store, because that field is part of the bytes folded here! */
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
				/* out: checksum */
	const byte*	page)	/* in: buffer page */
{
	/* Fold the bytes from the start of the page up to
	FIL_PAGE_FILE_FLUSH_LSN and keep the low 32 bits. */
	return(ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN)
	       & 0xFFFFFFFFUL);
}
/************************************************************************
Checks if a page is corrupt. For an uncompressed page this compares the low
4 bytes of the LSN stored at the start and at the end of the page and, when
srv_use_checksums is set, validates both the old formula and the new
formula checksum fields. For a compressed page only the compressed page
checksum is validated. A page LSN that is ahead of the current system LSN
is reported on stderr but does not by itself make the page corrupted. */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
					/* out: TRUE if corrupted */
	const byte*	read_buf,	/* in: a database page */
	ulint		zip_size)	/* in: size of compressed page;
					0 for uncompressed pages */
{
	ulint	checksum_field;
	ulint	old_checksum_field;

	if (UNIV_LIKELY(!zip_size)) {
		const byte*	lsn_low_at_start;
		const byte*	lsn_low_at_end;

		lsn_low_at_start = read_buf + FIL_PAGE_LSN + 4;
		lsn_low_at_end = read_buf + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM + 4;

		if (memcmp(lsn_low_at_start, lsn_low_at_end, 4) != 0) {

			/* Stored log sequence numbers at the start
			and the end of page do not match */

			return(TRUE);
		}
	}

#ifndef UNIV_HOTBACKUP
	if (recv_lsn_checks_on) {
		ib_uint64_t	current_lsn;

		if (log_peek_lsn(&current_lsn)) {
			ib_uint64_t	page_lsn;

			page_lsn = mach_read_ull(read_buf + FIL_PAGE_LSN);

			if (current_lsn < page_lsn) {
				/* Only a diagnostic: the page is not
				declared corrupted because of this. */
				ut_print_timestamp(stderr);

				fprintf(stderr,
					" InnoDB: Error: page %lu log sequence number"
					" %llu\n"
					"InnoDB: is in the future! Current system "
					"log sequence number %llu.\n"
					"InnoDB: Your database may be corrupt or "
					"you may have copied the InnoDB\n"
					"InnoDB: tablespace but not the InnoDB "
					"log files. See\n"
					"InnoDB: " REFMAN "forcing-recovery.html\n"
					"InnoDB: for more information.\n",
					(ulong) mach_read_from_4(read_buf
								 + FIL_PAGE_OFFSET),
					page_lsn,
					current_lsn);
			}
		}
	}
#endif

	/* Without checksum validation there is nothing more to check. */
	if (UNIV_UNLIKELY(!srv_use_checksums)) {

		return(FALSE);
	}

	/* A stored value of BUF_NO_CHECKSUM_MAGIC is always accepted below:
	it might have been stored by InnoDB running with checksums
	disabled. */

	checksum_field = mach_read_from_4(read_buf
					  + FIL_PAGE_SPACE_OR_CHKSUM);

	if (UNIV_UNLIKELY(zip_size)) {
		if (checksum_field == BUF_NO_CHECKSUM_MAGIC) {

			return(FALSE);
		}

		return(checksum_field
		       != page_zip_calc_checksum(read_buf, zip_size));
	}

	old_checksum_field = mach_read_from_4(
		read_buf + UNIV_PAGE_SIZE
		- FIL_PAGE_END_LSN_OLD_CHKSUM);

	/* There are 2 valid formulas for old_checksum_field:

	1. Very old versions of InnoDB only stored 8 byte lsn to the
	start and the end of the page.

	2. Newer InnoDB versions store the old formula checksum
	there. */

	if (old_checksum_field != mach_read_from_4(read_buf
						   + FIL_PAGE_LSN)
	    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
	    && old_checksum_field
	    != buf_calc_page_old_checksum(read_buf)) {

		return(TRUE);
	}

	/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
	(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM, so a stored
	value of 0 is accepted as well. */

	return(checksum_field != 0
	       && checksum_field != BUF_NO_CHECKSUM_MAGIC
	       && checksum_field
	       != buf_calc_page_new_checksum(read_buf));
}
/************************************************************************
Prints a page to stderr: first an ascii and hex dump of the whole page,
then the calculated and the stored checksums, the LSN fields, the page
number and the space id as read from the page, and finally a guess of what
kind of page this is, based on the page type field.

For a compressed page (zip_size != 0) whose type is anything else than
FIL_PAGE_TYPE_XDES, only the compressed page summary is printed after the
dump and the function returns early. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
	const byte*	read_buf,	/* in: a database page */
	ulint		zip_size)	/* in: compressed page size, or
					0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
	dict_index_t*	index;
#endif /* !UNIV_HOTBACKUP */
	ulint		checksum;
	ulint		old_checksum;
	ulint		size	= zip_size;

	/* Number of bytes to dump: the compressed size if given,
	otherwise a full uncompressed page. */
	if (!size) {
		size = UNIV_PAGE_SIZE;
	}

	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
		(ulong) size);
	ut_print_buf(stderr, read_buf, size);
	fputs("\nInnoDB: End of page dump\n", stderr);

	if (zip_size) {
		/* Print compressed page. */

		switch (fil_page_get_type(read_buf)) {
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed BLOB page"
				" checksum %lu, stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		default:
			ut_print_timestamp(stderr);
			/* The page type must be cast like every other
			argument here: "%lu" expects an unsigned long, and
			ulint is not guaranteed to be that type on every
			platform. */
			fprintf(stderr,
				" InnoDB: unknown page type %lu,"
				" assuming FIL_PAGE_INDEX\n",
				(ulong) fil_page_get_type(read_buf));
			/* fall through */
		case FIL_PAGE_INDEX:
			checksum = srv_use_checksums
				? page_zip_calc_checksum(read_buf, zip_size)
				: BUF_NO_CHECKSUM_MAGIC;

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Compressed page checksum %lu,"
				" stored %lu\n"
				"InnoDB: Page lsn %lu %lu\n"
				"InnoDB: Page number (if stored"
				" to page already) %lu,\n"
				"InnoDB: space id (if stored"
				" to page already) %lu\n",
				(ulong) checksum,
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_LSN),
				(ulong) mach_read_from_4(
					read_buf + (FIL_PAGE_LSN + 4)),
				(ulong) mach_read_from_4(
					read_buf + FIL_PAGE_OFFSET),
				(ulong) mach_read_from_4(
					read_buf
					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
			return;
		case FIL_PAGE_TYPE_XDES:
			/* This is an uncompressed page. */
			break;
		}
	}

	/* Uncompressed page summary. When checksums are disabled, the
	magic value is printed in place of the calculated checksums. */
	checksum = srv_use_checksums
		? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
	old_checksum = srv_use_checksums
		? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: Page checksum %lu, prior-to-4.0.14-form"
		" checksum %lu\n"
		"InnoDB: stored checksum %lu, prior-to-4.0.14-form"
		" stored checksum %lu\n"
		"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
		" at page end %lu\n"
		"InnoDB: Page number (if stored to page already) %lu,\n"
		"InnoDB: space id (if created with >= MySQL-4.1.1"
		" and stored already) %lu\n",
		(ulong) checksum, (ulong) old_checksum,
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
		(ulong) mach_read_from_4(read_buf
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));

#ifndef UNIV_HOTBACKUP
	/* The undo page type field is checked regardless of the page
	type, hence the "may be" wording in the messages. */
	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
	    == TRX_UNDO_INSERT) {
		fprintf(stderr,
			"InnoDB: Page may be an insert undo log page\n");
	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
				    + TRX_UNDO_PAGE_TYPE)
		   == TRX_UNDO_UPDATE) {
		fprintf(stderr,
			"InnoDB: Page may be an update undo log page\n");
	}
#endif /* !UNIV_HOTBACKUP */

	/* Guess the page kind from the page type field. Types that are
	not listed here print nothing. */
	switch (fil_page_get_type(read_buf)) {
	case FIL_PAGE_INDEX:
		fprintf(stderr,
			"InnoDB: Page may be an index page where"
			" index id is %lu %lu\n",
			(ulong) ut_dulint_get_high(
				btr_page_get_index_id(read_buf)),
			(ulong) ut_dulint_get_low(
				btr_page_get_index_id(read_buf)));
#ifndef UNIV_HOTBACKUP
		/* Print the index name too, if the index is found. */
		index = dict_index_find_on_id_low(
			btr_page_get_index_id(read_buf));
		if (index) {
			fputs("InnoDB: (", stderr);
			dict_index_name_print(stderr, NULL, index);
			fputs(")\n", stderr);
		}
#endif /* !UNIV_HOTBACKUP */
		break;
	case FIL_PAGE_INODE:
		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
		break;
	case FIL_PAGE_IBUF_FREE_LIST:
		fputs("InnoDB: Page may be an insert buffer free list page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ALLOCATED:
		fputs("InnoDB: Page may be a freshly allocated page\n",
		      stderr);
		break;
	case FIL_PAGE_IBUF_BITMAP:
		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_SYS:
		fputs("InnoDB: Page may be a system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_TRX_SYS:
		fputs("InnoDB: Page may be a transaction system page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_FSP_HDR:
		fputs("InnoDB: Page may be a file space header page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_XDES:
		fputs("InnoDB: Page may be an extent descriptor page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_BLOB:
		fputs("InnoDB: Page may be a BLOB page\n",
		      stderr);
		break;
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		fputs("InnoDB: Page may be a compressed BLOB page\n",
		      stderr);
		break;
	}
}
#ifndef UNIV_HOTBACKUP
/************************************************************************
Initializes a buffer control block when the buf_pool is created: binds the
control block to its page frame, resets its state fields, and creates the
mutex and the read-write lock(s) embedded in the block. */
static
void
buf_block_init(
/*===========*/
	buf_block_t*	block,	/* in: pointer to control block */
	byte*		frame)	/* in: pointer to buffer frame */
{
	buf_page_t*	bpage	= &block->page;

	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

	/* Fields of the embedded page descriptor */
	bpage->state = BUF_BLOCK_NOT_USED;
	bpage->buf_fix_count = 0;
	bpage->io_fix = BUF_IO_NONE;
#ifdef UNIV_DEBUG_FILE_ACCESSES
	bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
#ifdef UNIV_DEBUG
	/* The block is not a member of any list or hash table yet. */
	bpage->in_page_hash = FALSE;
	bpage->in_zip_hash = FALSE;
	bpage->in_flush_list = FALSE;
	bpage->in_free_list = FALSE;
	bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */

	/* Fields of the block itself */
	block->frame = frame;
	block->modify_clock = 0;
	block->check_index_page_at_flush = FALSE;
	block->index = NULL;
#ifdef UNIV_DEBUG
	block->in_unzip_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

	page_zip_des_init(&bpage->zip);

	/* Create the latches of the block. */
	mutex_create(&block->mutex, SYNC_BUF_BLOCK);

	rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
	ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
	rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
/************************************************************************
Allocates a chunk of buffer frames. One large memory area is allocated; the
array of control blocks is placed at its start and the page frames, aligned
to UNIV_PAGE_SIZE, follow it. Every block is initialized and appended to
buf_pool->free. */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
					/* out: chunk, or NULL on failure */
	buf_chunk_t*	chunk,		/* out: chunk of buffers */
	ulint		mem_size)	/* in: requested size in bytes */
{
	buf_block_t*	block;
	byte*		frame;
	ulint		n_blocks;
	ulint		i;

	/* Round down to a multiple of page size,
	although it already should be. */
	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);

	/* Reserve space for the block descriptors, rounded up to
	whole pages. */
	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);

	/* os_mem_alloc_large() receives a pointer to the size and may
	therefore adjust chunk->mem_size. */
	chunk->mem_size = mem_size;
	chunk->mem = os_mem_alloc_large(&chunk->mem_size);

	if (UNIV_UNLIKELY(chunk->mem == NULL)) {

		return(NULL);
	}

	/* The block descriptors live at the start of the memory area. */
	chunk->blocks = chunk->mem;

	/* Align a pointer to the first frame. Note that when
	os_large_page_size is smaller than UNIV_PAGE_SIZE,
	we may allocate one fewer block than requested. When
	it is bigger, we may allocate more blocks than requested. */
	frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
	n_blocks = chunk->mem_size / UNIV_PAGE_SIZE
		- (frame != chunk->mem);

	/* Subtract the space needed for block descriptors: advance the
	first frame, giving up one block per page skipped, until the
	frame no longer overlaps the descriptor array. */
	while (frame < (byte*) (chunk->blocks + n_blocks)) {
		frame += UNIV_PAGE_SIZE;
		n_blocks--;
	}

	chunk->size = n_blocks;

	/* Init the block structs, assigning consecutive frames to
	consecutive blocks, and put every block on the free list. */
	block = chunk->blocks;

	for (i = 0; i < n_blocks; i++) {

		buf_block_init(block, frame);

#ifdef HAVE_purify
		/* Wipe contents of frame to eliminate a Purify warning */
		memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
		/* Add the block to the free list */
		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
		ut_d(block->page.in_free_list = TRUE);

		block++;
		frame += UNIV_PAGE_SIZE;
	}

	return(chunk);
}
#ifdef UNIV_DEBUG
/*************************************************************************
Searches the given buffer chunk for a block whose compressed page frame
(block->page.zip.data) is the given pointer. The caller must hold the
buffer pool mutex. */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
				/* out: buffer block pointing to
				the compressed page, or NULL */
	buf_chunk_t*	chunk,	/* in: chunk being checked */
	const void*	data)	/* in: pointer to compressed page */
{
	buf_block_t*	block;
	buf_block_t*	blocks_end;

	ut_ad(buf_pool);
	ut_ad(buf_pool_mutex_own());

	block = chunk->blocks;
	blocks_end = block + chunk->size;

	while (block != blocks_end) {
		if (block->page.zip.data == data) {

			return(block);
		}

		block++;
	}

	return(NULL);
}
/*************************************************************************
Searches every chunk of the buffer pool for a block that points to the
given compressed page. */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
				/* out: buffer block pointing to
				the compressed page, or NULL */
	const void*	data)	/* in: pointer to compressed page */
{
	buf_chunk_t*	chunks		= buf_pool->chunks;
	const ulint	n_chunks	= buf_pool->n_chunks;
	ulint		i;

	for (i = 0; i < n_chunks; i++) {
		buf_block_t*	found;

		found = buf_chunk_contains_zip(&chunks[i], data);

		if (found != NULL) {

			return(found);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
/*************************************************************************
Checks that all file pages in the buffer chunk are in a replaceable state.
Each block is examined while holding its block mutex; the caller must hold
the buffer pool mutex. */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
				/* out: address of a non-free block,
				or NULL if all freed */
	buf_chunk_t*	chunk)	/* in: chunk being checked */
{
	buf_block_t*	blocks;
	ulint		n_blocks;
	ulint		i;

	ut_ad(buf_pool);
	ut_ad(buf_pool_mutex_own());

	blocks = chunk->blocks;
	n_blocks = chunk->size;

	for (i = 0; i < n_blocks; i++) {
		buf_block_t*	block	= &blocks[i];
		ibool		in_use;

		mutex_enter(&block->mutex);

		/* A file page that is not ready for replacement
		counts as "not freed". */
		in_use = buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
			&& !buf_flush_ready_for_replace(&block->page);

		mutex_exit(&block->mutex);

		if (in_use) {

			return(block);
		}
	}

	return(NULL);
}
/*************************************************************************
Checks that every block in the buffer chunk is in the BUF_BLOCK_NOT_USED
state. The caller must hold the buffer pool mutex. */
static
ibool
buf_chunk_all_free(
/*===============*/
					/* out: TRUE if all freed */
	const buf_chunk_t*	chunk)	/* in: chunk being checked */
{
	const buf_block_t*	block;
	const buf_block_t*	blocks_end;

	ut_ad(buf_pool);
	ut_ad(buf_pool_mutex_own());

	block = chunk->blocks;
	blocks_end = block + chunk->size;

	while (block != blocks_end) {
		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

			/* Found a block that is in use. */
			return(FALSE);
		}

		block++;
	}

	return(TRUE);
}
/************************************************************************
Frees a chunk of buffer frames. Every block of the chunk must be in the
BUF_BLOCK_NOT_USED state and must not own a compressed page; each block is
removed from buf_pool->free and its latches are freed before the memory
area of the chunk is released. The caller must hold the buffer pool mutex. */
static
void
buf_chunk_free(
/*===========*/
	buf_chunk_t*	chunk)	/* in: chunk of buffers to free */
{
	ulint	i;

	ut_ad(buf_pool_mutex_own());

	for (i = 0; i < chunk->size; i++) {
		buf_block_t*	block	= chunk->blocks + i;

		/* The block must be completely unused. */
		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
		ut_a(!block->page.zip.data);

		ut_ad(!block->page.in_LRU_list);
		ut_ad(!block->in_unzip_LRU_list);
		ut_ad(!block->page.in_flush_list);

		/* Remove the block from the free list. */
		ut_ad(block->page.in_free_list);
		UT_LIST_REMOVE(list, buf_pool->free, (&block->page));

		/* Free the latches. */
		mutex_free(&block->mutex);
		rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
		rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
		UNIV_MEM_UNDESC(block);
	}

	/* Release the memory that holds both the control blocks
	and the frames. */
	os_mem_free_large(chunk->mem, chunk->mem_size);
}
/************************************************************************
Creates the buffer pool: allocates the buf_pool object, creates the buffer
pool mutexes, allocates a single chunk of srv_buf_pool_size bytes, creates
the page hash tables and the flush events, and finally creates the adaptive
hash index system sized after the pool. */
UNIV_INTERN
buf_pool_t*
buf_pool_init(void)
/*===============*/
				/* out, own: buf_pool object, NULL if not
				enough memory or error */
{
	buf_chunk_t*	chunk;
	ulint		i;

	buf_pool = mem_zalloc(sizeof(buf_pool_t));

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
	mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);

	buf_pool_mutex_enter();

	buf_pool->n_chunks = 1;
	buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);

	/* The free list must be initialized before buf_chunk_init(),
	which appends every block of the chunk to it. */
	UT_LIST_INIT(buf_pool->free);

	if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
		/* Do not return with the buffer pool mutex held:
		release it before tearing down the half-built pool. */
		buf_pool_mutex_exit();

		mem_free(chunk);
		mem_free(buf_pool);
		buf_pool = NULL;

		return(NULL);
	}

	srv_buf_pool_old_size = srv_buf_pool_size;
	/* curr_size is counted in pages, srv_buf_pool_curr_size in bytes. */
	buf_pool->curr_size = chunk->size;
	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
	buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);

	buf_pool->last_printout_time = time(NULL);

	/* 2. Initialize flushing fields
	-------------------------------- */

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	buf_pool->ulint_clock = 1;

	/* 3. Initialize LRU fields
	--------------------------- */
	/* All fields are initialized by mem_zalloc(). */

	buf_pool_mutex_exit();

	btr_search_sys_create(buf_pool->curr_size
			      * UNIV_PAGE_SIZE / sizeof(void*) / 64);

	/* 4. Initialize the buddy allocator fields */
	/* All fields are initialized by mem_zalloc(). */

	return(buf_pool);
}
/************************************************************************
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(void)
/*===============*/
{
buf_chunk_t* chunk;
buf_chunk_t* chunks;
chunks = buf_pool->chunks;
chunk = chunks + buf_pool->n_chunks;
while (--chunk >= chunks) {
/* Bypass the checks of buf_chunk_free(), since they
would fail at shutdown. */
os_mem_free_large(chunk->mem, chunk->mem_size);
}
buf_pool->n_chunks = 0;
}
/************************************************************************
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE.

All blocks of all chunks are scanned. For every block that has
block->is_hashed set, btr_search_latch is temporarily released, the block
is x-latched and its hash entries are dropped. The whole scan is repeated
until one complete pass finds no hashed block, i.e. a pass during which
btr_search_latch was never released. On return the caller again holds
btr_search_latch in exclusive mode. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
/* TRUE if btr_search_latch was released during the current pass */
ibool released_search_latch;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!btr_search_enabled);
do {
/* Walk the chunks from the last one to the first one. */
buf_chunk_t* chunks = buf_pool->chunks;
buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
released_search_latch = FALSE;
while (--chunk >= chunks) {
buf_block_t* block = chunk->blocks;
ulint i = chunk->size;
for (; i--; block++) {
/* block->is_hashed cannot be modified
when we have an x-latch on btr_search_latch;
see the comment in buf0buf.h */
if (!block->is_hashed) {
continue;
}
/* To follow the latching order, we
have to release btr_search_latch
before acquiring block->lock. */
rw_lock_x_unlock(&btr_search_latch);
/* When we release the search latch,
we must rescan all blocks, because
some may become hashed again. */
released_search_latch = TRUE;
rw_lock_x_lock(&block->lock);
/* This should be guaranteed by the
callers, which will be holding
btr_search_enabled_mutex. */
ut_ad(!btr_search_enabled);
/* Because we did not buffer-fix the
block by calling buf_block_get_gen(),
it is possible that the block has been
allocated for some other use after
btr_search_latch was released above.
We do not care which file page the
block is mapped to. All we want to do
is to drop any hash entries referring
to the page. */
/* It is possible that
block->page.state != BUF_BLOCK_FILE_PAGE.
Even that does not matter, because
btr_search_drop_page_hash_index() will
check block->is_hashed before doing
anything. block->is_hashed can only
be set on uncompressed file pages. */
btr_search_drop_page_hash_index(block);
/* Release the block latch before re-acquiring
btr_search_latch, so that the two are never
held at the same time in this function. */
rw_lock_x_unlock(&block->lock);
rw_lock_x_lock(&btr_search_latch);
ut_ad(!btr_search_enabled);
}
}
} while (released_search_latch);
}
/************************************************************************
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list.

The caller must hold the buffer pool mutex and the mutex of bpage, and
bpage must be neither I/O-fixed nor buffer-fixed. The contents of bpage
are copied to dpage, after which dpage takes the place of bpage in the
LRU list (at the same position) and in the page hash. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
buf_page_t* bpage, /* in/out: control block being relocated;
buf_page_get_state(bpage) must be
BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
buf_page_t* dpage) /* in/out: destination control block */
{
buf_page_t* b; /* predecessor of bpage in the LRU list, or NULL */
ulint fold; /* fold value of the page address of bpage */
ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
ut_a(bpage->buf_fix_count == 0);
ut_ad(bpage->in_LRU_list);
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
#ifdef UNIV_DEBUG
/* Only compressed-only pages may be relocated; any other state
is an error. */
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
case BUF_BLOCK_ZIP_PAGE:
break;
}
#endif /* UNIV_DEBUG */
/* Copy the whole descriptor first. This must happen before the
debug flags of bpage are cleared below, so that dpage keeps
in_LRU_list and in_page_hash set. */
memcpy(dpage, bpage, sizeof *dpage);
ut_d(bpage->in_LRU_list = FALSE);
ut_d(bpage->in_page_hash = FALSE);
/* relocate buf_pool->LRU */
/* Remember the predecessor of bpage before removing bpage, so that
dpage can be inserted at the same position. */
b = UT_LIST_GET_PREV(LRU, bpage);
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
if (b) {
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
} else {
/* bpage was the first block in the LRU list. */
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
}
/* If LRU_old pointed to bpage, make it point to dpage. */
if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
whose "old" flag is set. */
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
}
ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
ut_ad(ut_list_node_313->in_LRU_list)));
/* relocate buf_pool->page_hash */
/* bpage and dpage carry the same space and offset after the copy,
so a single fold value serves for both the delete and the insert. */
fold = buf_page_address_fold(bpage->space, bpage->offset);
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
/************************************************************************
Shrinks the buffer pool by releasing whole chunks.

On every pass the function looks for the largest chunk that is not
larger than chunk_size and whose blocks are all free, removes it from
buf_pool->chunks and subtracts its size from chunk_size. It repeats
until at most one chunk is left or the remaining amount is within a
slack of one megabyte.

If no completely free chunk qualifies, the blocks of the largest
qualifying chunk are freed or made old in the LRU list, a flush is
requested (or the thread sleeps if some blocks cannot be freed), and
the whole procedure is restarted. If no chunk qualifies at all, the
function returns without assigning srv_buf_pool_old_size, so that the
caller will try again later.

The adaptive hash index is disabled for the duration of the call and
enabled again before returning. The caller must not hold
buf_pool_mutex. */
static
void
buf_pool_shrink(
/*============*/
	ulint	chunk_size)	/* in: number of pages to remove */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;
	ulint		max_size;
	ulint		max_free_size;
	buf_chunk_t*	max_chunk;
	buf_chunk_t*	max_free_chunk;

	ut_ad(!buf_pool_mutex_own());

try_again:
	btr_search_disable(); /* Empty the adaptive hash index again */
	buf_pool_mutex_enter();

shrink_again:
	if (buf_pool->n_chunks <= 1) {

		/* Cannot shrink if there is only one chunk */
		goto func_done;
	}

	/* Search for the largest free chunk
	not larger than the size difference */
	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;
	max_size = max_free_size = 0;
	max_chunk = max_free_chunk = NULL;

	while (--chunk >= chunks) {
		if (chunk->size <= chunk_size
		    && chunk->size > max_free_size) {
			/* max_chunk: largest candidate, free or not.
			max_free_chunk: largest candidate whose blocks
			are all free. */
			if (chunk->size > max_size) {
				max_size = chunk->size;
				max_chunk = chunk;
			}

			if (buf_chunk_all_free(chunk)) {
				max_free_size = chunk->size;
				max_free_chunk = chunk;
			}
		}
	}

	if (!max_free_size) {

		ulint		dirty	= 0;
		ulint		nonfree	= 0;
		buf_block_t*	block;
		buf_block_t*	bend;

		/* Cannot shrink: try again later
		(do not assign srv_buf_pool_old_size) */
		if (!max_chunk) {

			goto func_exit;
		}

		block = max_chunk->blocks;
		bend = block + max_chunk->size;

		/* Move the blocks of chunk to the end of the
		LRU list and try to flush them. */
		for (; block < bend; block++) {
			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_NOT_USED:
				continue;
			case BUF_BLOCK_FILE_PAGE:
				break;
			default:
				nonfree++;
				continue;
			}

			mutex_enter(&block->mutex);
			/* The following calls will temporarily
			release block->mutex and buf_pool_mutex.
			Therefore, we have to always retry,
			even if !dirty && !nonfree. */

			if (!buf_flush_ready_for_replace(&block->page)) {

				buf_LRU_make_block_old(&block->page);
				dirty++;
			} else if (buf_LRU_free_block(&block->page, TRUE, NULL)
				   != BUF_LRU_FREED) {
				nonfree++;
			}

			mutex_exit(&block->mutex);
		}

		buf_pool_mutex_exit();

		/* Request for a flush of the chunk if it helps.
		Do not flush if there are non-free blocks, since
		flushing will not make the chunk freeable. */
		if (nonfree) {
			/* Avoid busy-waiting. */
			os_thread_sleep(100000);
		} else if (dirty
			   && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
			   == ULINT_UNDEFINED) {

			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
		}

		goto try_again;
	}

	max_size = max_free_size;
	max_chunk = max_free_chunk;

	srv_buf_pool_old_size = srv_buf_pool_size;

	/* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
	chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
	/* The descriptors preceding max_chunk ... */
	memcpy(chunks, buf_pool->chunks,
	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
	/* ... and the descriptors following it. The pointer difference
	is an element count, so it must be scaled by the element size
	to obtain the number of bytes to copy. */
	memcpy(chunks + (max_chunk - buf_pool->chunks),
	       max_chunk + 1,
	       (buf_pool->chunks + buf_pool->n_chunks
		- (max_chunk + 1)) * sizeof *chunks);
	ut_a(buf_pool->curr_size > max_chunk->size);
	buf_pool->curr_size -= max_chunk->size;
	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
	chunk_size -= max_chunk->size;
	buf_chunk_free(max_chunk);
	mem_free(buf_pool->chunks);
	buf_pool->chunks = chunks;
	buf_pool->n_chunks--;

	/* Allow a slack of one megabyte. */
	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

		goto shrink_again;
	}

func_done:
	srv_buf_pool_old_size = srv_buf_pool_size;
func_exit:
	buf_pool_mutex_exit();
	btr_search_enable();
}
/************************************************************************
Rebuild buf_pool->page_hash.

Both buf_pool->page_hash and buf_pool->zip_hash are replaced by new
tables created with 2 * buf_pool->curr_size cells. The entries of
zip_hash are migrated with HASH_MIGRATE. page_hash is freed and
repopulated from scratch: first with the uncompressed file pages found
by scanning all chunks, then with the compressed-only pages found on
buf_pool->zip_clean and buf_pool->flush_list. The whole operation is
carried out while holding buf_pool_mutex. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
ulint i;
ulint n_chunks;
buf_chunk_t* chunk;
hash_table_t* page_hash;
hash_table_t* zip_hash;
buf_page_t* b;
buf_pool_mutex_enter();
/* Free, create, and populate the hash table. */
hash_table_free(buf_pool->page_hash);
buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
zip_hash = hash_create(2 * buf_pool->curr_size);
/* Move the zip_hash entries to the new table before the old
table is freed. */
HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
BUF_POOL_ZIP_FOLD_BPAGE);
hash_table_free(buf_pool->zip_hash);
buf_pool->zip_hash = zip_hash;
/* Insert the uncompressed file pages to buf_pool->page_hash. */
chunk = buf_pool->chunks;
n_chunks = buf_pool->n_chunks;
for (i = 0; i < n_chunks; i++, chunk++) {
ulint j;
buf_block_t* block = chunk->blocks;
for (j = 0; j < chunk->size; j++, block++) {
/* Only blocks in the state BUF_BLOCK_FILE_PAGE
are inserted; blocks in other states are skipped. */
if (buf_block_get_state(block)
== BUF_BLOCK_FILE_PAGE) {
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);
HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(
block->page.space,
block->page.offset),
&block->page);
}
}
}
/* Insert the compressed-only pages to buf_pool->page_hash.
All such blocks are either in buf_pool->zip_clean or
in buf_pool->flush_list. */
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
ut_ad(!b->in_flush_list);
ut_ad(b->in_LRU_list);
ut_ad(b->in_page_hash);
ut_ad(!b->in_zip_hash);
HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(b->space, b->offset), b);
}
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
ut_ad(b->in_LRU_list);
ut_ad(b->in_page_hash);
ut_ad(!b->in_zip_hash);
switch (buf_page_get_state(b)) {
case BUF_BLOCK_ZIP_DIRTY:
HASH_INSERT(buf_page_t, hash, page_hash,
buf_page_address_fold(b->space,
b->offset), b);
break;
case BUF_BLOCK_FILE_PAGE:
/* uncompressed page */
/* Nothing to do: uncompressed file pages were already
inserted by the scan of the chunks above. */
break;
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* No other state is expected on the flush list. */
ut_error;
break;
}
}
buf_pool_mutex_exit();
}
/************************************************************************
Resizes the buffer pool.

Compares the requested size srv_buf_pool_size with the current size
srv_buf_pool_curr_size (both in bytes). Nothing is done if the request
has already been processed (srv_buf_pool_old_size == srv_buf_pool_size).
Otherwise the pool is either shrunk with buf_pool_shrink() or enlarged
by one new chunk, and finally buf_pool->page_hash is rebuilt. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	buf_pool_mutex_enter();

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		buf_pool_mutex_exit();
		return;
	}

	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {

		/* This branch is also taken when the requested size
		exceeds the current size by less than one megabyte.
		The sizes are unsigned, so the difference must not be
		computed in that case: it would wrap around and request
		the removal of a huge number of pages. */
		ulint	excess = 0;

		if (srv_buf_pool_curr_size > srv_buf_pool_size) {
			excess = srv_buf_pool_curr_size - srv_buf_pool_size;
		}

		buf_pool_mutex_exit();

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink(excess / UNIV_PAGE_SIZE);

	} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		ulint		mem_size
			= srv_buf_pool_size - srv_buf_pool_curr_size;
		buf_chunk_t*	chunks;
		buf_chunk_t*	chunk;

		/* Build a new chunk array with room for one more
		descriptor; the old array is kept until the new chunk
		has been initialized successfully. */
		chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);

		memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
		       * sizeof *chunks);

		chunk = &chunks[buf_pool->n_chunks];

		if (!buf_chunk_init(chunk, mem_size)) {
			/* The chunk could not be set up: discard the
			new array and keep the old one. */
			mem_free(chunks);
		} else {
			buf_pool->curr_size += chunk->size;
			srv_buf_pool_curr_size = buf_pool->curr_size
				* UNIV_PAGE_SIZE;
			mem_free(buf_pool->chunks);
			buf_pool->chunks = chunks;
			buf_pool->n_chunks++;
		}

		/* Mark the request as processed, whether or not the
		enlargement succeeded. */
		srv_buf_pool_old_size = srv_buf_pool_size;
		buf_pool_mutex_exit();
	}

	buf_pool_page_hash_rebuild();
}
/********************************************************************
Registers a watch for a page that is expected to be read in. The watch
is recorded in the buf_pool->watch_* fields; only one watch can be
active at a time. The caller must hold the buffer pool mutex. */
static
void
buf_pool_watch_set(
/*===============*/
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	ut_ad(buf_pool_mutex_own());

	/* A previous watch must have been cleared before a new one
	can be installed. */
	ut_a(!buf_pool->watch_active);

	/* Identify the watched page ... */
	buf_pool->watch_space = space;
	buf_pool->watch_page_no = page_no;

	/* ... and arm the watch with the "occurred" flag reset. */
	buf_pool->watch_occurred = FALSE;
	buf_pool->watch_active = TRUE;
}
/********************************************************************
Stop watching if the marked page is read in.

Clears buf_pool->watch_active while holding the buffer pool mutex.
A watch is expected to be active when this is called (debug
assertion). The other buf_pool->watch_* fields are left as they are. */
UNIV_INTERN
void
buf_pool_watch_clear(void)
/*======================*/
{
buf_pool_mutex_enter();
ut_ad(buf_pool->watch_active);
buf_pool->watch_active = FALSE;
buf_pool_mutex_exit();
}
/********************************************************************
Tells whether the active watch refers to the given page and the
watched event has been flagged in buf_pool->watch_occurred. The watch
fields are examined while holding the buffer pool mutex. */
UNIV_INTERN
ibool
buf_pool_watch_occurred(
/*====================*/
				/* out: TRUE if the given page is being
				watched and it has been read in */
	ulint	space,		/* in: space id */
	ulint	page_no)	/* in: page number */
{
	ulint	occurred = 0;

	buf_pool_mutex_enter();

	/* The answer can only be positive if a watch is active
	and it is set on exactly this page. */
	if (buf_pool->watch_active
	    && buf_pool->watch_space == space
	    && buf_pool->watch_page_no == page_no) {

		occurred = (buf_pool->watch_occurred != 0);
	}

	buf_pool_mutex_exit();

	return(occurred);
}
/************************************************************************
Moves the block to the start of the LRU list if there is a danger
that the block would drift out of the buffer pool. The caller must
not hold the buffer pool mutex; it is acquired here when the block
needs to be moved. */
UNIV_INLINE
void
buf_block_make_young(
/*=================*/
	buf_page_t*	bpage)	/* in: block to make younger */
{
	ut_ad(!buf_pool_mutex_own());

	/* Note that we read freed_page_clock's without holding any mutex:
	this is allowed since the result is used only in heuristics */
	if (!buf_page_peek_if_too_old(bpage)) {

		/* The block is not considered too old: leave it
		where it is. */
		return;
	}

	/* There has been freeing activity in the LRU list:
	best to move to the head of the LRU list */
	buf_pool_mutex_enter();
	buf_LRU_make_block_young(bpage);
	buf_pool_mutex_exit();
}
/************************************************************************
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool.

Unlike buf_block_make_young(), the page is moved unconditionally.
The buffer pool mutex is acquired and released by this function. */
UNIV_INTERN
void
buf_page_make_young(
/*================*/
buf_page_t* bpage) /* in: buffer block of a file page */
{
buf_pool_mutex_enter();
/* The check is made while holding the buffer pool mutex. */
ut_a(buf_page_in_file(bpage));
buf_LRU_make_block_young(bpage);
buf_pool_mutex_exit();
}
/************************************************************************
Clears the check_index_page_at_flush flag of a page, provided that the
page is found in the page hash and its state is BUF_BLOCK_FILE_PAGE.
Otherwise nothing is done. The lookup and the update are performed
while holding the buffer pool mutex. */
UNIV_INTERN
void
buf_reset_check_index_page_at_flush(
/*================================*/
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_block_t*	found;

	buf_pool_mutex_enter();

	found = (buf_block_t*) buf_page_hash_get(space, offset);

	if (found) {
		/* The flag is a field of buf_block_t, so it is only
		touched when the state says that this is a file page
		block. */
		if (buf_block_get_state(found) == BUF_BLOCK_FILE_PAGE) {
			found->check_index_page_at_flush = FALSE;
		}
	}

	buf_pool_mutex_exit();
}
/************************************************************************
Reports the current value of is_hashed of a page. The result is FALSE
if the page is not found in the page hash or if its state is not
BUF_BLOCK_FILE_PAGE. NOTE that this operation does not fix the page in
the pool if it is found there. */
UNIV_INTERN
ibool
buf_page_peek_if_search_hashed(
/*===========================*/
			/* out: TRUE if page hash index is built in search
			system */
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_block_t*	found;
	ibool		hashed = FALSE;

	buf_pool_mutex_enter();

	found = (buf_block_t*) buf_page_hash_get(space, offset);

	/* is_hashed is only read from blocks that are in the
	state BUF_BLOCK_FILE_PAGE. */
	if (found && buf_block_get_state(found) == BUF_BLOCK_FILE_PAGE) {
		hashed = found->is_hashed;
	}

	buf_pool_mutex_exit();

	return(hashed);
}
#ifdef UNIV_DEBUG_FILE_ACCESSES
/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated. The lookup and the update are performed while holding
the buffer pool mutex. */
UNIV_INTERN
buf_page_t*
buf_page_set_file_page_was_freed(
/*=============================*/
			/* out: control block if found in page hash table,
			otherwise NULL */
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_page_t*	found;

	buf_pool_mutex_enter();

	found = buf_page_hash_get(space, offset);

	if (found) {
		/* Flag the page as freed. */
		found->file_page_was_freed = TRUE;
	}

	buf_pool_mutex_exit();

	return(found);
}
/************************************************************************
Sets file_page_was_freed FALSE if the page is found in the buffer pool,
i.e. undoes the marking made by buf_page_set_file_page_was_freed().
The lookup and the update are performed while holding the buffer pool
mutex. */
UNIV_INTERN
buf_page_t*
buf_page_reset_file_page_was_freed(
/*===============================*/
			/* out: control block if found in page hash table,
			otherwise NULL */
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	buf_page_t*	found;

	buf_pool_mutex_enter();

	found = buf_page_hash_get(space, offset);

	if (found) {
		/* Remove the "freed" mark from the page. */
		found->file_page_was_freed = FALSE;
	}

	buf_pool_mutex_exit();

	return(found);
}
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/************************************************************************
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.

The page is looked up in the page hash and read in with
buf_read_page() until the lookup succeeds. The page found is then
buffer-fixed, marked as accessed and passed to buf_block_make_young().
If the page was I/O-fixed for reading at the time of the lookup, the
function polls until the read fix is gone before returning.
NULL is returned if the page found has no compressed frame
(bpage->zip.data is NULL). */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
/* out: pointer to the block */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size */
ulint offset) /* in: page number */
{
buf_page_t* bpage;
mutex_t* block_mutex;
ibool must_read;
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
#endif
/* The counter is incremented without holding any mutex. */
buf_pool->n_page_gets++;
for (;;) {
buf_pool_mutex_enter();
/* This label is also the target of a goto further below, which
is executed while buf_pool_mutex is still being held. */
lookup:
bpage = buf_page_hash_get(space, offset);
if (bpage) {
/* Found: leave the loop holding buf_pool_mutex. */
break;
}
/* Page not in buf_pool: needs to be read from file */
buf_pool_mutex_exit();
buf_read_page(space, zip_size, offset);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
if (UNIV_UNLIKELY(!bpage->zip.data)) {
/* There is no compressed page. */
buf_pool_mutex_exit();
return(NULL);
}
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
case BUF_BLOCK_ZIP_FREE:
/* A page found in the page hash is not expected to be
in any of these states. */
ut_error;
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Compressed-only page: buffer-fix the descriptor. */
bpage->buf_fix_count++;
break;
case BUF_BLOCK_FILE_PAGE:
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, NULL)
== BUF_LRU_FREED) {
/* The block was freed: release its mutex and repeat
the lookup instead of using bpage any further. */
mutex_exit(block_mutex);
goto lookup;
}
/* The uncompressed frame could not be discarded:
buffer-fix the block as it is. */
buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
break;
}
/* Remember whether a read was pending at this point; if so,
its completion is waited for below. */
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
buf_pool_mutex_exit();
buf_page_set_accessed(bpage, TRUE);
mutex_exit(block_mutex);
buf_block_make_young(bpage);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!bpage->file_page_was_freed);
#endif
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(bpage->buf_fix_count > 0);
ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
if (must_read) {
/* Let us wait until the read operation
completes */
/* The I/O fix is sampled under the block mutex and the
thread sleeps between the samples. */
for (;;) {
enum buf_io_fix io_fix;
mutex_enter(block_mutex);
io_fix = buf_page_get_io_fix(bpage);
mutex_exit(block_mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
} else {
break;
}
}
}
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_page_get_space(bpage),
buf_page_get_page_no(bpage)) == 0);
#endif
return(bpage);
}
/************************************************************************
Resets a group of fields of a block control structure to their
initial values: the index pointer and the is_hashed flag, the
n_hash_helps counter with the n_fields, n_bytes and left_side
parameters, and the check_index_page_at_flush flag. */
UNIV_INLINE
void
buf_block_init_low(
/*===============*/
	buf_block_t*	block)	/* in: block to init */
{
	/* No index is attached and the block is not hashed. */
	block->index = NULL;
	block->is_hashed = FALSE;

	/* Initial values of the hash parameters. */
	block->n_hash_helps = 0;
	block->n_fields = 1;
	block->n_bytes = 0;
	block->left_side = TRUE;

	block->check_index_page_at_flush = FALSE;
}
#endif /* !UNIV_HOTBACKUP */
/************************************************************************
Decompress a block.

If requested, the checksum stamped on the compressed page is first
compared with a freshly computed one. Then, depending on the page type,
the compressed frame block->page.zip.data is either decompressed into
block->frame (FIL_PAGE_INDEX) or copied there as such (the other known
page types). A diagnostic message is written to stderr and FALSE is
returned on a checksum mismatch, on a decompression failure, or if the
page type is not recognized. */
UNIV_INTERN
ibool
buf_zip_decompress(
/*===============*/
				/* out: TRUE if successful */
	buf_block_t*	block,	/* in/out: block */
	ibool		check)	/* in: TRUE=verify the page checksum */
{
	const byte*	frame = block->page.zip.data;

	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	if (UNIV_LIKELY(check)) {
		ulint	stamp_checksum	= mach_read_from_4(
			frame + FIL_PAGE_SPACE_OR_CHKSUM);
		ulint	calc_checksum	= page_zip_calc_checksum(
			frame, page_zip_get_size(&block->page.zip));

		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
			ut_print_timestamp(stderr);
			/* The checksums are of type ulint: cast them
			to match the %lu conversions, as is done for
			the other message in this function. */
			fprintf(stderr,
				"  InnoDB: compressed page checksum mismatch"
				" (space %u page %u): %lu != %lu\n",
				block->page.space, block->page.offset,
				(ulong) stamp_checksum,
				(ulong) calc_checksum);
			return(FALSE);
		}
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		if (page_zip_decompress(&block->page.zip,
					block->frame)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* Copy to uncompressed storage. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	/* None of the cases above matched the page type. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: unknown compressed page"
		" type %lu\n",
		(ulong) fil_page_get_type(frame));
	return(FALSE);
}
#ifndef UNIV_HOTBACKUP
/***********************************************************************
Gets the block to whose frame the pointer is pointing to.

Every chunk is examined in turn: the distance of ptr from the frame of
the first block of the chunk, shifted right by UNIV_PAGE_SIZE_SHIFT, is
taken as the index of the block within the chunk. The first chunk for
which that index is within chunk->size yields the result. It is a
fatal error (ut_error) if no chunk matches. */
UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
/* out: pointer to block, never NULL */
const byte* ptr) /* in: pointer to a frame */
{
buf_chunk_t* chunk;
ulint i;
/* TODO: protect buf_pool->chunks with a mutex (it will
currently remain constant after buf_pool_init()) */
for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
/* Byte offset of ptr from the first frame of this chunk. */
lint offs = ptr - chunk->blocks->frame;
if (UNIV_UNLIKELY(offs < 0)) {
/* ptr is below the first frame of this chunk. */
continue;
}
/* Convert the byte offset to a block index. */
offs >>= UNIV_PAGE_SIZE_SHIFT;
if (UNIV_LIKELY((ulint) offs < chunk->size)) {
buf_block_t* block = &chunk->blocks[offs];
/* The function buf_chunk_init() invokes
buf_block_init() so that block[n].frame ==
block->frame + n * UNIV_PAGE_SIZE. Check it. */
ut_ad(block->frame == page_align(ptr));
#ifdef UNIV_DEBUG
/* A thread that updates these fields must
hold buf_pool_mutex and block->mutex. Acquire
only the latter. */
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These types should only be used in
the compressed buffer pool, whose
memory is allocated from
buf_pool->chunks, in UNIV_PAGE_SIZE
blocks flagged as BUF_BLOCK_MEMORY. */
ut_error;
break;
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
/* Some data structures contain
"guess" pointers to file pages. The
file pages may have been freed and
reused. Do not complain. */
break;
case BUF_BLOCK_REMOVE_HASH:
/* buf_LRU_block_remove_hashed_page()
will overwrite the FIL_PAGE_OFFSET and
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
0xff and set the state to
BUF_BLOCK_REMOVE_HASH. */
ut_ad(page_get_space_id(page_align(ptr))
== 0xffffffff);
ut_ad(page_get_page_no(page_align(ptr))
== 0xffffffff);
break;
case BUF_BLOCK_FILE_PAGE:
/* The page address stored in the frame must agree
with the one in the control block. */
ut_ad(block->page.space
== page_get_space_id(page_align(ptr)));
ut_ad(block->page.offset
== page_get_page_no(page_align(ptr)));
break;
}
mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */
return(block);
}
}
/* The block should always be found. */
ut_error;
return(NULL);
}
/************************************************************************
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it. The pointer is only compared
with the bounds of the block descriptor array of each chunk. */
UNIV_INTERN
ibool
buf_pointer_is_block_field(
/*=======================*/
				/* out: TRUE if ptr belongs
				to a buf_block_t struct */
	const void*	ptr)	/* in: pointer not
				dereferenced */
{
	const buf_chunk_t*	chunk;
	ulint			n_left;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */

	for (chunk = buf_pool->chunks, n_left = buf_pool->n_chunks;
	     n_left > 0;
	     chunk++, n_left--) {

		/* Bounds of the descriptor array of this chunk:
		[first, limit). */
		const void*	first = (void *)chunk->blocks;
		const void*	limit = (void *)(chunk->blocks + chunk->size);

		if (ptr >= first && ptr < limit) {

			return(TRUE);
		}
	}

	return(FALSE);
}
/************************************************************************
Find out if a buffer block was created by buf_chunk_init(). */
static
ibool
buf_block_is_uncompressed(
/*======================*/
/* out: TRUE if "block" has
been added to buf_pool->free
by buf_chunk_init() */
const buf_block_t* block) /* in: pointer to block,
not dereferenced */
{
ut_ad(buf_pool_mutex_own());
if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
/* The pointer should be aligned. */
return(FALSE);
}
return(buf_pointer_is_block_field((void *)block));
}
/************************************************************************
This is the general function used to get access to a database page.

Outline:
1. Validate the guessed block, or look the page up in the page hash.
2. If the page is not in the pool: return NULL in the modes
BUF_GET_IF_IN_POOL and BUF_GET_IF_IN_POOL_OR_WATCH (setting a watch in
the latter), otherwise read the page in and start over.
3. If only a compressed copy exists (BUF_BLOCK_ZIP_PAGE or
BUF_BLOCK_ZIP_DIRTY), allocate an uncompressed block, relocate the
descriptor into it and decompress the page. NULL is returned if the
decompression fails.
4. Buffer-fix the block, mark it as accessed, pass it to
buf_block_make_young(), acquire the requested latch and register the
block in the mini-transaction memo.
5. On the first access to the page, invoke linear read-ahead. */
UNIV_INTERN
buf_block_t*
buf_page_get_gen(
/*=============*/
/* out: pointer to the block or NULL */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size in bytes
or 0 for uncompressed pages */
ulint offset, /* in: page number */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /* in: guessed block or NULL */
ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */
{
buf_block_t* block;
ibool accessed;
ulint fix_type;
ibool must_read;
ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
/* The counter is incremented without holding any mutex. */
buf_pool->n_page_gets++;
/* Restart point: used after a page read has been issued and after
waiting for a fixed compressed page. */
loop:
block = guess;
buf_pool_mutex_enter();
if (block) {
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset
|| space != block->page.space
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
/* Forget the guess for good: it will not be tried
again when the function restarts from "loop". */
block = guess = NULL;
} else {
ut_ad(!block->page.in_zip_hash);
ut_ad(block->page.in_page_hash);
}
}
if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get(space, offset);
}
/* Entered from below, with buf_pool_mutex held, when the page hash
entry was found to have changed while the mutex was released. */
loop2:
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
buf_pool_watch_set(space, offset);
}
buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
return(NULL);
}
buf_read_page(space, zip_size, offset);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
goto loop;
}
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
/* Remember whether a read of the page was pending at this point. */
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read
&& (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH)) {
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
buf_pool_watch_set(space, offset);
}
/* The page is only being read to buffer */
buf_pool_mutex_exit();
return(NULL);
}
switch (buf_block_get_state(block)) {
buf_page_t* bpage;
ibool success;
case BUF_BLOCK_FILE_PAGE:
/* An uncompressed copy exists: nothing to convert. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Only a compressed copy exists. In these states "block"
is used as a page descriptor (through block->page) only. */
bpage = &block->page;
if (bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* This condition often occurs when the buffer
is not buffer-fixed, but I/O-fixed by
buf_page_init_for_read(). */
wait_until_unfixed:
/* The block is buffer-fixed or I/O-fixed.
Try again later. */
buf_pool_mutex_exit();
os_thread_sleep(WAIT_FOR_READ);
goto loop;
}
/* Allocate an uncompressed page. */
/* buf_pool_mutex is released around the allocation, so
the state of the page has to be checked again after the
mutex has been reacquired. */
buf_pool_mutex_exit();
block = buf_LRU_get_free_block(0);
ut_a(block);
buf_pool_mutex_enter();
mutex_enter(&block->mutex);
{
buf_page_t* hash_bpage
= buf_page_hash_get(space, offset);
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
while buf_pool_mutex was released.
Free the block that was allocated. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
/* Continue with whatever is in the page hash now
(possibly nothing). */
block = (buf_block_t*) hash_bpage;
goto loop2;
}
}
if (UNIV_UNLIKELY
(bpage->buf_fix_count
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
/* The block was buffer-fixed or I/O-fixed
while buf_pool_mutex was not held by this thread.
Free the block that was allocated and try again.
This should be extremely unlikely. */
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
goto wait_until_unfixed;
}
/* Move the compressed page from bpage to block,
and uncompress it. */
mutex_enter(&buf_pool_zip_mutex);
buf_relocate(bpage, &block->page);
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
UNIV_MEM_DESC(&block->page.zip.data,
page_zip_get_size(&block->page.zip), block);
/* buf_relocate() does not relocate bpage->list, which links
the descriptor to either zip_clean or flush_list; take care
of that here. */
if (buf_page_get_state(&block->page)
== BUF_BLOCK_ZIP_PAGE) {
UT_LIST_REMOVE(list, buf_pool->zip_clean,
&block->page);
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage,
&block->page);
}
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
Also add the block to the unzip_LRU list. */
block->page.state = BUF_BLOCK_FILE_PAGE;
/* Insert at the front of unzip_LRU list */
buf_unzip_LRU_add_block(block, FALSE);
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
buf_pool->n_pend_unzip++;
rw_lock_x_lock(&block->lock);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool_zip_mutex);
/* The old descriptor is no longer needed: its contents were
copied to block->page by buf_relocate(). */
buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit();
/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
if (UNIV_LIKELY(success)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
}
/* Unfix and unlatch the block. */
buf_pool_mutex_enter();
mutex_enter(&block->mutex);
buf_pool->n_pend_unzip--;
block->page.buf_fix_count--;
buf_block_set_io_fix(block, BUF_IO_NONE);
mutex_exit(&block->mutex);
rw_lock_x_unlock(&block->lock);
if (UNIV_UNLIKELY(!success)) {
/* The page could not be decompressed. */
buf_pool_mutex_exit();
return(NULL);
}
break;
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* A page found in the page hash is not expected to be
in any of these states. */
ut_error;
break;
}
/* From here on, block is an uncompressed file page and
buf_pool_mutex is held. */
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
mutex_enter(&block->mutex);
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
buf_block_buf_fix_inc(block, file, line);
buf_pool_mutex_exit();
/* Check if this is the first access to the page */
accessed = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
buf_block_make_young(&block->page);
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(!block->page.file_page_was_freed);
#endif
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* Acquire the requested latch and choose the matching memo
type for the mini-transaction. */
switch (rw_latch) {
case RW_NO_LATCH:
if (must_read) {
/* Let us wait until the read operation
completes */
/* The I/O fix is sampled under the block mutex and the
thread sleeps between the samples. */
for (;;) {
enum buf_io_fix io_fix;
mutex_enter(&block->mutex);
io_fix = buf_block_get_io_fix(block);
mutex_exit(&block->mutex);
if (io_fix == BUF_IO_READ) {
os_thread_sleep(WAIT_FOR_READ);
} else {
break;
}
}
}
fix_type = MTR_MEMO_BUF_FIX;
break;
case RW_S_LATCH:
rw_lock_s_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
rw_lock_x_lock_func(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
break;
}
mtr_memo_push(mtr, block, fix_type);
if (!accessed) {
/* In the case of a first access, try to apply linear
read-ahead */
buf_read_ahead_linear(space, zip_size, offset);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
return(block);
}
/************************************************************************
This is the general function used to get optimistic access to a database
page.

The access succeeds only if all of the following hold: the guessed
block is in the state BUF_BLOCK_FILE_PAGE, the requested latch is
granted by the ..._nowait latching call, and the modify clock of the
block still equals modify_clock. On failure the buffer-fix taken by
this function (and the latch, if it was acquired) is released and
FALSE is returned. On success the block is registered in the
mini-transaction memo and buf_pool->n_page_gets is incremented. */
UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
/* out: TRUE if success */
ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /* in: guessed buffer block */
ib_uint64_t modify_clock,/* in: modify clock value if mode is
..._GUESS_ON_CLOCK */
const char* file, /* in: file name */
ulint line, /* in: line where called */
mtr_t* mtr) /* in: mini-transaction */
{
ibool accessed;
ibool success;
ulint fix_type;
ut_ad(mtr && block);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
/* The guessed block does not contain a file page. */
mutex_exit(&block->mutex);
return(FALSE);
}
buf_block_buf_fix_inc(block, file, line);
/* Check if this is the first access to the page */
accessed = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
buf_block_make_young(&block->page);
ut_ad(!ibuf_inside()
|| ibuf_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block), NULL));
/* Try to acquire the requested latch using the ..._nowait
variant of the latching function. */
if (rw_latch == RW_S_LATCH) {
success = rw_lock_s_lock_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
success = rw_lock_x_lock_func_nowait(&(block->lock),
file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
if (UNIV_UNLIKELY(!success)) {
/* The latch was not granted: undo the buffer-fix. */
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
return(FALSE);
}
if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
/* The modify clock differs from the value supplied by the
caller: release the latch and the buffer-fix, and fail. */
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
} else {
rw_lock_x_unlock(&(block->lock));
}
mutex_enter(&block->mutex);
buf_block_buf_fix_dec(block);
mutex_exit(&block->mutex);
return(FALSE);
}
mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif
if (UNIV_UNLIKELY(!accessed)) {
/* In the case of a first access, try to apply linear
read-ahead */
buf_read_ahead_linear(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block));
}
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
/* Only successful optimistic gets are counted. */
buf_pool->n_page_gets++;
return(TRUE);
}
/************************************************************************
Buffer-fixes and latches a page whose block pointer the caller already
holds, without ever waiting for the latch. Intended for callers that
reached the frame directly, e.g. through the adaptive hash index. */
UNIV_INTERN
ibool
buf_page_get_known_nowait(
/*======================*/
				/* out: TRUE if success */
	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/* in: the known page */
	ulint		mode,	/* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	ibool	latched;
	ulint	memo_type;

	ut_ad(mtr);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
		/* The block is in the middle of being evicted from the
		LRU list by another thread, so it must not be touched.
		We can only have arrived here via the adaptive hash
		index: a block in the ..._REMOVE_HASH state has already
		been taken out of the page address hash table of the
		buffer pool. */

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	/* Pin the block while its mutex is still held. */
	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	if (mode == BUF_MAKE_YOUNG) {
		buf_block_make_young(&block->page);
	}

	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));

	/* Try the requested latch once; never block on it. */
	if (rw_latch != RW_S_LATCH) {
		memo_type = MTR_MEMO_PAGE_X_FIX;
		latched = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	} else {
		memo_type = MTR_MEMO_PAGE_S_FIX;
		latched = rw_lock_s_lock_nowait(&block->lock,
						file, line);
	}

	if (!latched) {
		/* Undo the buffer-fix taken above. */
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	/* Register the latch in the mini-transaction memo. */
	mtr_memo_push(mtr, block, memo_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->n_page_gets++;

	return(TRUE);
}
/***********************************************************************
Looks up a page by tablespace id and page number and tries to latch it
without waiting. A page that is not in the buffer pool is not loaded;
NULL is returned instead. Suitable for using when holding the kernel
mutex. */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
				/* out: pointer to a page or NULL */
	ulint		space_id,/* in: tablespace id */
	ulint		page_no,/* in: page number */
	const char*	file,	/* in: file name */
	ulint		line,	/* in: line where called */
	mtr_t*		mtr)	/* in: mini-transaction */
{
	buf_block_t*	block;
	ibool		latched;
	ulint		memo_type;

	buf_pool_mutex_enter();

	block = buf_block_hash_get(space_id, page_no);

	if (!block) {
		buf_pool_mutex_exit();

		return(NULL);
	}

	/* Take the block mutex before letting go of the pool mutex. */
	mutex_enter(&block->mutex);
	buf_pool_mutex_exit();

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	/* First choice: a shared latch. */
	memo_type = MTR_MEMO_PAGE_S_FIX;
	latched = rw_lock_s_lock_nowait(&block->lock, file, line);

	if (!latched) {
		/* The S-latch is unobtainable when this very thread
		already holds an X-latch on the page, so fall back to
		requesting an X-latch. */

		memo_type = MTR_MEMO_PAGE_X_FIX;
		latched = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);

		if (!latched) {
			/* Neither latch was available: drop the
			buffer-fix and give up. */
			mutex_enter(&block->mutex);
			buf_block_buf_fix_dec(block);
			mutex_exit(&block->mutex);

			return(NULL);
		}
	}

	mtr_memo_push(mtr, block, memo_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG_FILE_ACCESSES
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	buf_pool->n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif

	return(block);
}
/************************************************************************
Resets the bookkeeping fields of a page control block to their initial
values: no I/O fix, no buffer fixes, not accessed, no modifications. */
UNIV_INLINE
void
buf_page_init_low(
/*==============*/
	buf_page_t*	bpage)	/* in: block to init */
{
	/* Fixing and I/O state */
	bpage->io_fix = BUF_IO_NONE;
	bpage->buf_fix_count = 0;
	bpage->flush_type = BUF_FLUSH_LRU;

	/* Access and replacement bookkeeping */
	bpage->accessed = FALSE;
	bpage->freed_page_clock = 0;

	/* Modification LSNs */
	bpage->oldest_modification = 0;
	bpage->newest_modification = 0;

	HASH_INVALIDATE(bpage, hash);

#ifdef UNIV_DEBUG_FILE_ACCESSES
	bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
}
/************************************************************************
Records in buf_pool->watch_occurred that the watched page has been read
in, provided a watch is active and (space, offset) is the watched page.
The caller must own the buffer pool mutex. */
UNIV_INTERN
void
buf_pool_watch_notify(
/*==================*/
	ulint	space,	/* in: space id of page read in */
	ulint	offset)	/* in: offset of page read in */
{
	ut_ad(buf_pool_mutex_own());

	if (!buf_pool->watch_active) {
		/* Nobody is watching anything. */
		return;
	}

	if (space != buf_pool->watch_space) {
		return;
	}

	if (offset != buf_pool->watch_page_no) {
		return;
	}

	buf_pool->watch_occurred = TRUE;
}
/************************************************************************
Turns a block into a file page for (space, offset) and inserts it into
the page hash of the buffer pool. The caller must own both the buffer
pool mutex and the block mutex. Finding the page already present in the
page hash is treated as a fatal error. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	buf_block_t*	block)	/* in: block to init */
{
	buf_page_t*	existing;

	ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&block->mutex));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Switch the block to the file page state */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (space == 0) {
		/* Some pages carry unused bytes that InnoDB never
		initializes. Mark the frame as valid so that Valgrind
		does not report (correctly, but uselessly) that
		uninitialized data is written to data files. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(space, offset);

	/* The page must not be in the hash table of file pages yet */
	existing = buf_page_hash_get(space, offset);

	if (UNIV_LIKELY_NULL(existing)) {
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) existing, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		/* Release the mutexes before dumping and validating
		the pool. */
		mutex_exit(&block->mutex);
		buf_pool_mutex_exit();
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	buf_page_init_low(&block->page);

	buf_pool_watch_notify(space, offset);

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);

	/* Publish the page in the page hash */
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    buf_page_address_fold(space, offset), &block->page);
}
/************************************************************************
Prepares a control block for a page that is about to be read from disk and
registers it in the buffer pool. Nothing is set up, and NULL is returned, if
(1) the page is already in buf_pool, or
(2) mode is BUF_READ_IBUF_PAGES_ONLY and the page is not an ibuf page
(unless recv_no_ibuf_operations is set), or
(3) the space is deleted or being deleted (*err is then set to
DB_TABLESPACE_DELETED).
On success the io_fix flag is set to BUF_IO_READ, buf_pool->n_pend_reads is
incremented and, when an uncompressed block is used, a non-recursive
exclusive lock is set on the buffer frame. The io-handler must take care
that the flag is cleared and the lock released later. */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
/* out: pointer to the block or NULL */
ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /* in: space id */
ulint zip_size,/* in: compressed page size, or 0 */
ibool unzip, /* in: TRUE=request uncompressed page */
ib_int64_t tablespace_version,/* in: prevents reading from a wrong
version of the tablespace in case we have done
DISCARD + IMPORT */
ulint offset) /* in: page number */
{
buf_block_t* block;
buf_page_t* bpage;
mtr_t mtr;
/* Passed to buf_buddy_alloc(); checked afterwards in the compressed-only
branch to decide whether the page hash must be searched again. */
ibool lru = FALSE;
void* data;
ut_ad(buf_pool);
*err = DB_SUCCESS;
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* It is a read-ahead within an ibuf routine */
ut_ad(!ibuf_bitmap_page(zip_size, offset));
ut_ad(ibuf_inside());
/* The mini-transaction started here is committed either right below
(early return) or at func_exit. */
mtr_start(&mtr);
if (!recv_no_ibuf_operations
&& !ibuf_page(space, zip_size, offset, &mtr)) {
mtr_commit(&mtr);
return(NULL);
}
} else {
ut_ad(mode == BUF_READ_ANY_PAGE);
}
/* Only a compressed page without an uncompressed frame is needed when
the page is compressed, the caller did not ask for unzipping, and
recovery is not in progress; in every other case grab a free block. */
if (zip_size && UNIV_LIKELY(!unzip)
&& UNIV_LIKELY(!recv_recovery_is_on())) {
block = NULL;
} else {
block = buf_LRU_get_free_block(0);
ut_ad(block);
}
buf_pool_mutex_enter();
if (buf_page_hash_get(space, offset)) {
/* The page is already in the buffer pool. */
/* NOTE: err_exit is also the target of the goto in the deleted
tablespace check below; it returns the preallocated block (if any)
and leaves through func_exit, skipping the n_pend_reads increment. */
err_exit:
if (block) {
mutex_enter(&block->mutex);
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
}
bpage = NULL;
goto func_exit;
}
if (fil_tablespace_deleted_or_being_deleted_in_mem(
space, tablespace_version)) {
/* The page belongs to a space which has been
deleted or is being deleted. */
*err = DB_TABLESPACE_DELETED;
goto err_exit;
}
if (block) {
/* Uncompressed block path: the control block is embedded in the
buf_block_t. */
bpage = &block->page;
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
/* We set a pass-type x-lock on the frame because then
the same thread which called for the read operation
(and is running now at this point of code) can wait
for the read to complete by waiting for the x-lock on
the frame; if the x-lock were recursive, the same
thread would illegally get the x-lock before the page
read is completed. The x-lock is cleared by the
io-handler thread. */
rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (UNIV_UNLIKELY(zip_size)) {
page_zip_set_size(&block->page.zip, zip_size);
/* buf_pool_mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
break the latching order in the reacquisition
of buf_pool_mutex. We also must defer this
operation until after the block descriptor has
been added to buf_pool->LRU and
buf_pool->page_hash. */
mutex_exit(&block->mutex);
data = buf_buddy_alloc(zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, TRUE);
}
mutex_exit(&block->mutex);
} else {
/* Compressed-only path: both the compressed frame and the control
block are taken from the buddy allocator. */
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(zip_size, &lru);
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
bpage = NULL;
goto func_exit;
}
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data;
/* The remaining initialization of the compressed-only control block
is done while holding buf_pool_zip_mutex. */
mutex_enter(&buf_pool_zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);
buf_page_init_low(bpage);
buf_pool_watch_notify(space, offset);
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
#ifdef UNIV_DEBUG
/* Start from a clean slate for the debug-only list membership flags */
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
bpage->in_flush_list = FALSE;
bpage->in_free_list = FALSE;
bpage->in_LRU_list = FALSE;
#endif /* UNIV_DEBUG */
ut_d(bpage->in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
buf_page_address_fold(space, offset), bpage);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
buf_LRU_insert_zip_clean(bpage);
buf_page_set_io_fix(bpage, BUF_IO_READ);
mutex_exit(&buf_pool_zip_mutex);
}
/* Reached only when a control block was set up for the read */
buf_pool->n_pend_reads++;
func_exit:
buf_pool_mutex_exit();
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
/* Commit the mini-transaction started at the top of the function */
mtr_commit(&mtr);
}
ut_ad(!bpage || buf_page_in_file(bpage));
return(bpage);
}
/************************************************************************
Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
If the page is already in the buffer pool, the preallocated free block is
given back and the existing page is returned via
buf_page_get_with_no_latch(). Otherwise the new block is buffer-fixed,
registered in mtr with MTR_MEMO_BUF_FIX, and some header fields of its
frame (FIL_PAGE_PREV, FIL_PAGE_NEXT, FIL_PAGE_TYPE,
FIL_PAGE_FILE_FLUSH_LSN) are reset. */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
/* out: pointer to the block, page bufferfixed */
ulint space, /* in: space id */
ulint offset, /* in: offset of the page within space in units of
a page */
ulint zip_size,/* in: compressed page size, or 0 */
mtr_t* mtr) /* in: mini-transaction handle */
{
buf_frame_t* frame;
buf_block_t* block;
buf_block_t* free_block = NULL;
ut_ad(mtr);
ut_ad(space || !zip_size);
/* Obtain a free block up front, before buf_pool_mutex is taken; it is
returned with buf_block_free() if the page turns out to exist already. */
free_block = buf_LRU_get_free_block(0);
buf_pool_mutex_enter();
block = (buf_block_t*) buf_page_hash_get(space, offset);
if (block && buf_page_in_file(&block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);
#endif
#ifdef UNIV_DEBUG_FILE_ACCESSES
block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
/* Page can be found in buf_pool */
buf_pool_mutex_exit();
buf_block_free(free_block);
return(buf_page_get_with_no_latch(space, zip_size,
offset, mtr));
}
/* If we get here, the page was not in buf_pool: init it there */
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr, "Creating space %lu page %lu to buffer\n",
(ulong) space, (ulong) offset);
}
#endif /* UNIV_DEBUG */
block = free_block;
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
/* Buffer-fix the new block on behalf of the caller */
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
buf_pool->n_pages_created++;
if (zip_size) {
void* data;
/* Output argument of buf_buddy_alloc(); its value is not used here */
ibool lru;
/* Prevent race conditions during buf_buddy_alloc(),
which may release and reacquire buf_pool_mutex,
by IO-fixing and X-latching the block. */
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool_mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
in order not to break the latching order in
the reacquisition of buf_pool_mutex. We also must
defer this operation until after the block descriptor
has been added to buf_pool->LRU and buf_pool->page_hash. */
data = buf_buddy_alloc(zip_size, &lru);
mutex_enter(&block->mutex);
block->page.zip.data = data;
/* To maintain the invariant
block->in_unzip_LRU_list
== buf_page_belongs_to_unzip_LRU(&block->page)
we have to add this block to unzip_LRU after
block->page.zip.data is set. */
ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
buf_unzip_LRU_add_block(block, FALSE);
/* Undo the temporary IO-fix and X-latch taken above */
buf_page_set_io_fix(&block->page, BUF_IO_NONE);
rw_lock_x_unlock(&block->lock);
}
/* Note the order: buf_pool_mutex is released first; block->mutex stays
held until the block has been pushed to the mtr memo and marked as
accessed. */
buf_pool_mutex_exit();
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
buf_page_set_accessed(&block->page, TRUE);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
frame = block->frame;
/* Fill the 4-byte previous and next page fields with 0xff bytes and
stamp the page type as FIL_PAGE_TYPE_ALLOCATED */
memset(frame + FIL_PAGE_PREV, 0xff, 4);
memset(frame + FIL_PAGE_NEXT, 0xff, 4);
mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
/* Reset to zero the file flush lsn field in the page; if the first
page of an ibdata file is 'created' in this function into the buffer
pool then we lose the original contents of the file flush lsn stamp.
Then InnoDB could in a crash recovery print a big, false, corruption
warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* Run the full pool validation on every 357th pass of the counter */
ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
return(block);
}
/************************************************************************
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
For a read: optionally decompresses the page, checks the page number and
space id stored in the page against the control block, checks for
corruption (terminating the process with exit(1) when corruption is found
and srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT), applies recovery when
recovery is on, and merges insert buffer entries for uncompressed pages.
For both reads and writes: clears io_fix, updates the buffer pool
counters and, for uncompressed blocks, releases the latch that was taken
for the I/O. */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
buf_page_t* bpage) /* in: pointer to the block in question */
{
enum buf_io_fix io_type;
/* TRUE when bpage is the control block of an uncompressed file page,
in which case it is cast to buf_block_t* below */
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
ut_a(buf_page_in_file(bpage));
/* We do not need protect io_fix here by mutex to read
it because this is the only function where we can change the value
from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
ensures that this is the only thread that handles the i/o for this
block. */
io_type = buf_page_get_io_fix(bpage);
ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
if (io_type == BUF_IO_READ) {
ulint read_page_no;
ulint read_space_id;
byte* frame;
if (buf_page_get_zip_size(bpage)) {
/* Compressed page: the header checks below are done on the
compressed frame. n_pend_unzip is raised around the decompression
attempt and lowered again on both the success and failure paths. */
frame = bpage->zip.data;
buf_pool->n_pend_unzip++;
if (uncompressed
&& !buf_zip_decompress((buf_block_t*) bpage,
FALSE)) {
buf_pool->n_pend_unzip--;
goto corrupt;
}
buf_pool->n_pend_unzip--;
} else {
ut_a(uncompressed);
frame = ((buf_block_t*) bpage)->frame;
}
/* If this page is not uninitialized and not in the
doublewrite buffer, then the page number and space id
should be the same as in block. */
read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
read_space_id = mach_read_from_4(
frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
/* The mismatches below are only reported to stderr; processing
continues afterwards. */
if (bpage->space == TRX_SYS_SPACE
&& trx_doublewrite_page_inside(bpage->offset)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: reading page %lu\n"
"InnoDB: which is in the"
" doublewrite buffer!\n",
(ulong) bpage->offset);
} else if (!read_space_id && !read_page_no) {
/* This is likely an uninitialized page. */
} else if ((bpage->space
&& bpage->space != read_space_id)
|| bpage->offset != read_page_no) {
/* We did not compare space_id to read_space_id
if bpage->space == 0, because the field on the
page may contain garbage in MySQL < 4.1.1,
which only supported bpage->space == 0. */
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: space id and page n:o"
" stored in the page\n"
"InnoDB: read in are %lu:%lu,"
" should be %lu:%lu!\n",
(ulong) read_space_id, (ulong) read_page_no,
(ulong) bpage->space,
(ulong) bpage->offset);
}
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
if (buf_page_is_corrupted(frame,
buf_page_get_zip_size(bpage))) {
/* Also entered via goto when decompression fails above */
corrupt:
/* The same message is printed both before and after the page dump */
fprintf(stderr,
"InnoDB: Database page corruption on disk"
" or a failed\n"
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) bpage->offset);
buf_page_print(frame, buf_page_get_zip_size(bpage));
fprintf(stderr,
"InnoDB: Database page corruption on disk"
" or a failed\n"
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) bpage->offset);
fputs("InnoDB: It is also possible that"
" your operating\n"
"InnoDB: system has corrupted its"
" own file cache\n"
"InnoDB: and rebooting your computer"
" removes the\n"
"InnoDB: error.\n"
"InnoDB: If the corrupt page is an index page\n"
"InnoDB: you can also try to"
" fix the corruption\n"
"InnoDB: by dumping, dropping,"
" and reimporting\n"
"InnoDB: the corrupt table."
" You can use CHECK\n"
"InnoDB: TABLE to scan your"
" table for corruption.\n"
"InnoDB: See also "
REFMAN "forcing-recovery.html\n"
"InnoDB: about forcing recovery.\n", stderr);
/* Terminate the process unless the configured force recovery level
is at least SRV_FORCE_IGNORE_CORRUPT */
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
fputs("InnoDB: Ending processing because of"
" a corrupt database page.\n",
stderr);
exit(1);
}
}
if (recv_recovery_is_on()) {
/* Pages must be uncompressed for crash recovery. */
ut_a(uncompressed);
recv_recover_page(TRUE, (buf_block_t*) bpage);
}
if (uncompressed && !recv_no_ibuf_operations) {
ibuf_merge_or_delete_for_page(
(buf_block_t*) bpage, bpage->space,
bpage->offset, buf_page_get_zip_size(bpage),
TRUE);
}
}
/* The state changes below are made while holding both the buffer pool
mutex and the mutex returned by buf_page_get_mutex(). */
buf_pool_mutex_enter();
mutex_enter(buf_page_get_mutex(bpage));
#ifdef UNIV_IBUF_COUNT_DEBUG
if (io_type == BUF_IO_WRITE || uncompressed) {
/* For BUF_IO_READ of compressed-only blocks, the
buffered operations will be merged by buf_page_get_gen()
after the block has been uncompressed. */
ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
}
#endif
/* Because this thread which does the unlocking is not the same that
did the locking, we use a pass value != 0 in unlock, which simply
removes the newest lock debug record, without checking the thread
id. */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
switch (io_type) {
case BUF_IO_READ:
/* NOTE that the call to ibuf may have moved the ownership of
the x-latch to this OS thread: do not let this confuse you in
debugging! */
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
buf_pool->n_pages_read++;
if (uncompressed) {
/* Only uncompressed blocks have a block->lock to release */
rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_READ);
}
break;
case BUF_IO_WRITE:
/* Write means a flush operation: call the completion
routine in the flush system */
buf_flush_write_complete(bpage);
if (uncompressed) {
rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_WRITE);
}
buf_pool->n_pages_written++;
break;
default:
/* Any other io_fix value is a fatal error */
ut_error;
}
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr, "Has %s page space %lu page no %lu\n",
io_type == BUF_IO_READ ? "read" : "written",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
#endif /* UNIV_DEBUG */
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit();
}
/*************************************************************************
Empties the buffer pool of file pages once an archive recovery has
finished. When this is called, every buffered file page must be
replaceable, i.e. neither latched nor modified. */
UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
{
	ibool	evicted;

	ut_ad(buf_all_freed());

	/* Keep evicting until a search finds nothing more to free. */
	do {
		evicted = buf_LRU_search_and_free_block(100);
	} while (evicted);

	buf_pool_mutex_enter();

	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

	buf_pool_mutex_exit();
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Validates the buffer buf_pool data structure.
Walks every block of every chunk, then the zip_clean list and the
flush_list, counting pages by category, and finally asserts that the
counts agree with the lengths of buf_pool->LRU, buf_pool->free and
buf_pool->flush_list and with the buf_pool->n_flush[] counters. Any
inconsistency triggers an assertion failure; the function only ever
returns TRUE. Compiled only when UNIV_DEBUG or UNIV_BUF_DEBUG is defined. */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
/* out: TRUE */
{
buf_page_t* b;
buf_chunk_t* chunk;
ulint i;
/* Pages found write-fixed, by flush type; compared with
buf_pool->n_flush[] at the end */
ulint n_single_flush = 0;
ulint n_lru_flush = 0;
ulint n_list_flush = 0;
/* Pages expected in buf_pool->LRU */
ulint n_lru = 0;
/* Pages expected in buf_pool->flush_list */
ulint n_flush = 0;
/* Blocks expected in buf_pool->free */
ulint n_free = 0;
/* Compressed-only pages seen on zip_clean and flush_list */
ulint n_zip = 0;
ut_ad(buf_pool);
buf_pool_mutex_enter();
chunk = buf_pool->chunks;
/* Check the uncompressed blocks. */
for (i = buf_pool->n_chunks; i--; chunk++) {
ulint j;
buf_block_t* block = chunk->blocks;
for (j = chunk->size; j--; block++) {
mutex_enter(&block->mutex);
switch (buf_block_get_state(block)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* These should only occur on
zip_clean, zip_free[], or flush_list. */
ut_error;
break;
case BUF_BLOCK_FILE_PAGE:
/* The page hash must map the block's (space, page_no) back to
this very block */
ut_a(buf_page_hash_get(buf_block_get_space(
block),
buf_block_get_page_no(
block))
== &block->page);
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(buf_page_get_io_fix(&block->page)
== BUF_IO_READ
|| !ibuf_count_get(buf_block_get_space(
block),
buf_block_get_page_no(
block)));
#endif
switch (buf_page_get_io_fix(&block->page)) {
case BUF_IO_NONE:
break;
case BUF_IO_WRITE:
switch (buf_page_get_flush_type(
&block->page)) {
case BUF_FLUSH_LRU:
n_lru_flush++;
/* Only the LRU flush type is asserted to hold a shared latch */
ut_a(rw_lock_is_locked(
&block->lock,
RW_LOCK_SHARED));
break;
case BUF_FLUSH_LIST:
n_list_flush++;
break;
case BUF_FLUSH_SINGLE_PAGE:
n_single_flush++;
break;
default:
ut_error;
}
break;
case BUF_IO_READ:
/* A page being read in must be exclusively latched */
ut_a(rw_lock_is_locked(&block->lock,
RW_LOCK_EX));
break;
}
n_lru++;
if (block->page.oldest_modification > 0) {
n_flush++;
}
break;
case BUF_BLOCK_NOT_USED:
n_free++;
break;
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* do nothing */
break;
}
mutex_exit(&block->mutex);
}
}
mutex_enter(&buf_pool_zip_mutex);
/* Check clean compressed-only blocks. */
for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
switch (buf_page_get_io_fix(b)) {
case BUF_IO_NONE:
/* All clean blocks should be I/O-unfixed. */
break;
case BUF_IO_READ:
/* In buf_LRU_free_block(), we temporarily set
b->io_fix = BUF_IO_READ for a newly allocated
control block in order to prevent
buf_page_get_gen() from decompressing the block. */
break;
default:
ut_error;
break;
}
ut_a(!b->oldest_modification);
ut_a(buf_page_hash_get(b->space, b->offset) == b);
n_lru++;
n_zip++;
}
/* Check dirty compressed-only blocks. */
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
switch (buf_page_get_state(b)) {
case BUF_BLOCK_ZIP_DIRTY:
ut_a(b->oldest_modification);
n_lru++;
n_flush++;
n_zip++;
switch (buf_page_get_io_fix(b)) {
case BUF_IO_NONE:
case BUF_IO_READ:
break;
case BUF_IO_WRITE:
switch (buf_page_get_flush_type(b)) {
case BUF_FLUSH_LRU:
n_lru_flush++;
break;
case BUF_FLUSH_LIST:
n_list_flush++;
break;
case BUF_FLUSH_SINGLE_PAGE:
n_single_flush++;
break;
default:
ut_error;
}
break;
}
break;
case BUF_BLOCK_FILE_PAGE:
/* uncompressed page */
/* Already counted in the chunk scan above, so nothing is added here */
break;
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* No other state is allowed on the flush list */
ut_error;
break;
}
ut_a(buf_page_hash_get(b->space, b->offset) == b);
}
mutex_exit(&buf_pool_zip_mutex);
/* Cross-check the collected counts against the pool bookkeeping */
if (n_lru + n_free > buf_pool->curr_size + n_zip) {
fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
(ulong) n_lru, (ulong) n_free,
(ulong) buf_pool->curr_size, (ulong) n_zip);
ut_error;
}
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
fprintf(stderr, "Free list len %lu, free blocks %lu\n",
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) n_free);
ut_error;
}
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
buf_pool_mutex_exit();
/* The LRU and flush validators are called after the pool mutex has
been released */
ut_a(buf_LRU_validate());
ut_a(buf_flush_validate());
return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*************************************************************************
Prints info of the buffer buf_pool data structure to stderr: the pool
size, list lengths and I/O counters, followed by an approximate count of
buffered index pages per index id. In builds where buf_validate() is
available (UNIV_DEBUG or UNIV_BUF_DEBUG) the pool is validated at the
end. */
UNIV_INTERN
void
buf_print(void)
/*===========*/
{
	dulint*		index_ids;	/* distinct index ids seen so far */
	ulint*		counts;		/* counts[k] = number of pages seen
					for index_ids[k] */
	ulint		size;
	ulint		i;
	ulint		j;
	dulint		id;
	ulint		n_found;	/* used entries in index_ids[] and
					counts[] */
	buf_chunk_t*	chunk;
	dict_index_t*	index;

	ut_ad(buf_pool);

	size = buf_pool->curr_size;

	/* One slot per page of the pool is allocated for the per-index
	statistics. */
	index_ids = mem_alloc(sizeof(dulint) * size);
	counts = mem_alloc(sizeof(ulint) * size);

	buf_pool_mutex_enter();

	/* Every counter is cast to ulong so that the argument type
	matches the %lu conversion even where ulint is a wider type. */
	fprintf(stderr,
		"buf_pool size %lu\n"
		"database pages %lu\n"
		"free pages %lu\n"
		"modified database pages %lu\n"
		"n pending decompressions %lu\n"
		"n pending reads %lu\n"
		"n pending flush LRU %lu list %lu single page %lu\n"
		"pages read %lu, created %lu, written %lu\n",
		(ulong) size,
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_unzip,
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
		(ulong) buf_pool->n_pages_read,
		(ulong) buf_pool->n_pages_created,
		(ulong) buf_pool->n_pages_written);

	/* Count the number of blocks belonging to each index in the buffer */

	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (ut_dulint_cmp(index_ids[j],
							  id) == 0) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					/* First page seen for this index:
					open a new slot. */
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	buf_pool_mutex_exit();

	/* The dictionary lookups and printing are done after the buffer
	pool mutex has been released. */
	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %lu in buffer is about %lu",
			(ulong) ut_dulint_get_low(index_ids[i]),
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	/* buf_validate() is only compiled under these two macros, whereas
	this function is also compiled under UNIV_DEBUG_PRINT alone. */
	ut_a(buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/*************************************************************************
Counts the pages in the buffer pool that are currently buffer-fixed or
I/O-fixed. Uncompressed file pages are found by scanning all chunks;
compressed-only pages are found on the zip_clean list and the flush
list. */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
				/* out: number of latched pages */
{
	buf_chunk_t*	chunk;
	buf_page_t*	bpage;
	ulint		n_chunks_left;
	ulint		n_latched	= 0;

	buf_pool_mutex_enter();

	/* Pass 1: uncompressed file pages in the chunks. */
	chunk = buf_pool->chunks;

	for (n_chunks_left = buf_pool->n_chunks; n_chunks_left--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks_left;

		for (n_blocks_left = chunk->size; n_blocks_left--; block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {

				mutex_enter(&block->mutex);

				if (block->page.buf_fix_count != 0
				    || buf_page_get_io_fix(&block->page)
				    != BUF_IO_NONE) {
					n_latched++;
				}

				mutex_exit(&block->mutex);
			}
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Pass 2: clean compressed-only pages. */
	bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean);

	while (bpage) {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(bpage) != BUF_IO_WRITE);

		if (bpage->buf_fix_count != 0
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
			n_latched++;
		}

		bpage = UT_LIST_GET_NEXT(list, bpage);
	}

	/* Pass 3: dirty compressed-only pages on the flush list. */
	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);

	while (bpage) {
		ut_ad(bpage->in_flush_list);

		switch (buf_page_get_state(bpage)) {
		case BUF_BLOCK_FILE_PAGE:
			/* Uncompressed page: handled in pass 1. */
			break;
		case BUF_BLOCK_ZIP_DIRTY:
			if (bpage->buf_fix_count != 0
			    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
				n_latched++;
			}
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			/* None of these states may appear on the
			flush list. */
			ut_error;
			break;
		}

		bpage = UT_LIST_GET_NEXT(list, bpage);
	}

	mutex_exit(&buf_pool_zip_mutex);
	buf_pool_mutex_exit();

	return(n_latched);
}
#endif /* UNIV_DEBUG */
/*************************************************************************
Sums the pending reads and the pending flushes of every flush type. The
counters are read without taking the buffer pool mutex. */
UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
				/* out: number of pending I/O operations */
{
	ulint	n_pending;

	n_pending = buf_pool->n_pend_reads;
	n_pending += buf_pool->n_flush[BUF_FLUSH_LRU];
	n_pending += buf_pool->n_flush[BUF_FLUSH_LIST];
	n_pending += buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];

	return(n_pending);
}
/*************************************************************************
Computes, as an integer percentage, the length of the flush list
relative to the combined length of the LRU list and the free list. */
UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
				/* out: modified page percentage ratio */
{
	ulint	n_modified;
	ulint	n_total;
	ulint	pct;

	buf_pool_mutex_enter();

	n_modified = UT_LIST_GET_LEN(buf_pool->flush_list);

	/* The extra 1 keeps the divisor nonzero when both lists are
	empty. */
	n_total = 1 + UT_LIST_GET_LEN(buf_pool->LRU)
		+ UT_LIST_GET_LEN(buf_pool->free);

	pct = (100 * n_modified) / n_total;

	buf_pool_mutex_exit();

	return(pct);
}
/*************************************************************************
Prints info of the buffer i/o. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
FILE* file) /* in/out: buffer where to print */
{
time_t current_time;
double time_elapsed;
ulint size;
ut_ad(buf_pool);
size = buf_pool->curr_size;
buf_pool_mutex_enter();
fprintf(file,
"Buffer pool size %lu\n"
"Free buffers %lu\n"
"Database pages %lu\n"
"Modified db pages %lu\n"
"Pending reads %lu\n"
"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
(ulong) size,
(ulong) UT_LIST_GET_LEN(buf_pool->free),
(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
(ulong) buf_pool->n_pend_reads,
(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
+ buf_pool->init_flush[BUF_FLUSH_LRU],
(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
+ buf_pool->init_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
buf_pool->last_printout_time);
buf_pool->last_printout_time = current_time;
fprintf(file,
"Pages read %lu, created %lu, written %lu\n"
"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
(ulong) buf_pool->n_pages_read,
(ulong) buf_pool->n_pages_created,
(ulong) buf_pool->n_pages_written,
(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
/ time_elapsed,
(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
/ time_elapsed,
(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
/ time_elapsed);
if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
fprintf(file, "Buffer pool hit rate %lu / 1000\n",
(ulong)
(1000 - ((1000 * (buf_pool->n_pages_read
- buf_pool->n_pages_read_old))
/ (buf_pool->n_page_gets
- buf_pool->n_page_gets_old))));
} else {
fputs("No buffer pool page gets since the last printout\n",
file);
}
buf_pool->n_page_gets_old = buf_pool->n_page_gets;
buf_pool->n_pages_read_old = buf_pool->n_pages_read;
buf_pool->n_pages_created_old = buf_pool->n_pages_created;
buf_pool->n_pages_written_old = buf_pool->n_pages_written;
/* Print some values to help us with visualizing what is
happening with LRU eviction. */
fprintf(file,
"LRU len: %lu, unzip_LRU len: %lu\n"
"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
UT_LIST_GET_LEN(buf_pool->LRU),
UT_LIST_GET_LEN(buf_pool->unzip_LRU),
buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
buf_pool_mutex_exit();
}
/**************************************************************************
Resets the baseline used for per-second averages: stores the current time
as the last printout time and copies each cumulative counter of buf_pool
into its "_old" counterpart. */
UNIV_INTERN
void
buf_refresh_io_stats(void)
/*======================*/
{
	/* New start of the measurement interval. */
	buf_pool->last_printout_time = time(NULL);

	/* Snapshot the page counters. */
	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
	buf_pool->n_pages_written_old = buf_pool->n_pages_written;

	/* Snapshot the page get counter. */
	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
}
/*************************************************************************
Checks every chunk of the buffer pool with buf_chunk_not_freed() while
holding the buffer pool mutex. If any chunk reports a block, the space id
and page offset of that block are printed to stderr and ut_error is
raised; otherwise the function returns TRUE. */
UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
				/* out: TRUE */
{
	buf_chunk_t*	chunk;
	ulint		n_left;

	ut_ad(buf_pool);

	buf_pool_mutex_enter();

	chunk = buf_pool->chunks;
	n_left = buf_pool->n_chunks;

	while (n_left > 0) {
		const buf_block_t*	block = buf_chunk_not_freed(chunk);

		if (UNIV_LIKELY_NULL(block)) {
			/* Report the offending page before raising
			the error. */
			fprintf(stderr,
				"Page %lu %lu still fixed or dirty\n",
				(ulong) block->page.space,
				(ulong) block->page.offset);
			ut_error;
		}

		chunk++;
		n_left--;
	}

	buf_pool_mutex_exit();

	return(TRUE);
}
/*************************************************************************
Tests, while holding the buffer pool mutex, whether the pending read
count and all per-type pending flush counts of the buffer pool add up to
zero. */
UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
				/* out: TRUE if there is no pending i/o */
{
	ibool	no_pending;

	buf_pool_mutex_enter();

	/* Any non-zero term makes the sum non-zero and the result FALSE. */
	no_pending = (buf_pool->n_pend_reads
		      + buf_pool->n_flush[BUF_FLUSH_LRU]
		      + buf_pool->n_flush[BUF_FLUSH_LIST]
		      + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE])
		? FALSE
		: TRUE;

	buf_pool_mutex_exit();

	return(no_pending);
}
/*************************************************************************
Reads the length of the free list of buffer blocks while holding the
buffer pool mutex. */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
				/* out: length of the free list */
{
	ulint	n_free;

	buf_pool_mutex_enter();

	/* Sample the list length inside the critical section. */
	n_free = UT_LIST_GET_LEN(buf_pool->free);

	buf_pool_mutex_exit();

	return(n_free);
}
#else /* !UNIV_HOTBACKUP */
/************************************************************************
Initializes a block as a file page for use in ibbackup --restore: sets
the page identity and state, resets the compressed page descriptor and
records the compressed page size in it. For a compressed page the
descriptor's data pointer is set to the address UNIV_PAGE_SIZE bytes
past block->frame. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/* in: space id */
	ulint		offset,	/* in: offset of the page within space
				in units of a page */
	ulint		zip_size,/* in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/* in: block to init */
{
	/* Page identity and state. */
	block->page.space = space;
	block->page.offset = offset;
	block->page.state = BUF_BLOCK_FILE_PAGE;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));

	page_zip_set_size(&block->page.zip, zip_size);

	if (zip_size != 0) {
		/* Compressed page: the data pointer is placed
		UNIV_PAGE_SIZE bytes past the start of the frame. */
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
#endif /* !UNIV_HOTBACKUP */