mariadb/trx/trx0i_s.c
sunny 37312683cc branches/innodb+: Merge revisions r5971:6130 from branches/zip.
------------------------------------------------------------------------
  r5971 | marko | 2009-09-23 23:03:51 +1000 (Wed, 23 Sep 2009) | 2 lines

  branches/zip: os_file_pwrite(): Make the code compile in InnoDB Hot Backup
  when the pwrite system call is not available.
  ------------------------------------------------------------------------
  r5972 | marko | 2009-09-24 05:44:52 +1000 (Thu, 24 Sep 2009) | 5 lines

  branches/zip: fil_node_open_file(): In InnoDB Hot Backup,
  determine the page size of single-file tablespaces before computing
    the file node size.  Otherwise, the space->size of compressed tablespaces
  would be computed with UNIV_PAGE_SIZE instead of key_block_size.
  This should fix Issue #313.
  ------------------------------------------------------------------------
  r5973 | marko | 2009-09-24 05:53:21 +1000 (Thu, 24 Sep 2009) | 2 lines

  branches/zip: recv_add_to_hash_table():
  Simplify obfuscated pointer arithmetics.
  ------------------------------------------------------------------------
  r5978 | marko | 2009-09-24 17:47:56 +1000 (Thu, 24 Sep 2009) | 1 line

  branches/zip: Fix warnings and errors when UNIV_HOTBACKUP is defined.
  ------------------------------------------------------------------------
  r5979 | marko | 2009-09-24 20:16:10 +1000 (Thu, 24 Sep 2009) | 4 lines

  branches/zip: ha_innodb.cc: Define MYSQL_PLUGIN_IMPORT when necessary.
  This preprocessor symbol has been recently introduced in MySQL 5.1.
  The InnoDB Plugin should remain source compatible with MySQL 5.1.24
  and later.
  ------------------------------------------------------------------------
  r5988 | calvin | 2009-09-26 05:14:43 +1000 (Sat, 26 Sep 2009) | 8 lines

  branches/zip: fix bug#47055 unconditional exit(1) on ERROR_WORKING_SET_QUOTA
  1453 (0x5AD) for InnoDB backend

  When error ERROR_WORKING_SET_QUOTA or ERROR_NO_SYSTEM_RESOURCES
  occurs, yields for 100ms and retries the operation.

  Approved by: Heikki (on IM)

  ------------------------------------------------------------------------
  r5992 | vasil | 2009-09-28 17:10:29 +1000 (Mon, 28 Sep 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for c5988.

  ------------------------------------------------------------------------
  r5994 | marko | 2009-09-28 18:33:59 +1000 (Mon, 28 Sep 2009) | 17 lines

  branches/zip: Try to prevent the reuse of tablespace identifiers after
  InnoDB has crashed during table creation.  Also, refuse to start if
  files with duplicate tablespace identifiers are encountered.

    fil_node_create(): Update fil_system->max_assigned_id.  This should
  prevent the reuse of a space->id when InnoDB does a full crash
  recovery and invokes fil_load_single_table_tablespaces().  Normally,
  fil_system->max_assigned_id is initialized from
  SELECT MAX(ID) FROM SYS_TABLES.

  fil_open_single_table_tablespace(): Return FALSE when
  fil_space_create() fails.

  fil_load_single_table_tablespace(): Exit if fil_space_create() fails
  and innodb_force_recovery=0.

  rb://173 approved by Heikki Tuuri.  This addresses Issue #335.
  ------------------------------------------------------------------------
  r5995 | marko | 2009-09-28 18:52:25 +1000 (Mon, 28 Sep 2009) | 17 lines

  branches/zip: Do not write to PAGE_INDEX_ID after page creation,
  not even when restoring an uncompressed page after a compression failure.

  btr_page_reorganize_low(): On compression failure, do not restore
  those page header fields that should not be affected by the
  reorganization.  Instead, compare the fields.

  page_zip_decompress(): Add the parameter ibool all, for copying all
  page header fields.  Pass the parameter all=TRUE on block read
  completion, redo log application, and page_zip_validate(); pass
  all=FALSE in all other cases.

  page_zip_reorganize(): Do not restore the uncompressed page on
  failure.  It will be restored (to pre-modification state) by the
  caller anyway.

  rb://167, Issue #346
  ------------------------------------------------------------------------
  r5996 | marko | 2009-09-28 22:46:02 +1000 (Mon, 28 Sep 2009) | 4 lines

  branches/zip: Address Issue #350 in comments.

  lock_rec_queue_validate(), lock_rec_queue_validate(): Note that
  this debug code may violate the latching order and cause deadlocks.
  ------------------------------------------------------------------------
  r5997 | marko | 2009-09-28 23:03:58 +1000 (Mon, 28 Sep 2009) | 12 lines

  branches/zip: Remove an assertion failure when the InnoDB data dictionary
  is inconsistent with the MySQL .frm file.

  ha_innobase::index_read(): When the index cannot be found,
  return an error.

  ha_innobase::change_active_index(): When prebuilt->index == NULL,
  set also prebuilt->index_usable = FALSE.  This is not needed for
  correctness, because prebuilt->index_usable is only checked by
  row_search_for_mysql(), which requires prebuilt->index != NULL.

  This addresses Issue #349.  Approved by Heikki Tuuri over IM.
  ------------------------------------------------------------------------
  r6005 | vasil | 2009-09-29 18:09:52 +1000 (Tue, 29 Sep 2009) | 4 lines

  branches/zip:

  ChangeLog: wrap around 78th column, not earlier.

  ------------------------------------------------------------------------
  r6006 | vasil | 2009-09-29 20:15:25 +1000 (Tue, 29 Sep 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for the release of 1.0.4.

  ------------------------------------------------------------------------
  r6007 | vasil | 2009-09-29 23:19:59 +1000 (Tue, 29 Sep 2009) | 6 lines

  branches/zip:

  Fix the year, should be 2009.

  Pointed by:	Calvin

  ------------------------------------------------------------------------
  r6026 | marko | 2009-09-30 17:18:24 +1000 (Wed, 30 Sep 2009) | 1 line

  branches/zip: Add some debug assertions for checking FSEG_MAGIC_N.
  ------------------------------------------------------------------------
  r6028 | marko | 2009-09-30 23:55:23 +1000 (Wed, 30 Sep 2009) | 3 lines

  branches/zip: recv_no_log_write: New debug flag for tracking down
  Mantis Issue #347.  No modifications should be made to the database
  while recv_apply_hashed_log_recs() is about to complete.
  ------------------------------------------------------------------------
  r6029 | calvin | 2009-10-01 06:32:02 +1000 (Thu, 01 Oct 2009) | 4 lines

    branches/zip: non-functional changes

  Fix typo.

  ------------------------------------------------------------------------
  r6031 | marko | 2009-10-01 21:24:33 +1000 (Thu, 01 Oct 2009) | 49 lines

  branches/zip: Clean up after a crash during DROP INDEX.
  When InnoDB crashes while dropping an index, ensure that
  the index will be completely dropped during crash recovery.

  row_merge_drop_index(): Before dropping an index, rename the index to
  start with TEMP_INDEX_PREFIX_STR and commit the change, so that
  row_merge_drop_temp_indexes() will drop the index after crash
  recovery if the server crashes while dropping the index.

  fseg_inode_try_get(): New function, forked from fseg_inode_get().
  Return NULL if the file segment index node is free.

  fseg_inode_get(): Assert that the file segment index node is not free.

  fseg_free_step(): If the file segment index node is already free,
  print a diagnostic message and return TRUE.

  fsp_free_seg_inode(): Write a nonzero number to FSEG_MAGIC_N, so that
  allocated-and-freed file segment index nodes can be better
  distinguished from uninitialized ones.

  This is rb://174, addressing Issue #348.

  Tested by restarting mysqld upon the completion of the added
  log_write_up_to() invocation below, during DROP INDEX.  The index was
  dropped after crash recovery, and re-issuing the DROP INDEX did not
  crash the server.
    
    Index: btr/btr0btr.c
    ===================================================================
    --- btr/btr0btr.c	(revision 6026)
    +++ btr/btr0btr.c	(working copy)
    @@ -42,6 +42,7 @@ Created 6/2/1994 Heikki Tuuri
     #include "ibuf0ibuf.h"
     #include "trx0trx.h"
    +#include "log0log.h"
     
     /*
     Latching strategy of the InnoDB B-tree
     --------------------------------------
    @@ -873,6 +874,8 @@ leaf_loop:
     
   		  goto leaf_loop;
   	  }
    +
    +	log_write_up_to(mtr.end_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
     top_loop:
   	  mtr_start(&mtr);

  ------------------------------------------------------------------------
  r6033 | calvin | 2009-10-02 06:19:46 +1000 (Fri, 02 Oct 2009) | 4 lines

  branches/zip: fix a typo in error message

  Reported as bug#47763.

  ------------------------------------------------------------------------
  r6043 | inaam | 2009-10-06 01:45:35 +1100 (Tue, 06 Oct 2009) | 12 lines

  branches/zip  rb://176

  Do not invalidate buffer pool while an LRU batch is active. Added
  code to buf_pool_invalidate() to wait for the running batches to finish.

  This patch also resets the state of buf_pool struct at invalidation. This
  addresses the concern where buf_pool->freed_page_clock becomes non-zero
  because we read in a system tablespace page for file format info at
  startup.

  Approved by: Marko

  ------------------------------------------------------------------------
  r6044 | pekka | 2009-10-07 01:44:54 +1100 (Wed, 07 Oct 2009) | 5 lines

  branches/zip:
  Add os_file_is_same() function for Hot Backup (inside ifdef UNIV_HOTBACKUP).
  This is part of the fix for Issue #186.
  Note! The Windows implementation is incomplete.

  ------------------------------------------------------------------------
  r6046 | pekka | 2009-10-08 20:24:56 +1100 (Thu, 08 Oct 2009) | 3 lines

  branches/zip: Revert r6044 which added os_file_is_same() function
  (issue#186). This functionality is moved to Hot Backup source tree.

  ------------------------------------------------------------------------
  r6048 | vasil | 2009-10-09 16:42:55 +1100 (Fri, 09 Oct 2009) | 16 lines

  branches/zip:

  When scanning a directory readdir() is called and stat() after it,
  if a file is deleted between the two calls stat will fail and the
  whole procedure will fail. Change this behavior to continue with the
  next entry if stat() fails because of nonexistent file. This is
  transparent change as it will make it look as if the file was deleted
  before the readdir() call.

  This change is needed in order to fix
  https://svn.innodb.com/mantis/view.php?id=174
  in which we need to abort if os_file_readdir_next_file()
  encounters "real" errors.

  Approved by:	Marko, Pekka (rb://177)

  ------------------------------------------------------------------------
  r6049 | vasil | 2009-10-10 03:05:26 +1100 (Sat, 10 Oct 2009) | 7 lines

  branches/zip:

  Fix compilation warning in Hot Backup:

  innodb/fil/fil0fil.c: In function 'fil_load_single_table_tablespace':
  innodb/fil/fil0fil.c:3253: warning: format '%lld' expects type 'long long int', but argument 6 has type 'ib_int64_t'

  ------------------------------------------------------------------------
  r6064 | calvin | 2009-10-14 02:23:35 +1100 (Wed, 14 Oct 2009) | 4 lines

  branches/zip: non-functional changes

  Changes from MySQL to fix build issue.

  ------------------------------------------------------------------------
  r6065 | inaam | 2009-10-14 04:43:13 +1100 (Wed, 14 Oct 2009) | 7 lines

  branches/zip rb://182

  Call fsync() on datafiles after a batch of pages is written to disk
  even when skip_innodb_doublewrite is set.

  Approved by: Heikki

  ------------------------------------------------------------------------
  r6080 | sunny | 2009-10-15 09:29:01 +1100 (Thu, 15 Oct 2009) | 3 lines

  branches/zip: Change page_mem_alloc_free() to inline.
  Fix Bug #47058 - Failure to compile innodb_plugin on solaris 10u7 + spro cc/CC 5.10

  ------------------------------------------------------------------------
  r6084 | vasil | 2009-10-15 16:21:17 +1100 (Thu, 15 Oct 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r6080.

  ------------------------------------------------------------------------
  r6095 | vasil | 2009-10-20 00:04:59 +1100 (Tue, 20 Oct 2009) | 7 lines

  branches/zip:

  Fix Bug#47808 innodb_information_schema.test fails when run under valgrind 

  by using the wait_until_rows_count macro that loops until the number of
  rows becomes 14 instead of sleep 0.1, which is obviously very fragile.

  ------------------------------------------------------------------------
  r6096 | vasil | 2009-10-20 00:06:09 +1100 (Tue, 20 Oct 2009) | 4 lines

  branches/zip:

  Add ChangeLog entry for r6095.

  ------------------------------------------------------------------------
  r6099 | jyang | 2009-10-22 13:58:39 +1100 (Thu, 22 Oct 2009) | 7 lines

  branches/zip: Port bug #46000 related changes from 5.1 to the zip
  branch. Due to the different code path for creating an index in the
  zip branch (compared to 5.1), the index reserved name check function
  is extended to be used in ha_innobase::add_index().
  rb://190  Approved by: Marko


  ------------------------------------------------------------------------
  r6100 | jyang | 2009-10-22 14:51:07 +1100 (Thu, 22 Oct 2009) | 6 lines

  branches/zip: As a request from mysql, WARN_LEVEL_ERROR cannot
  be used for push_warning_* call any more. Switch to 
  WARN_LEVEL_WARN. Bug #47233.
  rb://172 approved by Sunny Bains and Marko.


  ------------------------------------------------------------------------
  r6101 | jyang | 2009-10-23 19:45:50 +1100 (Fri, 23 Oct 2009) | 7 lines
    
  branches/zip: Update test result with the WARN_LEVEL_ERROR
  to WARN_LEVEL_WARN change. This is the same result as 
  submitted in rb://172 review, which approved by Sunny Bains
  and Marko.

  ------------------------------------------------------------------------
  r6102 | marko | 2009-10-26 18:32:23 +1100 (Mon, 26 Oct 2009) | 1 line

  branches/zip: row_prebuilt_struct::prebuilts: Unused field, remove.
  ------------------------------------------------------------------------
  r6103 | marko | 2009-10-27 00:46:18 +1100 (Tue, 27 Oct 2009) | 4 lines

  branches/zip: row_ins_alloc_sys_fields(): Zero out the system columns
  DB_TRX_ID, DB_ROLL_PTR and DB_ROW_ID, in order to avoid harmless
  Valgrind warnings about uninitialized data.  (The warnings were
  harmless, because the fields would be initialized at a later stage.)
  ------------------------------------------------------------------------
  r6105 | calvin | 2009-10-28 09:05:52 +1100 (Wed, 28 Oct 2009) | 6 lines

  branches/zip: backport r3848 from 6.0 branch

  ----
      branches/6.0: innobase_start_or_create_for_mysql(): Make the 10 MB
      minimum tablespace limit independent of UNIV_PAGE_SIZE. (Bug #41490)

  ------------------------------------------------------------------------
  r6107 | marko | 2009-10-29 01:10:34 +1100 (Thu, 29 Oct 2009) | 5 lines

  branches/zip: buf_page_set_old(): Improve UNIV_LRU_DEBUG diagnostics
  in order to catch the buf_pool->LRU_old corruption reported in Issue #381.

  buf_LRU_old_init(): Set the property from the tail towards the front
  of the buf_pool->LRU list, in order not to trip the debug check.
  ------------------------------------------------------------------------
  r6108 | calvin | 2009-10-29 16:58:04 +1100 (Thu, 29 Oct 2009) | 5 lines

  branches/zip: close file handle when building with UNIV_HOTBACKUP

  The change does not affect regular InnoDB engine. Confirmed by
  Marko.

  ------------------------------------------------------------------------
  r6109 | jyang | 2009-10-29 19:37:32 +1100 (Thu, 29 Oct 2009) | 7 lines

  branches/zip: In os_mem_alloc_large(), if we fail to attach
  the shared memory, reset memory pointer ptr to NULL, and
   allocate memory from conventional pool.
  Bug #48237 Error handling in os_mem_alloc_large appears to be incorrect
  rb://198  Approved by: Marko


  ------------------------------------------------------------------------
  r6110 | marko | 2009-10-29 21:44:57 +1100 (Thu, 29 Oct 2009) | 2 lines

  branches/zip: Makefile.am (INCLUDES): Merge a change from MySQL:
  Use $(srcdir)/include instead of $(top_srcdir)/storage/innobase/include.
  ------------------------------------------------------------------------
  r6111 | marko | 2009-10-29 22:04:11 +1100 (Thu, 29 Oct 2009) | 33 lines

  branches/zip: Fix corruption of buf_pool->LRU_old and improve debug assertions.
  This was reported as Issue #381.

  buf_page_set_old(): Assert that blocks may only be set old if
  buf_pool->LRU_old is initialized and buf_pool->LRU_old_len is nonzero.
  Assert that buf_pool->LRU_old points to the block at the old/new boundary.

  buf_LRU_old_adjust_len(): Invoke buf_page_set_old() after adjusting
  buf_pool->LRU_old and buf_pool->LRU_old_len, in order not to violate
  the added assertions.

  buf_LRU_old_init(): Replace buf_page_set_old() with a direct
  assignment to bpage->old, because these loops that initialize all the
  blocks would temporarily violate the assertions about
  buf_pool->LRU_old.

  buf_LRU_remove_block(): When setting buf_pool->LRU_old = NULL, also
  clear all bpage->old flags and set buf_pool->LRU_old_len = 0.

  buf_LRU_add_block_to_end_low(), buf_LRU_add_block_low(): Move the
  buf_page_set_old() call later in order not to violate the debug
  assertions.  If buf_pool->LRU_old is NULL, set old=FALSE.

  buf_LRU_free_block(): Replace the UNIV_LRU_DEBUG assertion with a
  dummy buf_page_set_old() call that performs more thorough checks.

  buf_LRU_validate(): Do not tolerate garbage in buf_pool->LRU_old_len
  even if buf_pool->LRU_old is NULL.  Check that bpage->old is monotonic.

  buf_relocate(): Make the UNIV_LRU_DEBUG checks stricter.

  buf0buf.h: Revise the documentation of buf_page_t::old and
  buf_pool_t::LRU_old_len.
  ------------------------------------------------------------------------
  r6112 | calvin | 2009-10-30 01:21:15 +1100 (Fri, 30 Oct 2009) | 4 lines

  branches/zip: consideration for icc compilers

  Proposed by MySQL, and approved by Marko.

  ------------------------------------------------------------------------
  r6113 | vasil | 2009-10-30 03:15:50 +1100 (Fri, 30 Oct 2009) | 93 lines

  branches/zip: Merge r5912:6112 from branches/5.1:

  (after this merge the innodb-autoinc test starts to fail, but
  I commit anyway because it would be easier to investigate the
  failure this way)

    ------------------------------------------------------------------------
    r5952 | calvin | 2009-09-22 19:45:07 +0300 (Tue, 22 Sep 2009) | 7 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
    
    branches/5.1: fix bug#42383: Can't create table 'test.bug39438'
    
    For embedded server, MySQL may pass in full path, which is
    currently disallowed. It is needed to relax the condition by
    accepting full paths in the embedded case.
    
    Approved by: Heikki (on IM)
    ------------------------------------------------------------------------
    r6032 | vasil | 2009-10-01 15:55:49 +0300 (Thu, 01 Oct 2009) | 8 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
    
    branches/5.1:
    
    Fix Bug#38996 Race condition in ANALYZE TABLE
    
    by serializing ANALYZE TABLE inside InnoDB.
    
    Approved by:	Heikki (rb://175)
    
    ------------------------------------------------------------------------
    r6045 | jyang | 2009-10-08 02:27:08 +0300 (Thu, 08 Oct 2009) | 7 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       A /branches/5.1/mysql-test/innodb_bug47777.result
       A /branches/5.1/mysql-test/innodb_bug47777.test
    
    branches/5.1: Fix bug #47777. Treat the Geometry data same as
    Binary BLOB in ha_innobase::store_key_val_for_row(), since the
    Geometry data is stored as Binary BLOB in Innodb.
    
    Review: rb://180 approved by Marko Makela.
    
    
      ------------------------------------------------------------------------
    r6051 | sunny | 2009-10-12 07:05:00 +0300 (Mon, 12 Oct 2009) | 6 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/mysql-test/innodb-autoinc.result
       M /branches/5.1/mysql-test/innodb-autoinc.test
    
    branches/5.1: Ignore negative values supplied by the user when calculating the
    next value to store in dict_table_t. Setting autoincrement columns to negative
    values is undefined behavior and this change should bring the behavior of
    InnoDB closer to what users expect. Added several tests to check.
    rb://162
    
    ------------------------------------------------------------------------
    r6052 | sunny | 2009-10-12 07:09:56 +0300 (Mon, 12 Oct 2009) | 4 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/mysql-test/innodb-autoinc.result
       M /branches/5.1/mysql-test/innodb-autoinc.test
    
    branches/5.1: Reset the statement level autoinc counter on ROLLBACK. Fix
    the test results too.
    rb://164
    
    ------------------------------------------------------------------------
    r6053 | sunny | 2009-10-12 07:37:49 +0300 (Mon, 12 Oct 2009) | 6 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/mysql-test/innodb-autoinc.result
       M /branches/5.1/mysql-test/innodb-autoinc.test
    
    branches/5.1: Copy the maximum AUTOINC value from the old table to the new
    table when MySQL does a CREATE INDEX ON T. This is required because MySQL
    does a table copy, rename and drops the old table.
    Fix Bug#47125: auto_increment start value is ignored if an index is created and engine=innodb
    rb://168
    
    ------------------------------------------------------------------------
    r6076 | vasil | 2009-10-14 19:30:12 +0300 (Wed, 14 Oct 2009) | 4 lines
    Changed paths:
       M /branches/5.1/row/row0mysql.c
    
    branches/5.1:
      
  Fix typo.
      
    ------------------------------------------------------------------------

  ------------------------------------------------------------------------
  r6114 | vasil | 2009-10-30 03:43:51 +1100 (Fri, 30 Oct 2009) | 6 lines

  branches/zip:

  * Add ChangeLog entries for latest changes
  * Obey alphabetical order in the list of the files
  * White-space fixup

  ------------------------------------------------------------------------
  r6121 | sunny | 2009-10-30 10:42:11 +1100 (Fri, 30 Oct 2009) | 7 lines

  branches/zip: This test has been problematic for some time now. The underlying
  bug is that the data dictionaries get out of sync. In the AUTOINC code we
  try and apply salve to the symptoms. In the past MySQL made some unrelated
  change and the dictionaries stopped getting out of sync and this test started
  to fail. Now, it seems they have reverted that change and the test is
  passing again. I suspect this is not the last time that this test will change.

  ------------------------------------------------------------------------
  r6124 | jyang | 2009-10-30 19:02:31 +1100 (Fri, 30 Oct 2009) | 5 lines

  branches/zip: Correct the bug number for -r6109 change
  from # 48273 to #48237



  ------------------------------------------------------------------------
  r6126 | vasil | 2009-10-30 19:36:07 +1100 (Fri, 30 Oct 2009) | 45 lines

  branches/zip: Merge r6112:6125 from branches/5.1:

  (skipping r6122 and r6123, Jimmy says these are already present and need
  not be merged):

    ------------------------------------------------------------------------
    r6122 | jyang | 2009-10-30 05:18:38 +0200 (Fri, 30 Oct 2009) | 7 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
       M /branches/5.1/mysql-test/innodb_bug44369.result
       M /branches/5.1/mysql-test/innodb_bug44369.test
       M /branches/5.1/mysql-test/innodb_bug46000.result
       M /branches/5.1/mysql-test/innodb_bug46000.test
    
    branches/5.1: Change WARN_LEVEL_ERROR to WARN_LEVEL_WARN
    for push_warning_printf() call in innodb.
    Fix Bug#47233: Innodb calls push_warning(MYSQL_ERROR::WARN_LEVEL_ERROR)
    
    rb://170 approved by Marko.
    
    
    ------------------------------------------------------------------------
    r6123 | jyang | 2009-10-30 05:43:06 +0200 (Fri, 30 Oct 2009) | 8 lines
    Changed paths:
       M /branches/5.1/os/os0proc.c
    
    branches/5.1: In os_mem_alloc_large(), if we fail to attach
    the shared memory, reset memory pointer ptr to NULL, and
    allocate memory from conventional pool. This is a port
    from branches/zip.
    Bug #48237 Error handling in os_mem_alloc_large appears to be incorrect
    rb://198  Approved by: Marko
    
    
    ------------------------------------------------------------------------
    r6125 | vasil | 2009-10-30 10:31:23 +0200 (Fri, 30 Oct 2009) | 4 lines
    Changed paths:
       M /branches/5.1/handler/ha_innodb.cc
    
    branches/5.1:
    
    White-space fixup.
    
    ------------------------------------------------------------------------

  ------------------------------------------------------------------------
  r6130 | marko | 2009-11-02 20:42:56 +1100 (Mon, 02 Nov 2009) | 9 lines

  branches/zip: Free all resources at shutdown. Set pointers to NULL, so
  that Valgrind will not complain about freed data structures that are
  reachable via pointers.  This addresses Bug #45992 and Bug #46656.

  This patch is mostly based on changes copied from branches/embedded-1.0,
  mainly c5432, c3439, c3134, c2994, c2978, but also some other code was
  copied.  Some added cleanup code is specific to MySQL/InnoDB.

  rb://199 approved by Sunny Bains
    ------------------------------------------------------------------------
2009-11-04 06:02:00 +00:00

1481 lines
38 KiB
C

/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file trx/trx0i_s.c
INFORMATION SCHEMA innodb_trx, innodb_locks and
innodb_lock_waits tables fetch code.
The code below fetches information needed to fill those
3 dynamic tables and uploads it into a "transactions
table cache" for later retrieval.
Created July 17, 2007 Vasil Dimov
*******************************************************/
#include <mysql/plugin.h>
#include "mysql_addons.h"
#include "univ.i"
#include "buf0buf.h"
#include "dict0dict.h"
#include "ha0storage.h"
#include "ha_prototypes.h"
#include "hash0hash.h"
#include "lock0iter.h"
#include "lock0lock.h"
#include "mem0mem.h"
#include "page0page.h"
#include "rem0rec.h"
#include "row0row.h"
#include "srv0srv.h"
#include "sync0rw.h"
#include "sync0sync.h"
#include "sync0types.h"
#include "trx0i_s.h"
#include "trx0sys.h"
#include "trx0trx.h"
#include "ut0mem.h"
#include "ut0ut.h"
/** Initial number of rows in the table cache. The chunk-sizing note on
MEM_CHUNKS_IN_TABLE_CACHE assumes that this is the size, in rows, of the
first chunk. */
#define TABLE_CACHE_INITIAL_ROWSNUM 1024
/** @brief The maximum number of chunks to allocate for a table cache.

The rows of a table cache are stored in a set of chunks. When a new
row is added, a new chunk is allocated if necessary. Assuming that the
first one is 1024 rows (TABLE_CACHE_INITIAL_ROWSNUM) and each
subsequent is N/2 where N is the number of rows we have allocated till
now, then 39th chunk would accommodate 1677416425 rows and all chunks
would accommodate 3354832851 rows.

NOTE(review): 1677416425 is half of 3354832851, so the second figure
looks like the total allocated before the 39th chunk is added rather
than the total of all 39 chunks - confirm.

This constant is the length of the i_s_table_cache_t::chunks[] array and
the loop bound used by table_cache_init() and table_cache_free(). */
#define MEM_CHUNKS_IN_TABLE_CACHE 39
/** The following are auxiliary macros for testing. Each one is wrapped
in its own "#if 0" block and is therefore disabled; do not enable them
in a production environment. */
/* @{ */
#if 0
/** If this is enabled then lock folds will always be different,
resulting in equal rows being put in different cells of the hash
table. Checking for duplicates will be flawed because a different
fold will be calculated when a row is searched for in the hash table. */
#define TEST_LOCK_FOLD_ALWAYS_DIFFERENT
#endif
#if 0
/** This effectively kills the search-for-duplicate-before-adding-a-row
function, but searching in the hash is still performed. It will always
be assumed that the lock is not present and insertion will be performed
in the hash table. */
#define TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
#endif
#if 0
/** This aggressively repeats adding each row many times. Depending on
the above settings this may be a no-op or may result in lots of rows
being added. */
#define TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
#endif
#if 0
/** Very similar to TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T, but the
hash table search is not performed at all. */
#define TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
#endif
#if 0
/** Do not insert each row into the hash table. Duplicates may appear
if this is enabled; also, searching the hash table becomes a no-op
because it will be empty. */
#define TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
#endif
/* @} */
/** Memory limit passed to ha_storage_put_memlim().
Computed as TRX_I_S_MEM_LIMIT minus the cache's mem_allocd counter.
@param cache pointer to a structure that has a mem_allocd member; the
member name matches struct trx_i_s_cache_struct (the intermediate
buffer), not a hash storage object
@return maximum allowed allocation size */
#define MAX_ALLOWED_FOR_STORAGE(cache) \
(TRX_I_S_MEM_LIMIT \
- (cache)->mem_allocd)
/** Memory limit in table_cache_create_empty_row().
Computed as TRX_I_S_MEM_LIMIT minus the cache's mem_allocd counter and
minus the current size reported by ha_storage_get_size() for the cache's
storage member. The argument is expanded twice, so it must not have side
effects.
NOTE(review): if ulint is an unsigned type, the result wraps around
instead of going negative when the subtracted amounts exceed
TRX_I_S_MEM_LIMIT - confirm that callers keep the total within the limit.
@param cache pointer to a structure that has mem_allocd and storage
members; the member names match struct trx_i_s_cache_struct (the
intermediate buffer), not a hash storage object
@return maximum allowed allocation size */
#define MAX_ALLOWED_FOR_ALLOC(cache) \
(TRX_I_S_MEM_LIMIT \
- (cache)->mem_allocd \
- ha_storage_get_size((cache)->storage))
/** Memory for each table in the intermediate buffer is allocated in
separate chunks. These chunks are considered to be concatenated to
represent one flat array of rows. One such descriptor exists per slot of
i_s_table_cache_t::chunks[]. */
typedef struct i_s_mem_chunk_struct {
ulint offset; /*!< offset, in number of rows;
presumably the position of this
chunk's first row within the flat
array - confirm against
table_cache_create_empty_row() */
ulint rows_allocd; /*!< the size of this chunk, in number
of rows */
void* base; /*!< start of the chunk; NULL while no
memory is attached (set to NULL by
table_cache_init() and again by
table_cache_free() after freeing) */
} i_s_mem_chunk_t;
/** This represents one table's cache: bookkeeping counters plus a
fixed-size array of chunk descriptors that hold the rows. Initialized by
table_cache_init() and released by table_cache_free(). */
typedef struct i_s_table_cache_struct {
ulint rows_used; /*!< number of used rows; reset to 0 by
table_cache_init() */
ulint rows_allocd; /*!< number of allocated rows; reset to
0 by table_cache_init() */
ulint row_size; /*!< size of a single row (presumably in
bytes - confirm), as passed to
table_cache_init() */
i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
memory chunks that stores the
rows; a slot whose base is NULL
has no memory attached */
} i_s_table_cache_t;
/** This structure describes the intermediate buffer: one table cache
for each of the three INFORMATION SCHEMA tables, the auxiliary hash table
and string storage they use, and the locking and memory-accounting state
that goes with them. */
struct trx_i_s_cache_struct {
rw_lock_t rw_lock; /*!< read-write lock protecting
the rest of this structure */
ullint last_read; /*!< last time the cache was read;
measured in microseconds since
epoch */
mutex_t last_read_mutex;/*!< mutex protecting the
last_read member; needed because
last_read is updated while only a
shared lock on the rw_lock member
is held */
i_s_table_cache_t innodb_trx; /*!< innodb_trx table */
i_s_table_cache_t innodb_locks; /*!< innodb_locks table */
i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */
/** Number of cells in locks_hash; the hash table size is
LOCKS_HASH_CELLS_NUM * sizeof(void*) bytes */
#define LOCKS_HASH_CELLS_NUM 10000
hash_table_t* locks_hash; /*!< hash table used to eliminate
duplicate entries in the
innodb_locks table */
/** Initial size of the cache storage */
#define CACHE_STORAGE_INITIAL_SIZE 1024
/** Number of hash cells in the cache storage */
#define CACHE_STORAGE_HASH_CELLS 2048
ha_storage_t* storage; /*!< storage for external volatile
data that may no longer be
available later, when we release
the kernel mutex */
ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*();
subtracted from TRX_I_S_MEM_LIMIT
by the MAX_ALLOWED_FOR_* macros */
ibool is_truncated; /*!< this is TRUE if the memory
limit was hit and thus the data
in the cache is truncated */
};
/** Statically allocated storage for the intermediate buffer where data
needed to fill the INFORMATION SCHEMA tables is fetched. It is file-local;
other code reaches it through the trx_i_s_cache pointer. */
static trx_i_s_cache_t trx_i_s_cache_static;
/** Pointer to the intermediate buffer where data needed to fill the
INFORMATION SCHEMA tables is fetched and later retrieved by the C++
code in handler/i_s.cc. Initialized to the address of
trx_i_s_cache_static. */
UNIV_INTERN trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static;
/*******************************************************************//**
Determines the heap number of the record a waiting lock refers to.
For a record lock in waiting state this is the position of the only bit
that is set in the lock; a table lock has no record, so ULINT_UNDEFINED
is returned for it.
@return record number within the heap, or ULINT_UNDEFINED for a table lock */
static
ulint
wait_lock_get_heap_no(
/*==================*/
	const lock_t*	lock)	/*!< in: lock */
{
	ulint	heap_no = ULINT_UNDEFINED;

	switch (lock_get_type(lock)) {
	case LOCK_TABLE:
		/* no record is involved: keep ULINT_UNDEFINED */
		break;
	case LOCK_REC:
		heap_no = lock_rec_find_set_bit(lock);
		/* a waiting record lock must have a bit set */
		ut_a(heap_no != ULINT_UNDEFINED);
		break;
	default:
		ut_error;
	}

	return(heap_no);
}
/*******************************************************************//**
Initializes the members of a table cache. No row memory is allocated
here; every chunk is put into a well-defined empty state (offset 0,
0 rows, NULL base) so that table_cache_create_empty_row() can rely on
chunks[0].offset being 0 even when the cache object does not live in
zero-initialized memory. */
static
void
table_cache_init(
/*=============*/
	i_s_table_cache_t*	table_cache,	/*!< out: table cache */
	size_t			row_size)	/*!< in: the size of a
						row, in bytes */
{
	ulint	i;

	table_cache->rows_used = 0;
	table_cache->rows_allocd = 0;
	table_cache->row_size = row_size;

	for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {

		/* the offset of chunk 0 is never assigned anywhere
		else, it must start out as 0; the offsets of the other
		chunks are adjusted when their predecessor is
		allocated */
		table_cache->chunks[i].offset = 0;
		table_cache->chunks[i].rows_allocd = 0;

		/* the memory is actually allocated in
		table_cache_create_empty_row() */
		table_cache->chunks[i].base = NULL;
	}
}
/*******************************************************************//**
Frees a table cache: releases the memory of every chunk that has been
allocated and resets its base pointer to NULL. */
static
void
table_cache_free(
/*=============*/
	i_s_table_cache_t*	table_cache)	/*!< in/out: table cache */
{
	ulint	chunk_no;

	for (chunk_no = 0;
	     chunk_no < MEM_CHUNKS_IN_TABLE_CACHE;
	     chunk_no++) {

		/* chunks get their memory in
		table_cache_create_empty_row(); the ones that never
		got any have nothing to release */
		if (table_cache->chunks[chunk_no].base == NULL) {
			continue;
		}

		mem_free(table_cache->chunks[chunk_no].base);
		table_cache->chunks[chunk_no].base = NULL;
	}
}
/*******************************************************************//**
Returns an empty row from a table cache. The row is allocated if no more
empty rows are available. The number of used rows is incremented.
If the memory limit is hit then NULL is returned and nothing is
allocated.
Memory is obtained one chunk at a time: the first chunk is requested
with room for TABLE_CACHE_INITIAL_ROWSNUM rows and every further chunk
with room for half of the rows allocated so far. The bytes obtained are
added to cache->mem_allocd.
@return empty row, or NULL if out of memory */
static
void*
table_cache_create_empty_row(
/*=========================*/
i_s_table_cache_t* table_cache, /*!< in/out: table cache */
trx_i_s_cache_t* cache) /*!< in/out: cache to record
how many bytes are
allocated */
{
ulint i;
void* row;
ut_a(table_cache->rows_used <= table_cache->rows_allocd);
if (table_cache->rows_used == table_cache->rows_allocd) {
/* rows_used == rows_allocd means that new chunk needs
to be allocated: either no more empty rows in the
last allocated chunk or nothing has been allocated yet
(rows_used == rows_allocd == 0); */
i_s_mem_chunk_t* chunk;
ulint req_bytes;
ulint got_bytes;
ulint req_rows;
ulint got_rows;
/* find the first not allocated chunk */
for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
if (table_cache->chunks[i].base == NULL) {
break;
}
}
/* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
have been allocated :-X */
ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
/* allocate the chunk we just found */
if (i == 0) {
/* first chunk, nothing is allocated yet */
req_rows = TABLE_CACHE_INITIAL_ROWSNUM;
} else {
/* Memory is increased by the formula
new = old + old / 2; We are trying not to be
aggressive here (= using the common new = old * 2)
because the allocated memory will not be freed
until InnoDB exit (it is reused). So it is better
to once allocate the memory in more steps, but
have less unused/wasted memory than to use less
steps in allocation (which is done once in a
lifetime) but end up with lots of unused/wasted
memory. */
req_rows = table_cache->rows_allocd / 2;
}
req_bytes = req_rows * table_cache->row_size;
/* give up before touching any state if the request does not
fit into what may still be allocated; rows_used is not
incremented in this case */
if (req_bytes > MAX_ALLOWED_FOR_ALLOC(cache)) {
return(NULL);
}
chunk = &table_cache->chunks[i];
/* got_bytes is filled in by mem_alloc2(); the chunk is sized
and accounted for by that value rather than by req_bytes */
chunk->base = mem_alloc2(req_bytes, &got_bytes);
got_rows = got_bytes / table_cache->row_size;
cache->mem_allocd += got_bytes;
#if 0
printf("allocating chunk %d req bytes=%lu, got bytes=%lu, "
"row size=%lu, "
"req rows=%lu, got rows=%lu\n",
i, req_bytes, got_bytes,
table_cache->row_size,
req_rows, got_rows);
#endif
chunk->rows_allocd = got_rows;
table_cache->rows_allocd += got_rows;
/* adjust the offset of the next chunk (there is no next chunk
after the last slot of the array) */
if (i < MEM_CHUNKS_IN_TABLE_CACHE - 1) {
table_cache->chunks[i + 1].offset
= chunk->offset + chunk->rows_allocd;
}
/* return the first empty row in the newly allocated
chunk */
row = chunk->base;
} else {
char* chunk_start;
ulint offset;
/* there is an empty row, no need to allocate new
chunks */
/* find the first chunk that contains allocated but
empty/unused rows */
for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
if (table_cache->chunks[i].offset
+ table_cache->chunks[i].rows_allocd
> table_cache->rows_used) {
break;
}
}
/* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
are full, but
table_cache->rows_used != table_cache->rows_allocd means
exactly the opposite - there are allocated but
empty/unused rows :-X */
ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
chunk_start = (char*) table_cache->chunks[i].base;
/* index of the first unused row, relative to the start of
the chunk that was found */
offset = table_cache->rows_used
- table_cache->chunks[i].offset;
row = chunk_start + offset * table_cache->row_size;
}
table_cache->rows_used++;
return(row);
}
/*******************************************************************//**
Fills i_s_trx_row_t object with the data of one transaction.
The query text, if there is one, is copied into cache->storage and is
cut to at most TRX_I_S_TRX_QUERY_MAX_LEN characters.
If memory can not be allocated then FALSE is returned.
@return TRUE on success, FALSE if allocation fails */
static
ibool
fill_trx_row(
/*=========*/
	i_s_trx_row_t*		row,		/*!< out: result object
						that's filled */
	const trx_t*		trx,		/*!< in: transaction to
						get data from */
	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
						corresponding row in
						innodb_locks if trx is
						waiting or NULL if trx
						is not waiting */
	trx_i_s_cache_t*	cache)		/*!< in/out: cache into
						which to copy volatile
						strings */
{
	row->trx_id = trx_get_id(trx);
	row->trx_started = (ib_time_t) trx->start_time;
	row->trx_state = trx_get_que_state_str(trx);

	if (trx->wait_lock == NULL) {
		/* not waiting: there must be no requested lock row */
		ut_a(requested_lock_row == NULL);

		row->requested_lock_row = NULL;
		row->trx_wait_started = 0;
	} else {
		/* waiting: the caller must have supplied the row of
		the lock that is being waited for */
		ut_a(requested_lock_row != NULL);

		row->requested_lock_row = requested_lock_row;
		row->trx_wait_started = (ib_time_t) trx->wait_started;
	}

	row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx));

	/* For internal transactions e.g., purge and transactions
	being recovered at startup there is no associated MySQL
	thread data structure; 0 is reported for them. */
	row->trx_mysql_thread_id = (trx->mysql_thd != NULL)
		? thd_get_thread_id(trx->mysql_thd)
		: 0;

	if (trx->mysql_query_str == NULL || *trx->mysql_query_str == NULL) {
		/* no query text is available */
		row->trx_query = NULL;

		return(TRUE);
	}

	if (strlen(*trx->mysql_query_str) > TRX_I_S_TRX_QUERY_MAX_LEN) {
		/* too long: store a NUL-terminated prefix only */
		char	truncated[TRX_I_S_TRX_QUERY_MAX_LEN + 1];

		memcpy(truncated, *trx->mysql_query_str,
		       TRX_I_S_TRX_QUERY_MAX_LEN);
		truncated[TRX_I_S_TRX_QUERY_MAX_LEN] = '\0';

		row->trx_query = ha_storage_put_memlim(
			cache->storage, truncated,
			TRX_I_S_TRX_QUERY_MAX_LEN + 1,
			MAX_ALLOWED_FOR_STORAGE(cache));
	} else {
		row->trx_query = ha_storage_put_str_memlim(
			cache->storage, *trx->mysql_query_str,
			MAX_ALLOWED_FOR_STORAGE(cache));
	}

	/* a NULL here means that the storage could not take the copy */
	return(row->trx_query != NULL);
}
/*******************************************************************//**
Format the nth field of "rec" and put it in "buf". Every field except
the first one (n > 0) is preceded by ", ". The result is always
NUL-terminated, unless buf_size is 0, in which case nothing is written.
@return the number of bytes that were written to "buf" (including the
terminating NUL) */
static
ulint
put_nth_field(
/*==========*/
	char*			buf,	/*!< out: buffer */
	ulint			buf_size,/*!< in: buffer size in bytes */
	ulint			n,	/*!< in: number of field */
	const dict_index_t*	index,	/*!< in: index */
	const rec_t*		rec,	/*!< in: record */
	const ulint*		offsets)/*!< in: record offsets, returned
					by rec_get_offsets() */
{
	const byte*	field;
	ulint		field_len;
	ulint		written = 0;

	ut_ad(rec_offs_validate(rec, NULL, offsets));

	if (buf_size == 0) {
		/* not even room for the terminating NUL */
		return(0);
	}

	if (n > 0) {
		/* a separator goes in front of every field but the
		first one */
		if (buf_size < 3) {
			/* the separator and its NUL do not fit: emit an
			empty string instead */
			buf[0] = '\0';

			return(1);
		}

		/* copies the NUL too; it is overwritten below */
		memcpy(buf, ", ", 3);

		buf += 2;
		buf_size -= 2;
		written = 2;
	}

	/* here buf_size >= 1 */

	field = rec_get_nth_field(rec, offsets, n, &field_len);

	written += row_raw_format((const char*) field, field_len,
				  dict_index_get_nth_field(index, n),
				  buf, buf_size);

	return(written);
}
/*******************************************************************//**
Fills the "lock_data" member of i_s_locks_row_t object.
The page of the locked record is looked up with buf_page_try_get(); if
that returns no block then *lock_data is set to NULL and TRUE is
returned. Otherwise *lock_data is set to a copy, kept in cache->storage,
of either a fixed text naming the infimum/supremum pseudo-record or of
the formatted first dict_index_get_n_unique() fields of the record
(at most TRX_I_S_LOCK_DATA_MAX_LEN bytes).
If memory can not be allocated then FALSE is returned.
@return FALSE if allocation fails */
static
ibool
fill_lock_data(
/*===========*/
const char** lock_data,/*!< out: "lock_data" to fill */
const lock_t* lock, /*!< in: lock used to find the data */
ulint heap_no,/*!< in: rec num used to find the data */
trx_i_s_cache_t* cache) /*!< in/out: cache where to store
volatile data */
{
mtr_t mtr;
const buf_block_t* block;
const page_t* page;
const rec_t* rec;
ut_a(lock_get_type(lock) == LOCK_REC);
mtr_start(&mtr);
block = buf_page_try_get(lock_rec_get_space_id(lock),
lock_rec_get_page_no(lock),
&mtr);
if (block == NULL) {
/* no block was obtained: report "no data", which is not
an allocation failure */
*lock_data = NULL;
mtr_commit(&mtr);
return(TRUE);
}
page = (const page_t*) buf_block_get_frame(block);
rec = page_find_rec_with_heap_no(page, heap_no);
if (page_rec_is_infimum(rec)) {
*lock_data = ha_storage_put_str_memlim(
cache->storage, "infimum pseudo-record",
MAX_ALLOWED_FOR_STORAGE(cache));
} else if (page_rec_is_supremum(rec)) {
*lock_data = ha_storage_put_str_memlim(
cache->storage, "supremum pseudo-record",
MAX_ALLOWED_FOR_STORAGE(cache));
} else {
const dict_index_t* index;
ulint n_fields;
mem_heap_t* heap;
ulint offsets_onstack[REC_OFFS_NORMAL_SIZE];
ulint* offsets;
char buf[TRX_I_S_LOCK_DATA_MAX_LEN];
ulint buf_used;
ulint i;
rec_offs_init(offsets_onstack);
offsets = offsets_onstack;
index = lock_rec_get_index(lock);
n_fields = dict_index_get_n_unique(index);
ut_a(n_fields > 0);
heap = NULL;
offsets = rec_get_offsets(rec, index, offsets, n_fields,
&heap);
/* format and store the data */
buf_used = 0;
for (i = 0; i < n_fields; i++) {
/* put_nth_field() counts the terminating NUL in its
return value; it is subtracted so that the next field
starts on top of that NUL */
buf_used += put_nth_field(
buf + buf_used, sizeof(buf) - buf_used,
i, index, rec, offsets) - 1;
}
/* buf_used + 1: store the terminating NUL too */
*lock_data = (const char*) ha_storage_put_memlim(
cache->storage, buf, buf_used + 1,
MAX_ALLOWED_FOR_STORAGE(cache));
if (UNIV_UNLIKELY(heap != NULL)) {
/* this means that rec_get_offsets() has created a new
heap and has stored offsets in it; check that this is
really the case and free the heap */
ut_a(offsets != offsets_onstack);
mem_heap_free(heap);
}
}
mtr_commit(&mtr);
/* on this path a NULL can only come from one of the storage calls
above, i.e. the copy could not be stored */
if (*lock_data == NULL) {
return(FALSE);
}
return(TRUE);
}
/*******************************************************************//**
Fills i_s_locks_row_t object from a lock. The table name and, for record
locks, the index name and the lock data are copied into cache->storage.
For table locks the index and lock data are set to NULL and the
space/page/record numbers to ULINT_UNDEFINED.
If memory can not be allocated then FALSE is returned.
@return TRUE on success, FALSE if allocation fails */
static
ibool
fill_locks_row(
/*===========*/
	i_s_locks_row_t* row,	/*!< out: result object that's filled */
	const lock_t*	lock,	/*!< in: lock to get data from */
	ulint		heap_no,/*!< in: lock's record number
				or ULINT_UNDEFINED if the lock
				is a table lock */
	trx_i_s_cache_t* cache)	/*!< in/out: cache into which to copy
				volatile strings */
{
	row->lock_trx_id = lock_get_trx_id(lock);
	row->lock_mode = lock_get_mode_str(lock);
	row->lock_type = lock_get_type_str(lock);

	row->lock_table = ha_storage_put_str_memlim(
		cache->storage, lock_get_table_name(lock),
		MAX_ALLOWED_FOR_STORAGE(cache));

	if (row->lock_table == NULL) {
		/* memory could not be allocated */
		return(FALSE);
	}

	switch (lock_get_type(lock)) {
	case LOCK_TABLE:
		/* a table lock has no index and no record position */
		row->lock_index = NULL;

		row->lock_space = ULINT_UNDEFINED;
		row->lock_page = ULINT_UNDEFINED;
		row->lock_rec = ULINT_UNDEFINED;

		row->lock_data = NULL;

		break;
	case LOCK_REC:
		row->lock_index = ha_storage_put_str_memlim(
			cache->storage, lock_rec_get_index_name(lock),
			MAX_ALLOWED_FOR_STORAGE(cache));

		if (row->lock_index == NULL) {
			/* memory could not be allocated */
			return(FALSE);
		}

		row->lock_space = lock_rec_get_space_id(lock);
		row->lock_page = lock_rec_get_page_no(lock);
		row->lock_rec = heap_no;

		if (!fill_lock_data(&row->lock_data, lock, heap_no,
				    cache)) {
			/* memory could not be allocated */
			return(FALSE);
		}

		break;
	default:
		ut_error;
	}

	row->lock_table_id = lock_get_table_id(lock);

	/* the hash chain node embedded in the row refers back to the
	row itself */
	row->hash_chain.value = row;

	return(TRUE);
}
/*******************************************************************//**
Fills i_s_lock_waits_row_t object: records which innodb_locks row is
being requested and which one is blocking it. Returns its first
argument.
@return result object that's filled */
static
i_s_lock_waits_row_t*
fill_lock_waits_row(
/*================*/
	i_s_lock_waits_row_t*	row,		/*!< out: result object
						that's filled */
	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
						relevant requested lock
						row in innodb_locks */
	const i_s_locks_row_t*	blocking_lock_row)/*!< in: pointer to the
						relevant blocking lock
						row in innodb_locks */
{
	row->blocking_lock_row = blocking_lock_row;
	row->requested_lock_row = requested_lock_row;

	return(row);
}
/*******************************************************************//**
Calculates a hash fold for a lock. For a record lock the fold is
calculated from 4 elements, which uniquely identify a lock at a given
point in time: transaction id, space id, page number, record number.
For a table lock the fold is table's id.
If TEST_LOCK_FOLD_ALWAYS_DIFFERENT is defined, every call returns the
next value of a counter instead.
@return fold */
static
ulint
fold_lock(
/*======*/
	const lock_t*	lock,	/*!< in: lock object to fold */
	ulint		heap_no)/*!< in: lock's record number
				or ULINT_UNDEFINED if the lock
				is a table lock */
{
#ifdef TEST_LOCK_FOLD_ALWAYS_DIFFERENT
	static ulint	fold = 0;

	return(fold++);
#else
	ulint	fold;

	switch (lock_get_type(lock)) {
	case LOCK_TABLE:
		/* this check is actually not necessary for continuing
		correct operation, but something must have gone wrong if
		it fails. */
		ut_a(heap_no == ULINT_UNDEFINED);

		fold = (ulint) lock_get_table_id(lock);

		break;
	case LOCK_REC:
		ut_a(heap_no != ULINT_UNDEFINED);

		/* ((trx id, space id), page no), heap no */
		fold = ut_fold_ulint_pair(
			ut_fold_ulint_pair(
				ut_fold_ulint_pair(
					(ulint) lock_get_trx_id(lock),
					lock_rec_get_space_id(lock)),
				lock_rec_get_page_no(lock)),
			heap_no);

		break;
	default:
		ut_error;
	}

	return(fold);
#endif
}
/*******************************************************************//**
Checks whether i_s_locks_row_t object represents a lock_t object.
A record lock matches on transaction id, space id, page number and
record number; a table lock matches on transaction id and table id.
If TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T is defined, nothing ever
matches.
@return TRUE if they match */
static
ibool
locks_row_eq_lock(
/*==============*/
	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
	const lock_t*		lock,	/*!< in: lock object */
	ulint			heap_no)/*!< in: lock's record number
					or ULINT_UNDEFINED if the lock
					is a table lock */
{
#ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
	return(0);
#else
	switch (lock_get_type(lock)) {
	case LOCK_REC:
		ut_a(heap_no != ULINT_UNDEFINED);

		if (row->lock_trx_id != lock_get_trx_id(lock)) {
			return(FALSE);
		}

		if (row->lock_space != lock_rec_get_space_id(lock)) {
			return(FALSE);
		}

		if (row->lock_page != lock_rec_get_page_no(lock)) {
			return(FALSE);
		}

		return(row->lock_rec == heap_no);

	case LOCK_TABLE:
		/* this check is actually not necessary for continuing
		correct operation, but something must have gone wrong if
		it fails. */
		ut_a(heap_no == ULINT_UNDEFINED);

		if (row->lock_trx_id != lock_get_trx_id(lock)) {
			return(FALSE);
		}

		return(row->lock_table_id == lock_get_table_id(lock));

	default:
		ut_error;
		return(FALSE);
	}
#endif
}
/*******************************************************************//**
Looks up the innodb_locks cache row that corresponds to a given lock.
The lookup goes through cache->locks_hash, so it happens in O(1) time.
@return the matching row, or NULL if none is found */
static
i_s_locks_row_t*
search_innodb_locks(
/*================*/
	trx_i_s_cache_t*	cache,	/*!< in: cache */
	const lock_t*		lock,	/*!< in: lock to search for */
	ulint			heap_no)/*!< in: lock's record number
					or ULINT_UNDEFINED if the lock
					is a table lock */
{
	i_s_hash_chain_t*	hash_chain;

	/* Arguments, in order: the name of the "next" member of the
	chain node, the hash table, the fold, the type of the chain
	node pointer, the variable that walks the chain, an (empty)
	assertion to run on every traversed item and finally the test
	that tells whether the lock has been found. */
	HASH_SEARCH(
		next,
		cache->locks_hash,
		fold_lock(lock, heap_no),
		i_s_hash_chain_t*,
		hash_chain,
		,
		locks_row_eq_lock(hash_chain->value, lock, heap_no));

	/* hash_chain is NULL if the end of the chain was reached
	without a match */
	return(hash_chain != NULL ? hash_chain->value : NULL);
}
/*******************************************************************//**
Adds new element to the locks cache, enlarging it if necessary.
Returns a pointer to the added row. If the row is already present then
no row is added and a pointer to the existing row is returned.
If row can not be allocated then NULL is returned.
A newly added row is also inserted into cache->locks_hash so that later
calls for the same lock find it. The TEST_* preprocessor symbols below
switch off individual steps (or repeat the whole body 10000 times).
@return row, or NULL if memory could not be allocated */
static
i_s_locks_row_t*
add_lock_to_cache(
/*==============*/
trx_i_s_cache_t* cache, /*!< in/out: cache */
const lock_t* lock, /*!< in: the element to add */
ulint heap_no)/*!< in: lock's record number
or ULINT_UNDEFINED if the lock
is a table lock */
{
i_s_locks_row_t* dst_row;
#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
ulint i;
for (i = 0; i < 10000; i++) {
#endif
#ifndef TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
/* quit if this lock is already present */
dst_row = search_innodb_locks(cache, lock, heap_no);
if (dst_row != NULL) {
return(dst_row);
}
#endif
dst_row = (i_s_locks_row_t*)
table_cache_create_empty_row(&cache->innodb_locks, cache);
/* memory could not be allocated */
if (dst_row == NULL) {
return(NULL);
}
if (!fill_locks_row(dst_row, lock, heap_no, cache)) {
/* memory could not be allocated; give back the row that
table_cache_create_empty_row() has just marked as used */
cache->innodb_locks.rows_used--;
return(NULL);
}
#ifndef TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
HASH_INSERT(
/* the type used in the hash chain */
i_s_hash_chain_t,
/* hash_chain->"next" */
next,
/* the hash table */
cache->locks_hash,
/* fold */
fold_lock(lock, heap_no),
/* add this data to the hash */
&dst_row->hash_chain);
#endif
#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
} /* for()-loop */
#endif
return(dst_row);
}
/*******************************************************************//**
Adds a new (requested lock, blocking lock) pair to the lock waits cache.
If memory can not be allocated then FALSE is returned.
@return TRUE on success, FALSE if allocation fails */
static
ibool
add_lock_wait_to_cache(
/*===================*/
	trx_i_s_cache_t*	cache,		/*!< in/out: cache */
	const i_s_locks_row_t*	requested_lock_row,/*!< in: pointer to the
						relevant requested lock
						row in innodb_locks */
	const i_s_locks_row_t*	blocking_lock_row)/*!< in: pointer to the
						relevant blocking lock
						row in innodb_locks */
{
	i_s_lock_waits_row_t*	waits_row;

	waits_row = (i_s_lock_waits_row_t*) table_cache_create_empty_row(
		&cache->innodb_lock_waits, cache);

	if (waits_row != NULL) {
		fill_lock_waits_row(waits_row, requested_lock_row,
				    blocking_lock_row);

		return(TRUE);
	}

	/* memory could not be allocated */
	return(FALSE);
}
/*******************************************************************//**
Adds transaction's relevant (important) locks to cache.
If the transaction is waiting, then the wait lock is added to
innodb_locks and a pointer to the added row is returned in
requested_lock_row; in addition every lock that precedes the wait lock
in its queue and blocks it is added to innodb_locks, and the pair is
added to innodb_lock_waits. If the transaction is not waiting,
requested_lock_row is set to NULL.
If rows can not be allocated then FALSE is returned and the value of
requested_lock_row is undefined.
@return FALSE if allocation fails */
static
ibool
add_trx_relevant_locks_to_cache(
/*============================*/
	trx_i_s_cache_t*	cache,	/*!< in/out: cache */
	const trx_t*		trx,	/*!< in: transaction */
	i_s_locks_row_t**	requested_lock_row)/*!< out: pointer to the
					requested lock row, or NULL or
					undefined */
{
	const lock_t*		blocker;
	i_s_locks_row_t*	blocker_row;
	ulint			heap_no;
	lock_queue_iterator_t	iter;

	ut_ad(mutex_own(&kernel_mutex));

	if (trx->que_state != TRX_QUE_LOCK_WAIT) {
		/* not waiting: no locks of this transaction are of
		interest */
		*requested_lock_row = NULL;

		return(TRUE);
	}

	/* The transaction is waiting: add the wait lock and all locks
	from another transactions that are blocking the wait lock. */

	ut_a(trx->wait_lock != NULL);

	heap_no = wait_lock_get_heap_no(trx->wait_lock);

	/* add the requested lock */
	*requested_lock_row = add_lock_to_cache(cache, trx->wait_lock,
						heap_no);

	if (*requested_lock_row == NULL) {
		/* memory could not be allocated */
		return(FALSE);
	}

	/* walk backwards over the locks that are queued before the
	wait lock and pick the ones that are blocking it */
	lock_queue_iterator_reset(&iter, trx->wait_lock, ULINT_UNDEFINED);

	for (blocker = lock_queue_iterator_get_prev(&iter);
	     blocker != NULL;
	     blocker = lock_queue_iterator_get_prev(&iter)) {

		if (!lock_has_to_wait(trx->wait_lock, blocker)) {
			continue;
		}

		/* heap_no is the same for the wait and waited locks */
		blocker_row = add_lock_to_cache(cache, blocker, heap_no);

		if (blocker_row == NULL) {
			/* memory could not be allocated */
			return(FALSE);
		}

		/* add the relation between both locks to
		innodb_lock_waits */
		if (!add_lock_wait_to_cache(cache, *requested_lock_row,
					    blocker_row)) {
			/* memory could not be allocated */
			return(FALSE);
		}
	}

	return(TRUE);
}
/** The minimum time that a cache must not be updated after it has been
read for the last time; measured in microseconds. We use this technique
to ensure that SELECTs which join several INFORMATION SCHEMA tables read
the same version of the cache. */
#define CACHE_MIN_IDLE_TIME_US	100000 /* 0.1 sec */
/*******************************************************************//**
Checks if the cache can safely be updated, that is, whether more than
CACHE_MIN_IDLE_TIME_US microseconds have passed since it was last read.
@return TRUE if can be updated */
static
ibool
can_cache_be_updated(
/*=================*/
	trx_i_s_cache_t*	cache)	/*!< in: cache */
{
	ullint	idle_time_us;

	/* cache->last_read is read here without acquiring its mutex.
	This is fine: last_read is only written while a shared rw lock
	on the whole cache is held (see trx_i_s_cache_end_read()),
	whereas we are holding the exclusive rw lock right now, so
	nobody can be updating it while we look at it. */

#ifdef UNIV_SYNC_DEBUG
	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
#endif

	idle_time_us = ut_time_us(NULL) - cache->last_read;

	return(idle_time_us > CACHE_MIN_IDLE_TIME_US);
}
/*******************************************************************//**
Declare a cache empty, preparing it to be filled up. The row counters of
the three tables are reset, the locks hash table is cleared and the
storage is emptied. Not all resources are freed because they can be
reused. */
static
void
trx_i_s_cache_clear(
/*================*/
	trx_i_s_cache_t*	cache)	/*!< out: cache to clear */
{
	/* the chunks stay allocated, only the rows in them are
	declared unused */
	cache->innodb_lock_waits.rows_used = 0;
	cache->innodb_locks.rows_used = 0;
	cache->innodb_trx.rows_used = 0;

	hash_table_clear(cache->locks_hash);

	ha_storage_empty(&cache->storage);
}
/*******************************************************************//**
Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
table cache buffer. Cache must be locked for write. The previous
contents of the cache are discarded first. If memory runs out while
filling, the cache is left partially filled and is flagged as
truncated. */
static
void
fetch_data_into_cache(
/*==================*/
	trx_i_s_cache_t*	cache)	/*!< in/out: cache */
{
	trx_t*			trx;
	i_s_trx_row_t*		trx_row;
	i_s_locks_row_t*	wait_lock_row;

	ut_ad(mutex_own(&kernel_mutex));

	trx_i_s_cache_clear(cache);

	/* Walk the list of all transactions. For each of them first
	the relevant locks go into innodb_locks' and
	innodb_lock_waits' caches, then the transaction itself goes
	into innodb_trx's cache. */

	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);

	while (trx != NULL) {

		if (!add_trx_relevant_locks_to_cache(cache, trx,
						     &wait_lock_row)) {
			goto truncated;
		}

		trx_row = (i_s_trx_row_t*) table_cache_create_empty_row(
			&cache->innodb_trx, cache);

		if (trx_row == NULL) {
			/* memory could not be allocated */
			goto truncated;
		}

		if (!fill_trx_row(trx_row, trx, wait_lock_row, cache)) {
			/* memory could not be allocated; give back the
			row that has just been taken */
			cache->innodb_trx.rows_used--;

			goto truncated;
		}

		trx = UT_LIST_GET_NEXT(trx_list, trx);
	}

	cache->is_truncated = FALSE;

	return;

truncated:
	cache->is_truncated = TRUE;
}
/*******************************************************************//**
Update the transactions cache if it has not been read for some time.
Called from handler/i_s.cc. The data is fetched while holding the
kernel mutex.
@return 0 - fetched, 1 - not */
UNIV_INTERN
int
trx_i_s_possibly_fetch_data_into_cache(
/*===================================*/
	trx_i_s_cache_t*	cache)	/*!< in/out: cache */
{
	if (can_cache_be_updated(cache)) {

		/* We are going to access trx->query in all
		transactions */
		innobase_mysql_prepare_print_arbitrary_thd();

		/* We need to read trx_sys and record/table lock
		queues */
		mutex_enter(&kernel_mutex);

		fetch_data_into_cache(cache);

		mutex_exit(&kernel_mutex);

		innobase_mysql_end_print_arbitrary_thd();

		return(0);
	}

	/* the cache was read too recently, leave it as it is */
	return(1);
}
/*******************************************************************//**
Returns TRUE if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT. The value returned is the flag that
fetch_data_into_cache() sets every time it fills the cache.
@return TRUE if truncated */
UNIV_INTERN
ibool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
return(cache->is_truncated);
}
/*******************************************************************//**
Initialize INFORMATION SCHEMA trx related cache: creates the rw-lock
and the mutex, sets up the three (still empty) table caches and creates
the locks hash table and the storage. */
UNIV_INTERN
void
trx_i_s_cache_init(
/*===============*/
	trx_i_s_cache_t*	cache)	/*!< out: cache to init */
{
	/* The latching is done in the following order:
	acquire trx_i_s_cache_t::rw_lock, X
	acquire kernel_mutex
	release kernel_mutex
	release trx_i_s_cache_t::rw_lock
	acquire trx_i_s_cache_t::rw_lock, S
	acquire trx_i_s_cache_t::last_read_mutex
	release trx_i_s_cache_t::last_read_mutex
	release trx_i_s_cache_t::rw_lock */

	/* synchronization objects */
	rw_lock_create(&cache->rw_lock, SYNC_TRX_I_S_RWLOCK);
	mutex_create(&cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ);

	/* plain scalar state */
	cache->last_read = 0;
	cache->mem_allocd = 0;
	cache->is_truncated = FALSE;

	/* one row cache per INFORMATION SCHEMA table */
	table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t));
	table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t));
	table_cache_init(&cache->innodb_lock_waits,
			 sizeof(i_s_lock_waits_row_t));

	/* auxiliary structures */
	cache->locks_hash = hash_create(LOCKS_HASH_CELLS_NUM);

	cache->storage = ha_storage_create(CACHE_STORAGE_INITIAL_SIZE,
					   CACHE_STORAGE_HASH_CELLS);
}
/*******************************************************************//**
Free the INFORMATION SCHEMA trx related cache: the locks hash table,
the storage and the chunks of the three table caches are released and
the whole structure is then overwritten with zeros.
NOTE(review): rw_lock and last_read_mutex are not released by any call
in this function before being zeroed; presumably they are disposed of
elsewhere - confirm against the shutdown sequence. */
UNIV_INTERN
void
trx_i_s_cache_free(
/*===============*/
trx_i_s_cache_t* cache) /*!< in, own: cache to free */
{
hash_table_free(cache->locks_hash);
ha_storage_free(cache->storage);
table_cache_free(&cache->innodb_trx);
table_cache_free(&cache->innodb_locks);
table_cache_free(&cache->innodb_lock_waits);
/* leave no stale pointers or counters behind */
memset(cache, 0, sizeof *cache);
}
/*******************************************************************//**
Issue a shared/read lock on the tables cache, by s-locking
cache->rw_lock. The lock is released with trx_i_s_cache_end_read(). */
UNIV_INTERN
void
trx_i_s_cache_start_read(
/*=====================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
rw_lock_s_lock(&cache->rw_lock);
}
/*******************************************************************//**
Release a shared/read lock on the tables cache. Before the lock is
released the current time is stored in cache->last_read, which
can_cache_be_updated() compares against CACHE_MIN_IDLE_TIME_US. */
UNIV_INTERN
void
trx_i_s_cache_end_read(
/*===================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
ullint now;
#ifdef UNIV_SYNC_DEBUG
ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED));
#endif
/* update cache last read time; the time is sampled before
last_read_mutex is entered, and the mutex is used for the store
because only a shared lock on rw_lock is held at this point */
now = ut_time_us(NULL);
mutex_enter(&cache->last_read_mutex);
cache->last_read = now;
mutex_exit(&cache->last_read_mutex);
rw_lock_s_unlock(&cache->rw_lock);
}
/*******************************************************************//**
Issue an exclusive/write lock on the tables cache, by x-locking
cache->rw_lock. The lock is released with trx_i_s_cache_end_write(). */
UNIV_INTERN
void
trx_i_s_cache_start_write(
/*======================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
rw_lock_x_lock(&cache->rw_lock);
}
/*******************************************************************//**
Release an exclusive/write lock on the tables cache. When
UNIV_SYNC_DEBUG is defined it is first asserted that the exclusive lock
on cache->rw_lock is indeed owned. */
UNIV_INTERN
void
trx_i_s_cache_end_write(
/*====================*/
trx_i_s_cache_t* cache) /*!< in: cache */
{
#ifdef UNIV_SYNC_DEBUG
ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
#endif
rw_lock_x_unlock(&cache->rw_lock);
}
/*******************************************************************//**
Selects a INFORMATION SCHEMA table cache from the whole cache. An
unknown table value triggers ut_error.
@return table cache */
static
i_s_table_cache_t*
cache_select_table(
/*===============*/
	trx_i_s_cache_t*	cache,	/*!< in: whole cache */
	enum i_s_table		table)	/*!< in: which table */
{
#ifdef UNIV_SYNC_DEBUG
	/* the caller must hold the cache's rw-lock in either mode */
	ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED)
	     || rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
#endif

	switch (table) {
	case I_S_INNODB_TRX:
		return(&cache->innodb_trx);
	case I_S_INNODB_LOCKS:
		return(&cache->innodb_locks);
	case I_S_INNODB_LOCK_WAITS:
		return(&cache->innodb_lock_waits);
	default:
		ut_error;
		return(NULL);
	}
}
/*******************************************************************//**
Tells how many rows of the cache of a given INFORMATION SCHEMA table
are currently in use.
@return number of rows */
UNIV_INTERN
ulint
trx_i_s_cache_get_rows_used(
/*========================*/
	trx_i_s_cache_t*	cache,	/*!< in: cache */
	enum i_s_table		table)	/*!< in: which table */
{
	return(cache_select_table(cache, table)->rows_used);
}
/*******************************************************************//**
Retrieves the nth row (zero-based) in the cache for a given
INFORMATION SCHEMA table. n must be less than the number of used rows.
The row is located by finding the first chunk whose row range extends
beyond n.
@return row */
UNIV_INTERN
void*
trx_i_s_cache_get_nth_row(
/*======================*/
	trx_i_s_cache_t*	cache,	/*!< in: cache */
	enum i_s_table		table,	/*!< in: which table */
	ulint			n)	/*!< in: row number */
{
	i_s_table_cache_t*	table_cache;
	void*			row = NULL;
	ulint			chunk_no;

	table_cache = cache_select_table(cache, table);

	ut_a(n < table_cache->rows_used);

	for (chunk_no = 0;
	     chunk_no < MEM_CHUNKS_IN_TABLE_CACHE;
	     chunk_no++) {

		/* rows [first_row, end_row) live in this chunk */
		ulint	first_row = table_cache->chunks[chunk_no].offset;
		ulint	end_row = first_row
			+ table_cache->chunks[chunk_no].rows_allocd;

		if (n < end_row) {
			row = (char*) table_cache->chunks[chunk_no].base
				+ (n - first_row) * table_cache->row_size;
			break;
		}
	}

	ut_a(row != NULL);

	return(row);
}
/*******************************************************************//**
Crafts a lock id string from a i_s_locks_row_t object. Returns its
second argument. For a record lock the id consists of the transaction
id, space id, page number and record number; for a table lock (one
whose lock_space is ULINT_UNDEFINED) of the transaction id and the
table id. This function aborts if there is not enough space in
lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
want to be 100% sure that it will not abort.
@return resulting lock id */
UNIV_INTERN
char*
trx_i_s_create_lock_id(
/*===================*/
	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
	char*			lock_id,/*!< out: resulting lock_id */
	ulint			lock_id_size)/*!< in: size of the lock id
					buffer */
{
	int	res_len;

	/* please adjust TRX_I_S_LOCK_ID_MAX_LEN if you change this */

	if (row->lock_space == ULINT_UNDEFINED) {
		/* table lock */
		res_len = ut_snprintf(lock_id, lock_id_size,
				      TRX_ID_FMT ":%llu",
				      row->lock_trx_id,
				      row->lock_table_id);
	} else {
		/* record lock */
		res_len = ut_snprintf(lock_id, lock_id_size,
				      TRX_ID_FMT ":%lu:%lu:%lu",
				      row->lock_trx_id, row->lock_space,
				      row->lock_page, row->lock_rec);
	}

	/* a negative length is not expected from snprintf(3); once
	that has been asserted the typecast below is safe */
	ut_a(res_len >= 0);

	/* the whole id, including its NUL, must have fit */
	ut_a((ulint) res_len < lock_id_size);

	return(lock_id);
}