mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 19:06:14 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			265 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			265 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2018, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file read/read0read.cc
 | |
| Cursor read
 | |
| 
 | |
| Created 2/16/1997 Heikki Tuuri
 | |
| *******************************************************/
 | |
| 
 | |
| #include "read0types.h"
 | |
| 
 | |
| #include "srv0srv.h"
 | |
| #include "trx0sys.h"
 | |
| #include "trx0purge.h"
 | |
| 
 | |
| /*
 | |
| -------------------------------------------------------------------------------
 | |
| FACT A: Cursor read view on a secondary index sees only committed versions
 | |
| -------
 | |
| of the records in the secondary index or those versions of rows created
 | |
| by transaction which created a cursor before cursor was created even
 | |
| if transaction which created the cursor has changed that clustered index page.
 | |
| 
 | |
| PROOF: We must show that read goes always to the clustered index record
 | |
| to see that record is visible in the cursor read view. Consider e.g.
 | |
| following table and SQL-clauses:
 | |
| 
 | |
| create table t1(a int not null, b int, primary key(a), index(b));
 | |
| insert into t1 values (1,1),(2,2);
 | |
| commit;
 | |
| 
 | |
| Now consider that we have a cursor for a query
 | |
| 
 | |
| select b from t1 where b >= 1;
 | |
| 
 | |
| This query will use secondary key on the table t1. Now after the first fetch
 | |
| on this cursor if we do an update:
 | |
| 
 | |
| update t1 set b = 5 where b = 2;
 | |
| 
 | |
| Now second fetch of the cursor should not see record (2,5) instead it should
 | |
| see record (2,2).
 | |
| 
 | |
| We also should show that if we have delete t1 where b = 5; we still
 | |
| can see record (2,2).
 | |
| 
 | |
| When we access a secondary key record maximum transaction id is fetched
 | |
| from this record and this trx_id is compared to up_limit_id in the view.
 | |
| If trx_id in the record is greater than or equal to up_limit_id in the view
 | |
| cluster record is accessed.  Because trx_id of the creating
 | |
| transaction is stored when this view was created to the list of
 | |
| trx_ids not seen by this read view previous version of the
 | |
| record is requested to be built. This is built using the clustered record.
 | |
| If the secondary key record is delete-marked, its corresponding
 | |
| clustered record can already be purged only if the record's
 | |
| trx_id < low_limit_no. Purge can't remove any record deleted by a
 | |
| transaction which was active when cursor was created. But, we still
 | |
| may have a deleted secondary key record but no clustered record. But,
 | |
| this is not a problem because this case is handled in
 | |
| row_sel_get_clust_rec() function which is called
 | |
| whenever we note that this read view does not see trx_id in the
 | |
| record. Thus, we see correct version. Q. E. D.
 | |
| 
 | |
| -------------------------------------------------------------------------------
 | |
| FACT B: Cursor read view on a clustered index sees only committed versions
 | |
| -------
 | |
| of the records in the clustered index or those versions of rows created
 | |
| by transaction which created a cursor before cursor was created even
 | |
| if transaction which created the cursor has changed that clustered index page.
 | |
| 
 | |
| PROOF: Consider e.g. the following table and SQL-clauses:
 | |
| 
 | |
| create table t1(a int not null, b int, primary key(a));
 | |
| insert into t1(a) values (1),(2);
 | |
| commit;
 | |
| 
 | |
| Now consider that we have a cursor for a query
 | |
| 
 | |
| select a from t1 where a >= 1;
 | |
| 
 | |
| This query will use clustered key on the table t1. Now after the first fetch
 | |
| on this cursor if we do an update:
 | |
| 
 | |
| update t1 set a = 5 where a = 2;
 | |
| 
 | |
| Now second fetch of the cursor should not see record (5) instead it should
 | |
| see record (2).
 | |
| 
 | |
| We also should show that if we execute delete from t1 where a = 5; after
 | |
| the cursor is opened we still can see record (2).
 | |
| 
 | |
| When accessing clustered record we always check if this read view sees
 | |
| trx_id stored to clustered record. By default we don't see any changes
 | |
| if record trx_id >= low_limit_id i.e. the change was made by a transaction
 | |
| which started after transaction which created the cursor. If row
 | |
| was changed by the future transaction a previous version of the
 | |
| clustered record is created. Thus we see only committed version in
 | |
| this case. We see all changes made by committed transactions i.e.
 | |
| record trx_id < up_limit_id. In this case we don't need to do anything,
 | |
| we already see correct version of the record. We don't see any changes
 | |
| made by active transaction except creating transaction. We have stored
 | |
| trx_id of creating transaction to list of trx_ids when this view was
 | |
| created. Thus we can easily see if this record was changed by the
 | |
| creating transaction. Because we already have clustered record we can
 | |
| access roll_ptr. Using this roll_ptr we can fetch undo record.
 | |
| We can now check that undo_no of the undo record is less than undo_no of the
 | |
| transaction which created a view when cursor was created. We see this
 | |
| clustered record only in case when record undo_no is less than undo_no
 | |
| in the view. If this is not true we build based on undo_rec previous
 | |
| version of the record. This record is found because purge can't remove
 | |
| records accessed by active transaction. Thus we see correct version. Q. E. D.
 | |
| -------------------------------------------------------------------------------
 | |
| FACT C: Purge does not remove any delete-marked row that is visible
 | |
| -------
 | |
| in any cursor read view.
 | |
| 
 | |
| PROOF: We know that:
 | |
|  1: Currently active read views in trx_sys_t::view_list are ordered by
 | |
|     ReadView::low_limit_no in descending order, that is,
 | |
|     newest read view first.
 | |
| 
 | |
|  2: Purge clones the oldest read view and uses that to determine whether there
 | |
|     are any active transactions that can see the to be purged records.
 | |
| 
 | |
| Therefore any joining or active transaction will not have a view older
 | |
| than the purge view, according to 1.
 | |
| 
 | |
| When purge needs to remove a delete-marked row from a secondary index,
 | |
| it will first check that the DB_TRX_ID value of the corresponding
 | |
| record in the clustered index is older than the purge view. It will
 | |
| also check if there is a newer version of the row (clustered index
 | |
| record) that is not delete-marked in the secondary index. If such a
 | |
| row exists and is collation-equal to the delete-marked secondary index
 | |
| record then purge will not remove the secondary index record.
 | |
| 
 | |
| Delete-marked clustered index records will be removed by
 | |
| row_purge_remove_clust_if_poss(), unless the clustered index record
 | |
| (and its DB_ROLL_PTR) has been updated. Every new version of the
 | |
| clustered index record will update DB_ROLL_PTR, pointing to a new UNDO
 | |
| log entry that allows the old version to be reconstructed. The
 | |
| DB_ROLL_PTR in the oldest remaining version in the old-version chain
 | |
| may be pointing to garbage (an undo log record discarded by purge),
 | |
| but it will never be dereferenced, because the purge view is older
 | |
| than any active transaction.
 | |
| 
 | |
| For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
 | |
| */
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Creates a snapshot where exactly the transactions serialized before this
 | |
|   point in time are seen in the view.
 | |
| 
 | |
|   @param[in,out] trx transaction
 | |
| */
 | |
| inline void ReadViewBase::snapshot(trx_t *trx)
 | |
| {
 | |
|   trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
 | |
|   if (m_ids.empty())
 | |
|   {
 | |
|     m_up_limit_id= m_low_limit_id;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   std::sort(m_ids.begin(), m_ids.end());
 | |
|   m_up_limit_id= m_ids.front();
 | |
|   ut_ad(m_up_limit_id <= m_low_limit_id);
 | |
| 
 | |
|   if (m_low_limit_no == m_low_limit_id &&
 | |
|       m_low_limit_id == m_up_limit_id + m_ids.size())
 | |
|   {
 | |
|     m_ids.clear();
 | |
|     m_low_limit_id= m_low_limit_no= m_up_limit_id;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Opens a read view where exactly the transactions serialized before this
 | |
|   point in time are seen in the view.
 | |
| 
 | |
|   View becomes visible to purge thread.
 | |
| 
 | |
|   @param[in,out] trx transaction
 | |
| 
 | |
|   Reuses closed view if there were no read-write transactions since (and at)
 | |
|   its creation time.
 | |
| 
 | |
|   Original comment states: there is an inherent race here between purge
 | |
|   and this thread.
 | |
| 
 | |
|   To avoid this race we should've checked trx_sys.get_max_trx_id() and
 | |
|   set m_open atomically under ReadView::m_mutex protection. But we're cutting
 | |
| corners to achieve greater performance.
 | |
| 
 | |
|   There're at least two types of concurrent threads interested in this
 | |
|   value: purge coordinator thread (see trx_sys_t::clone_oldest_view()) and
 | |
|   InnoDB monitor thread (see lock_trx_print_wait_and_mvcc_state()).
 | |
| 
 | |
|   What bad things can happen because we allow this race?
 | |
| 
 | |
|   Speculative execution may reorder state change before get_max_trx_id().
 | |
|   In this case purge thread has short gap to clone outdated view. Which is
 | |
|   probably not that bad: it just won't be able to purge things that it was
 | |
|   actually allowed to purge for a short while.
 | |
| 
 | |
|   This thread may as well get suspended after trx_sys.get_max_trx_id() and
 | |
|   before m_open is set. New read-write transaction may get started, committed
 | |
|   and purged meanwhile. It is acceptable as well, since this view doesn't see
 | |
|   it.
 | |
| */
 | |
| void ReadView::open(trx_t *trx)
 | |
| {
 | |
|   ut_ad(this == &trx->read_view);
 | |
|   if (is_open())
 | |
|     ut_ad(!srv_read_only_mode);
 | |
|   else if (likely(!srv_read_only_mode))
 | |
|   {
 | |
|     m_creator_trx_id= trx->id;
 | |
|     if (trx->is_autocommit_non_locking() && empty() &&
 | |
|         low_limit_id() == trx_sys.get_max_trx_id())
 | |
|       m_open.store(true, std::memory_order_relaxed);
 | |
|     else
 | |
|     {
 | |
|       m_mutex.wr_lock();
 | |
|       snapshot(trx);
 | |
|       m_open.store(true, std::memory_order_relaxed);
 | |
|       m_mutex.wr_unlock();
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Clones the oldest view and stores it in view.
 | |
| 
 | |
|   No need to call ReadView::close(). The caller owns the view that is passed
 | |
|   in. This function is called by purge thread to determine whether it should
 | |
|   purge the delete marked record or not.
 | |
| */
 | |
| void trx_sys_t::clone_oldest_view(ReadViewBase *view) const
 | |
| {
 | |
|   view->snapshot(nullptr);
 | |
|   /* Find oldest view. */
 | |
|   trx_list.for_each([view](const trx_t &trx) {
 | |
|                       trx.read_view.append_to(view);
 | |
| 		    });
 | |
| }
 | 
