mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-26 08:28:13 +01:00 
			
		
		
		
	 77bebe9eb0
			
		
	
	
	77bebe9eb0
	
	
	
		
			
			During a regular iteration, the page cleaner flushes from the flush list with some flush target and then goes on to generate free pages from the LRU tail. When asynchronous flush is triggered, i.e. when 7/8 of the LSN margin in the redo log is filled, the flush target for the flush list is set to innodb_io_capacity_max. If it could flush all of that, the flush bandwidth for the LRU flush is currently set to zero. If the LRU tail has dirty pages, the page cleaner ends up freeing no pages in that iteration. The scenario could repeat across multiple iterations until the async flush target is reached. During this time the DB system is starved of free pages, resulting in an apparent stall and in some cases a dict_sys latch fatal error. Fix: In each page cleaner iteration, before the LRU flush, ensure we provide enough flush limit so that freeing pages is not blocked by dirty pages in the LRU tail. Log IO and flush state if the doublewrite flush wait is long. Reviewed by: Marko Mäkelä
		
			
				
	
	
		
			168 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			168 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2017, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file include/buf0dblwr.h
 | |
| Doublewrite buffer module
 | |
| 
 | |
| Created 2011/12/19 Inaam Rana
 | |
| *******************************************************/
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "os0file.h"
 | |
| #include "buf0types.h"
 | |
| 
 | |
| /** Doublewrite buffer control class: on-disk block locations, batch state, and the API to queue and flush page writes */
 | |
| class buf_dblwr_t
 | |
| {
 | |
|   struct element /* one queued page write: the request plus its payload size */
 | |
|   {
 | |
|     /** asynchronous write request */
 | |
|     IORequest request;
 | |
|     /** payload size in bytes */
 | |
|     size_t size;
 | |
|   };
 | |
| 
 | |
|   struct slot /* state of one write batch; buf_dblwr_t holds two of these in slots[] */
 | |
|   {
 | |
|     /** first free position in write_buf measured in units of
 | |
|      * srv_page_size */
 | |
|     ulint first_free;
 | |
|     /** number of slots reserved for the current write batch */
 | |
|     ulint reserved;
 | |
|     /** the doublewrite buffer, aligned to srv_page_size */
 | |
|     byte* write_buf;
 | |
|     /** buffer blocks to be written via write_buf */
 | |
|     element* buf_block_arr;
 | |
|   };
 | |
| 
 | |
|   /** the page identifier of the first doublewrite block (block_size() pages); page_id_t(0, 0) means "not created", see is_created() */
 | |
|   page_id_t block1{0, 0};
 | |
|   /** the page identifier of the second doublewrite block (block_size() pages); is_inside() asserts block1 < block2 */
 | |
|   page_id_t block2{0, 0};
 | |
| 
 | |
|   /** mutex protecting the data members below */
 | |
|   mysql_mutex_t mutex;
 | |
|   /** condition variable for !batch_running; waited on in wait_flush_buffered_writes() */
 | |
|   pthread_cond_t cond;
 | |
|   /** whether a batch is being written from the doublewrite buffer */
 | |
|   bool batch_running;
 | |
|   /** number of expected flush_buffered_writes_completed() calls */
 | |
|   unsigned flushing_buffered_writes;
 | |
|   /** number of flush_buffered_writes_completed() calls; reported by batches() */
 | |
|   ulint writes_completed;
 | |
|   /** number of pages written by flush_buffered_writes_completed(); reported by written() */
 | |
|   ulint pages_written;
 | |
| 
 | |
|   slot slots[2]; /* the two batch slots */
 | |
|   slot *active_slot; /* NOTE(review): presumably the element of slots[] that currently accepts add_to_batch() requests -- confirm in the definition */
 | |
| 
 | |
|   /** Initialise the persistent storage of the doublewrite buffer.
 | |
|   @param header   doublewrite page header in the TRX_SYS page */
 | |
|   inline void init(const byte *header) noexcept;
 | |
| 
 | |
|   /** Flush possible buffered writes to persistent storage. NOTE(review): the meaning of size and of the return value is not visible in this header -- confirm in the definition. */
 | |
|   bool flush_buffered_writes(const ulint size) noexcept;
 | |
| 
 | |
| public:
 | |
|   /** Initialise the doublewrite buffer data structures. */
 | |
|   void init() noexcept;
 | |
|   /** Create or restore the doublewrite buffer in the TRX_SYS page.
 | |
|   @return whether the operation succeeded */
 | |
|   bool create() noexcept;
 | |
|   /** Free the doublewrite buffer. */
 | |
|   void close() noexcept;
 | |
| 
 | |
|   /** Acquire the mutex; pair with unlock() */
 | |
|   void lock() noexcept { mysql_mutex_lock(&mutex); }
 | |
|   /** @return the number of completed batches (the caller must hold the mutex; this is asserted) */
 | |
|   ulint batches() const noexcept
 | |
|   { mysql_mutex_assert_owner(&mutex); return writes_completed; }
 | |
|   /** @return the number of final pages written (the caller must hold the mutex; this is asserted) */
 | |
|   ulint written() const noexcept
 | |
|   { mysql_mutex_assert_owner(&mutex); return pages_written; }
 | |
|   /** Release the mutex acquired by lock() */
 | |
|   void unlock() noexcept { mysql_mutex_unlock(&mutex); }
 | |
| 
 | |
|   /** Initialize the doublewrite buffer memory structure on recovery.
 | |
|   If we are upgrading from a version before MySQL 4.1, then this
 | |
|   function performs the necessary update operations to support
 | |
|   innodb_file_per_table. If we are in a crash recovery, this function
 | |
|   loads the pages from double write buffer which are not older than
 | |
|   the checkpoint into memory.
 | |
|   @param file File handle
 | |
|   @param path Path name of file
 | |
|   @return DB_SUCCESS or error code */
 | |
|   dberr_t init_or_load_pages(pfs_os_file_t file, const char *path) noexcept;
 | |
| 
 | |
|   /** Process and remove the double write buffer pages for all tablespaces. */
 | |
|   void recover() noexcept;
 | |
| 
 | |
|   /** Update the doublewrite buffer on data page write completion. */
 | |
|   void write_completed() noexcept;
 | |
|   /** Flush possible buffered writes to persistent storage.
 | |
|   It is very important to call this function after a batch of writes has been
 | |
|   posted, and also when we may have to wait for a page latch!
 | |
|   Otherwise a deadlock of threads can occur. */
 | |
|   void flush_buffered_writes() noexcept;
 | |
|   /** Update the doublewrite buffer on write batch completion
 | |
|   @param request  the completed batch write request */
 | |
|   void flush_buffered_writes_completed(const IORequest &request) noexcept;
 | |
| 
 | |
|   /** @return the size of each doublewrite block in pages (FSP_EXTENT_SIZE) */
 | |
|   uint32_t block_size() const noexcept { return FSP_EXTENT_SIZE; }
 | |
| 
 | |
|   /** Schedule a page write. If the doublewrite memory buffer is full,
 | |
|   flush_buffered_writes() will be invoked to make space.
 | |
|   @param request    asynchronous write request
 | |
|   @param size       payload size in bytes */
 | |
|   void add_to_batch(const IORequest &request, size_t size) noexcept;
 | |
| 
 | |
|   /** Determine whether the doublewrite buffer has been created, that is, whether block1 differs from its initial value page_id_t(0, 0) */
 | |
|   bool is_created() const noexcept
 | |
|   { return UNIV_LIKELY(block1 != page_id_t(0, 0)); }
 | |
| 
 | |
|   /** @return whether a page identifier is part of the doublewrite buffer */
 | |
|   bool is_inside(const page_id_t id) const noexcept
 | |
|   {
 | |
|     if (!is_created())
 | |
|       return false; /* no doublewrite blocks exist yet */
 | |
|     ut_ad(block1 < block2);
 | |
|     if (id < block1)
 | |
|       return false; /* below the first block and, given block1 < block2, below the second one too */
 | |
|     const uint32_t size= block_size();
 | |
|     return id < block1 + size || (id >= block2 && id < block2 + size); /* id is in [block1, block1 + size) or in [block2, block2 + size) */
 | |
|   }
 | |
| 
 | |
|   /** Wait for flush_buffered_writes() to be fully completed (blocks on cond, holding mutex, until batch_running is no longer set) */
 | |
|   void wait_flush_buffered_writes() noexcept
 | |
|   {
 | |
|     mysql_mutex_lock(&mutex);
 | |
|     while (batch_running) /* the flag is re-checked after every wakeup */
 | |
|       my_cond_wait(&cond, &mutex.m_mutex);
 | |
|     mysql_mutex_unlock(&mutex);
 | |
|   }
 | |
| 
 | |
|   /** Print double write state information. */
 | |
|   ATTRIBUTE_COLD void print_info() const noexcept;
 | |
| };
 | |
| 
 | |
| /** The doublewrite buffer object (declaration only; the definition is outside this header) */
 | |
| extern buf_dblwr_t buf_dblwr;
 |