/*****************************************************************************

Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2023, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file page/page0zip.cc
Compressed page interface

Created June 2005 by Marko Makela
*******************************************************/

#include "page0zip.h"
#include "fsp0types.h"
#include "page0page.h"
#include "buf0checksum.h"
#include "zlib.h"
#include "span.h"

using st_::span;

#ifndef UNIV_INNOCHECKSUM
#include "mtr0log.h"
#include "dict0dict.h"
#include "btr0cur.h"
#include "log0recv.h"
#include "row0row.h"
#include "btr0sea.h"
#include "dict0boot.h"
#include "lock0lock.h"
#include "srv0srv.h"
#include "buf0lru.h"
#include "srv0mon.h"

#include <map>
#include <algorithm>

/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
page_zip_stat_t		page_zip_stat[PAGE_ZIP_SSIZE_MAX];
/** Statistics on compression, indexed by index->id */
page_zip_stat_per_index_t	page_zip_stat_per_index;

/** Compression level to be used by zlib. Settable by user. */
uint	page_zip_level;

/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
/** Extra bytes of an infimum record */
static const byte infimum_extra[] = {
	0x01,			/* info_bits=0, n_owned=1 */
	0x00, 0x02		/* heap_no=0, status=2 */
	/* ?, ?	*/		/* next=(first user rec, or supremum) */
};
/** Data bytes of an infimum record */
static const byte infimum_data[] = {
	0x69, 0x6e, 0x66, 0x69,
	0x6d, 0x75, 0x6d, 0x00	/* "infimum\0" */
};
/** Extra bytes and data bytes of a supremum record */
static const byte supremum_extra_data alignas(4) [] = {
	/* 0x0?, */		/* info_bits=0, n_owned=1..8 */
	0x00, 0x0b,		/* heap_no=1, status=3 */
	0x00, 0x00,		/* next=0 */
	0x73, 0x75, 0x70, 0x72,
	0x65, 0x6d, 0x75, 0x6d	/* "supremum" */
};

/** Assert that a block of memory is filled with zero bytes.
@param b in: memory block
@param s in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) ut_ad(!memcmp(b, field_ref_zero, s))
/** Assert that a BLOB pointer is filled with zero bytes.
@param b in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) ASSERT_ZERO(b, FIELD_REF_SIZE)

/* Enable some extra debugging output.  This code can be enabled
independently of any UNIV_ debugging conditions. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
# include <stdarg.h>
MY_ATTRIBUTE((format (printf, 1, 2)))
/**********************************************************************//**
Report a failure to decompress or compress.
@return number of characters printed */
static
int
page_zip_fail_func(
/*===============*/
	const char*	fmt,	/*!< in: printf(3) format string */
	...)			/*!< in: arguments corresponding to fmt */
{
	int	res;
	va_list	ap;

	ut_print_timestamp(stderr);
	fputs("  InnoDB: ", stderr);
	va_start(ap, fmt);
	res = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return(res);
}
/** Wrapper for page_zip_fail_func()
@param fmt_args in: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func()
@param fmt_args ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

/**********************************************************************//**
Determine the guaranteed free space on an empty page.
@return minimum payload size on the page */
ulint
page_zip_empty_size(
/*================*/
	ulint	n_fields,	/*!< in: number of columns in the index */
	ulint	zip_size)	/*!< in: compressed page size in bytes */
{
	ulint	size = zip_size
		/* subtract the page header and the longest
		uncompressed data needed for one record */
		- (PAGE_DATA
		   + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE
		   + 1/* encoded heap_no==2 in page_zip_write_rec() */
		   + 1/* end of modification log */
		   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
		/* subtract the space for page_zip_fields_encode() */
		- compressBound(static_cast<uLong>(2 * (n_fields + 1)));
	return(lint(size) > 0 ? size : 0);
}
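
/* Illustration only (not part of the build): for a hypothetical index
with n_fields == 10 on an 8192-byte ROW_FORMAT=COMPRESSED page, the
estimate above subtracts the page header (PAGE_DATA), the per-record
uncompressed overhead (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE plus two bytes of
modification-log overhead, minus the REC_N_NEW_EXTRA_BYTES that the
compressed format omits) and compressBound(2 * (10 + 1)), the zlib
worst case for the encoded field descriptions.  A caller might probe
the result like this:

	if (ulint payload = page_zip_empty_size(index->n_fields,
						zip_size)) {
		// at least `payload' bytes fit on an empty page
	}
*/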

/** Check whether a tuple is too big for a compressed table
@param[in]	index	dict index object
@param[in]	entry	entry for the index
@return	true if it's too big, otherwise false */
bool
page_zip_is_too_big(
	const dict_index_t*	index,
	const dtuple_t*		entry)
{
	const ulint zip_size = index->table->space->zip_size();

	/* Estimate the free space of an empty compressed page.
	Subtract one byte for the encoded heap_no in the
	modification log. */
	ulint	free_space_zip = page_zip_empty_size(
		index->n_fields, zip_size);
	ulint	n_uniq = dict_index_get_n_unique_in_tree(index);

	ut_ad(dict_table_is_comp(index->table));
	ut_ad(zip_size);

	if (free_space_zip == 0) {
		return(true);
	}

	/* Subtract one byte for the encoded heap_no in the
	modification log. */
	free_space_zip--;

	/* There should be enough room for two node pointer
	records on an empty non-leaf page.  This prevents
	infinite page splits. */

	if (entry->n_fields >= n_uniq
	    && (REC_NODE_PTR_SIZE
		+ rec_get_converted_size_comp_prefix(
			index, entry->fields, n_uniq, NULL)
		/* On a compressed page, there is
		a two-byte entry in the dense
		page directory for every record.
		But there is no record header. */
		- (REC_N_NEW_EXTRA_BYTES - 2)
		> free_space_zip / 2)) {
		return(true);
	}

	return(false);
}

/*************************************************************//**
Gets the number of elements in the dense page directory,
including deleted records (the free list).
@return number of elements in the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_elems(
/*===============*/
	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
{
	/* Exclude the page infimum and supremum from the record count. */
	return ulint(page_dir_get_n_heap(page_zip->data))
		- PAGE_HEAP_NO_USER_LOW;
}

/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list).
@return length of dense page directory, in bytes */
UNIV_INLINE
ulint
page_zip_dir_size(
/*==============*/
	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
{
	return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
}

/*************************************************************//**
Gets an offset to the compressed page trailer (the dense page directory),
including deleted records (the free list).
@return offset of the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_start_offs(
/*====================*/
	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
	ulint			n_dense)	/*!< in: directory size */
{
	ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));

	return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
}

/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
@param[in] page_zip compressed page
@param[in] n_dense number of entries in the directory
@return pointer to the dense page directory */
#define page_zip_dir_start_low(page_zip, n_dense)			\
	((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
@param[in] page_zip compressed page
@return pointer to the dense page directory */
#define page_zip_dir_start(page_zip)					\
	page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))

/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list).
@return length of dense page directory comprising existing records, in bytes */
UNIV_INLINE
ulint
page_zip_dir_user_size(
/*===================*/
	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
{
	ulint	size = PAGE_ZIP_DIR_SLOT_SIZE
		* ulint(page_get_n_recs(page_zip->data));
	ut_ad(size <= page_zip_dir_size(page_zip));
	return(size);
}

/*************************************************************//**
Find the slot of the given record in the dense page directory.
@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find_low(
/*==================*/
	byte*	slot,			/*!< in: start of records */
	byte*	end,			/*!< in: end of records */
	ulint	offset)			/*!< in: offset of user record */
{
	ut_ad(slot <= end);

	for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
		if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
		    == offset) {
			return(slot);
		}
	}

	return(NULL);
}

/*************************************************************//**
Find the slot of the given non-free record in the dense page directory.
@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find(
/*==============*/
	page_zip_des_t*	page_zip,		/*!< in: compressed page */
	ulint		offset)			/*!< in: offset of user record */
{
	byte*	end	= page_zip->data + page_zip_get_size(page_zip);

	ut_ad(page_zip_simple_validate(page_zip));

	return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
				     end,
				     offset));
}

/*************************************************************//**
Find the slot of the given free record in the dense page directory.
@return dense directory slot, or NULL if record not found */
UNIV_INLINE
byte*
page_zip_dir_find_free(
/*===================*/
	page_zip_des_t*	page_zip,		/*!< in: compressed page */
	ulint		offset)			/*!< in: offset of user record */
{
	byte*	end	= page_zip->data + page_zip_get_size(page_zip);

	ut_ad(page_zip_simple_validate(page_zip));

	return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
				     end - page_zip_dir_user_size(page_zip),
				     offset));
}

/*************************************************************//**
Read a given slot in the dense page directory.
@return record offset on the uncompressed page, possibly ORed with
PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
UNIV_INLINE
ulint
page_zip_dir_get(
/*=============*/
	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
	ulint			slot)		/*!< in: slot
						(0=first user record) */
{
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
	return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
				- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
}
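
/* Illustration only: a dense directory slot is a 2-byte big-endian
value.  The low bits (PAGE_ZIP_DIR_SLOT_MASK) hold the record offset
on the uncompressed page; the top bits flag ownership and deletion.
A deleted record at offset 0x01a3 would be stored as
0x01a3 | PAGE_ZIP_DIR_SLOT_DEL and decoded as follows:

	ulint slot    = page_zip_dir_get(page_zip, i);
	ulint offs    = slot & PAGE_ZIP_DIR_SLOT_MASK;
	bool  deleted = (slot & PAGE_ZIP_DIR_SLOT_DEL) != 0;
	bool  owned   = (slot & PAGE_ZIP_DIR_SLOT_OWNED) != 0;
*/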

/** Write a byte string to a ROW_FORMAT=COMPRESSED page.
@param[in]      b       ROW_FORMAT=COMPRESSED index page
@param[in]      offset  byte offset from b.zip.data
@param[in]      len     length of the data to write */
inline void mtr_t::zmemcpy(const buf_block_t &b, ulint offset, ulint len)
{
  ut_ad(fil_page_get_type(b.page.zip.data) == FIL_PAGE_INDEX ||
        fil_page_get_type(b.page.zip.data) == FIL_PAGE_RTREE);
  ut_ad(page_zip_simple_validate(&b.page.zip));
  ut_ad(offset + len <= page_zip_get_size(&b.page.zip));

  memcpy_low(b, static_cast<uint16_t>(offset), &b.page.zip.data[offset], len);
  m_last_offset= static_cast<uint16_t>(offset + len);
}

/** Write a byte string to a ROW_FORMAT=COMPRESSED page.
@param[in]      b       ROW_FORMAT=COMPRESSED index page
@param[in]      dest    destination within b.zip.data
@param[in]      str     the data to write
@param[in]      len     length of the data to write
@tparam w       write request type */
template<mtr_t::write_type w>
inline void mtr_t::zmemcpy(const buf_block_t &b, void *dest, const void *str,
                           ulint len)
{
  byte *d= static_cast<byte*>(dest);
  const byte *s= static_cast<const byte*>(str);
  ut_ad(d >= b.page.zip.data + FIL_PAGE_OFFSET);
  if (w != FORCED)
  {
    ut_ad(len);
    const byte *const end= d + len;
    while (*d++ == *s++)
    {
      if (d == end)
      {
        ut_ad(w == MAYBE_NOP);
        return;
      }
    }
    s--;
    d--;
    len= static_cast<ulint>(end - d);
  }
  ::memcpy(d, s, len);
  zmemcpy(b, d - b.page.zip.data, len);
}

/** Write redo log for compressing a ROW_FORMAT=COMPRESSED index page.
@param[in,out]	block	ROW_FORMAT=COMPRESSED index page
@param[in]	index	the index that the block belongs to
@param[in,out]	mtr	mini-transaction */
static void page_zip_compress_write_log(buf_block_t *block,
                                        dict_index_t *index, mtr_t *mtr)
{
  if (!mtr->is_logged())
    return;

  const page_t *page= block->page.frame;
  const page_zip_des_t *page_zip= &block->page.zip;
  /* Read the number of user records. */
  ulint trailer_size= ulint(page_dir_get_n_heap(page_zip->data)) -
    PAGE_HEAP_NO_USER_LOW;
  /* Multiply by the amount of uncompressed data stored per record */
  if (!page_is_leaf(page))
    trailer_size*= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
  else if (index->is_clust())
    trailer_size*= PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN +
      DATA_ROLL_PTR_LEN;
  else
    trailer_size*= PAGE_ZIP_DIR_SLOT_SIZE;
  /* Add the space occupied by BLOB pointers. */
  trailer_size+= page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
  ut_a(page_zip->m_end > PAGE_DATA);
  compile_time_assert(FIL_PAGE_DATA <= PAGE_DATA);
  ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));

  mtr->init(block);
  mtr->zmemcpy(*block, FIL_PAGE_PREV, page_zip->m_end - FIL_PAGE_PREV);

  if (trailer_size)
    mtr->zmemcpy(*block, page_zip_get_size(page_zip) - trailer_size,
                 trailer_size);
}
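
/* Illustration only: on a clustered index leaf page whose heap holds
100 records (including the free list) and 2 BLOB pointers, the logged
trailer would span
100 * (PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
+ 2 * BTR_EXTERN_FIELD_REF_SIZE bytes taken from the very end of
page_zip->data, while the first zmemcpy() covers everything from
FIL_PAGE_PREV up to page_zip->m_end. */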

/******************************************************//**
Determine how many externally stored columns are contained
in existing records with smaller heap_no than rec. */
static
ulint
page_zip_get_n_prev_extern(
/*=======================*/
	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
					compressed page */
	const rec_t*		rec,	/*!< in: compact physical record
					on a B-tree leaf page */
	const dict_index_t*	index)	/*!< in: record descriptor */
{
	const page_t*	page	= page_align(rec);
	ulint		n_ext	= 0;
	ulint		i;
	ulint		left;
	ulint		heap_no;
	ulint		n_recs	= page_get_n_recs(page_zip->data);

	ut_ad(page_is_leaf(page));
	ut_ad(page_is_comp(page));
	ut_ad(dict_table_is_comp(index->table));
	ut_ad(index->is_primary());

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
	left = heap_no - PAGE_HEAP_NO_USER_LOW;
	if (UNIV_UNLIKELY(!left)) {
		return(0);
	}

	for (i = 0; i < n_recs; i++) {
		const rec_t*	r	= page + (page_zip_dir_get(page_zip, i)
						  & PAGE_ZIP_DIR_SLOT_MASK);

		if (rec_get_heap_no_new(r) < heap_no) {
			n_ext += rec_get_n_extern_new(r, index,
						      ULINT_UNDEFINED);
			if (!--left) {
				break;
			}
		}
	}

	return(n_ext);
}

/**********************************************************************//**
Encode the length of a fixed-length column.
@return buf + length of encoded val */
static
byte*
page_zip_fixed_field_encode(
/*========================*/
	byte*	buf,	/*!< in: pointer to buffer where to write */
	ulint	val)	/*!< in: value to write */
{
	ut_ad(val >= 2);

	if (UNIV_LIKELY(val < 126)) {
		/*
		0 = nullable variable field of at most 255 bytes length;
		1 = not null variable field of at most 255 bytes length;
		126 = nullable variable field with maximum length >255;
		127 = not null variable field with maximum length >255
		*/
		*buf++ = (byte) val;
	} else {
		*buf++ = (byte) (0x80 | val >> 8);
		*buf++ = (byte) val;
	}

	return(buf);
}
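
/* Illustration only: values below 126 take one byte, larger values
two.  A NOT NULL fixed-length column of 30 bytes is passed in as
val = 30 << 1 | 1 = 61 and encoded as the single byte 0x3d; an
accumulated fixed_sum of 300 bytes arrives as val = 300 << 1 | 1
= 601 = 0x259 and is encoded as the two bytes 0x82 0x59. */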

/**********************************************************************//**
Write the index information for the compressed page.
@return used size of buf */
ulint
page_zip_fields_encode(
/*===================*/
	ulint			n,	/*!< in: number of fields
					to compress */
	const dict_index_t*	index,	/*!< in: index comprising
					at least n fields */
	ulint			trx_id_pos,
					/*!< in: position of the trx_id column
					in the index, or ULINT_UNDEFINED if
					this is a non-leaf page */
	byte*			buf)	/*!< out: buffer of (n + 1) * 2 bytes */
{
	const byte*	buf_start	= buf;
	ulint		i;
	ulint		col;
	ulint		trx_id_col	= 0;
	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
	ulint		fixed_sum	= 0;

	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);

	for (i = col = 0; i < n; i++) {
		dict_field_t*	field = dict_index_get_nth_field(index, i);
		ulint		val;

		if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
			val = 1; /* set the "not nullable" flag */
		} else {
			val = 0; /* nullable field */
		}

		if (!field->fixed_len) {
			/* variable-length field */
			const dict_col_t*	column
				= dict_field_get_col(field);

			if (DATA_BIG_COL(column)) {
				val |= 0x7e; /* max > 255 bytes */
			}

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			*buf++ = (byte) val;
			col++;
		} else if (val) {
			/* fixed-length non-nullable field */

			if (fixed_sum && UNIV_UNLIKELY
			    (fixed_sum + field->fixed_len
			     > DICT_MAX_FIXED_COL_LEN)) {
				/* Write out the length of the
				preceding non-nullable fields,
				to avoid exceeding the maximum
				length of a fixed-length column. */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
				if (fixed_sum) {
					/* Write out the length of any
					preceding non-nullable fields,
					and start a new trx_id column. */
					buf = page_zip_fixed_field_encode(
						buf, fixed_sum << 1 | 1);
					col++;
				}

				trx_id_col = col;
				fixed_sum = field->fixed_len;
			} else {
				/* add to the sum */
				fixed_sum += field->fixed_len;
			}
		} else {
			/* fixed-length nullable field */

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			buf = page_zip_fixed_field_encode(
				buf, ulint(field->fixed_len) << 1);
			col++;
		}
	}

	if (fixed_sum) {
		/* Write out the lengths of last fixed-length columns. */
		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
	}

	if (trx_id_pos != ULINT_UNDEFINED) {
		/* Write out the position of the trx_id column */
		i = trx_id_col;
	} else {
		/* Write out the number of nullable fields */
		i = index->n_nullable;
	}

	if (i < 128) {
		*buf++ = (byte) i;
	} else {
		*buf++ = (byte) (0x80 | i >> 8);
		*buf++ = (byte) i;
	}

	ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
	return((ulint) (buf - buf_start));
}
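
/* Illustration only, for a hypothetical clustered index with key
(id INT NOT NULL), DB_TRX_ID at trx_id_pos == 1, DB_ROLL_PTR at 2 and
a nullable VARCHAR(500) as the last field: the loop above emits 0x09
(the id column, 4 << 1 | 1), 0x1b (the 6 + 7 bytes of DB_TRX_ID and
DB_ROLL_PTR, 13 << 1 | 1, flushed when the variable-length field is
reached), 0x7e (nullable variable-length field longer than 255 bytes),
and finally 0x01, the position of the trx_id column. */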

/**********************************************************************//**
Populate the dense page directory from the sparse directory. */
static
void
page_zip_dir_encode(
/*================*/
	const page_t*	page,	/*!< in: compact page */
	byte*		buf,	/*!< in: pointer to dense page directory[-1];
				out: dense directory on compressed page */
	const rec_t**	recs)	/*!< in: pointer to an array of 0, or NULL;
				out: dense page directory sorted by ascending
				address (and heap_no) */
{
	const byte*	rec;
	ulint		status;
	ulint		min_mark;
	ulint		heap_no;
	ulint		i;
	ulint		n_heap;
	ulint		offs;

	min_mark = 0;

	if (page_is_leaf(page)) {
		status = REC_STATUS_ORDINARY;
	} else {
		status = REC_STATUS_NODE_PTR;
		if (UNIV_UNLIKELY(!page_has_prev(page))) {
			min_mark = REC_INFO_MIN_REC_FLAG;
		}
	}

	n_heap = page_dir_get_n_heap(page);

	/* Traverse the list of stored records in the collation order,
	starting from the first user record. */

	rec = page + PAGE_NEW_INFIMUM;

	i = 0;

	for (;;) {
		ulint	info_bits;
		offs = rec_get_next_offs(rec, TRUE);
		if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
			break;
		}
		rec = page + offs;
		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);
		ut_a(offs < srv_page_size - PAGE_DIR);
		ut_a(offs >= PAGE_ZIP_START);
		compile_time_assert(!(PAGE_ZIP_DIR_SLOT_MASK
				      & (PAGE_ZIP_DIR_SLOT_MASK + 1)));
		compile_time_assert(PAGE_ZIP_DIR_SLOT_MASK
				    >= UNIV_ZIP_SIZE_MAX - 1);

		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != 0)) {
			offs |= PAGE_ZIP_DIR_SLOT_OWNED;
		}

		info_bits = rec_get_info_bits(rec, TRUE);
		if (info_bits & REC_INFO_DELETED_FLAG) {
			info_bits &= ~REC_INFO_DELETED_FLAG;
			offs |= PAGE_ZIP_DIR_SLOT_DEL;
		}
		ut_a(info_bits == min_mark);
		/* Only the smallest user record can have
		REC_INFO_MIN_REC_FLAG set. */
		min_mark = 0;

		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		ut_a(ulint(rec_get_status(rec)) == status);
	}

	offs = page_header_get_field(page, PAGE_FREE);

	/* Traverse the free list (of deleted records). */
	while (offs) {
		ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
		rec = page + offs;

		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
		ut_a(heap_no < n_heap);

		ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
		ut_a(ulint(rec_get_status(rec)) == status);

		mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);

		if (UNIV_LIKELY_NULL(recs)) {
			/* Ensure that each heap_no occurs at most once. */
			ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
			/* exclude infimum and supremum */
			recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
		}

		offs = rec_get_next_offs(rec, TRUE);
	}

	/* Ensure that each heap_no occurs at least once. */
	ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
}

extern "C" {

/**********************************************************************//**
Allocate memory for zlib. */
static
void*
page_zip_zalloc(
/*============*/
	void*	opaque,	/*!< in/out: memory heap */
	uInt	items,	/*!< in: number of items to allocate */
	uInt	size)	/*!< in: size of an item in bytes */
{
	return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
}

/**********************************************************************//**
Deallocate memory for zlib. */
static
void
page_zip_free(
/*==========*/
	void*	opaque MY_ATTRIBUTE((unused)),	/*!< in: memory heap */
	void*	address MY_ATTRIBUTE((unused)))/*!< in: object to free */
{
}

} /* extern "C" */

/**********************************************************************//**
Configure the zlib allocator to use the given memory heap. */
void
page_zip_set_alloc(
/*===============*/
	void*		stream,		/*!< in/out: zlib stream */
	mem_heap_t*	heap)		/*!< in: memory heap to use */
{
	z_stream*	strm = static_cast<z_stream*>(stream);

	strm->zalloc = page_zip_zalloc;
	strm->zfree = page_zip_free;
	strm->opaque = heap;
}
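
/* Illustration only: a compression caller wires zlib to an InnoDB
memory heap like this, so that everything deflate() allocates is
released by a single mem_heap_free() and page_zip_free() can remain a
no-op (this mirrors the usage in page_zip_compress() below):

	z_stream c_stream;
	page_zip_set_alloc(&c_stream, heap);
	int err = deflateInit2(&c_stream, int(page_zip_level), Z_DEFLATED,
			       int(srv_page_size_shift),
			       MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
	ut_a(err == Z_OK);
	// ... deflate() calls ...
	deflateEnd(&c_stream);
	mem_heap_free(heap);
*/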

#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/** Symbol for enabling compression and decompression diagnostics */
# define PAGE_ZIP_COMPRESS_DBG
#endif

#ifdef PAGE_ZIP_COMPRESS_DBG
/** Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
static bool	page_zip_compress_dbg;
/** Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
as a log file name generator. */
static unsigned	page_zip_compress_log;

/**********************************************************************//**
Wrapper for deflate().  Log the operation if page_zip_compress_dbg is set.
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
static
int
page_zip_compress_deflate(
/*======================*/
	FILE*		logfile,/*!< in: log file, or NULL */
	z_streamp	strm,	/*!< in/out: compressed stream for deflate() */
	int		flush)	/*!< in: deflate() flushing method */
{
	int	status;
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		ut_print_buf(stderr, strm->next_in, strm->avail_in);
	}
	if (UNIV_LIKELY_NULL(logfile)) {
		if (fwrite(strm->next_in, 1, strm->avail_in, logfile)
		    != strm->avail_in) {
			perror("fwrite");
		}
	}
	status = deflate(strm, flush);
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		fprintf(stderr, " -> %d\n", status);
	}
	return(status);
}

/* Redefine deflate(). */
# undef deflate
/** Debug wrapper for the zlib compression routine deflate().
Log the operation if page_zip_compress_dbg is set.
@param strm in/out: compressed stream
@param flush in: flushing method
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
/** Declaration of the logfile parameter */
# define FILE_LOGFILE FILE* logfile,
/** The logfile parameter */
# define LOGFILE logfile,
#else /* PAGE_ZIP_COMPRESS_DBG */
/** Empty declaration of the logfile parameter */
# define FILE_LOGFILE
/** Missing logfile parameter */
# define LOGFILE
#endif /* PAGE_ZIP_COMPRESS_DBG */

/**********************************************************************//**
Compress the records of a node pointer page.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_node_ptrs(
/*========================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	byte*		storage,	/*!< in: end of dense page directory */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int	err	= Z_OK;
	rec_offs* offsets = NULL;

	do {
		const rec_t*	rec = *recs++;

		offsets = rec_get_offsets(rec, index, offsets, 0,
					  ULINT_UNDEFINED, &heap);
		/* Only leaf nodes may contain externally stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
		MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
				  rec_offs_extra_size(offsets));

		/* Compress the extra bytes. */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}
		ut_ad(!c_stream->avail_in);

		/* Compress the data bytes, except node_ptr. */
		c_stream->next_in = (byte*) rec;
		c_stream->avail_in = static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}

		ut_ad(!c_stream->avail_in);

		memcpy(storage - REC_NODE_PTR_SIZE
		       * (rec_get_heap_no_new(rec) - 1),
		       c_stream->next_in, REC_NODE_PTR_SIZE);
		c_stream->next_in += REC_NODE_PTR_SIZE;
	} while (--n_dense);

	return(err);
}

/**********************************************************************//**
Compress the records of a leaf node of a secondary index.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_sec(
/*==================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense)	/*!< in: size of recs[] */
{
	int		err	= Z_OK;

	ut_ad(n_dense > 0);

	do {
		const rec_t*	rec = *recs++;

		/* Compress everything up to this record. */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES
			- c_stream->next_in);

		if (UNIV_LIKELY(c_stream->avail_in != 0)) {
			MEM_CHECK_DEFINED(c_stream->next_in,
					  c_stream->avail_in);
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {
				break;
			}
		}

		ut_ad(!c_stream->avail_in);
		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);

		/* Skip the REC_N_NEW_EXTRA_BYTES. */

		c_stream->next_in = (byte*) rec;
	} while (--n_dense);

	return(err);
}

/**********************************************************************//**
Compress a record of a leaf node of a clustered index that contains
externally stored columns.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_clust_ext(
/*========================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t*	rec,		/*!< in: record */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
	byte*		deleted,	/*!< in: dense directory entry pointing
					to the head of the free list */
	byte*		storage,	/*!< in: end of dense page directory */
	byte**		externs,	/*!< in/out: pointer to the next
					available BLOB pointer */
	ulint*		n_blobs)	/*!< in/out: number of
					externally stored columns */
{
	int	err;
	ulint	i;

	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint		len;
		const byte*	src;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			ut_ad(!rec_offs_nth_extern(offsets, i));
			/* Store trx_id and roll_ptr
			in uncompressed form. */
			src = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(src + DATA_TRX_ID_LEN
			      == rec_get_nth_field(rec, offsets,
						   i + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);

			/* Compress any preceding bytes. */
			c_stream->avail_in = static_cast<uInt>(
				src - c_stream->next_in);

			if (c_stream->avail_in) {
				err = deflate(c_stream, Z_NO_FLUSH);
				if (UNIV_UNLIKELY(err != Z_OK)) {

					return(err);
				}
			}

			ut_ad(!c_stream->avail_in);
			ut_ad(c_stream->next_in == src);

			memcpy(storage
			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
			       * (rec_get_heap_no_new(rec) - 1),
			       c_stream->next_in,
			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			c_stream->next_in
				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

			/* Skip also roll_ptr */
			i++;
		} else if (rec_offs_nth_extern(offsets, i)) {
			src = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
			src += len - BTR_EXTERN_FIELD_REF_SIZE;

			c_stream->avail_in = static_cast<uInt>(
				src - c_stream->next_in);
			if (UNIV_LIKELY(c_stream->avail_in != 0)) {
				err = deflate(c_stream, Z_NO_FLUSH);
				if (UNIV_UNLIKELY(err != Z_OK)) {

					return(err);
				}
			}

			ut_ad(!c_stream->avail_in);
			ut_ad(c_stream->next_in == src);

			/* Reserve space for the data at
			the end of the space reserved for
			the compressed data and the page
			modification log. */

			if (UNIV_UNLIKELY
			    (c_stream->avail_out
			     <= BTR_EXTERN_FIELD_REF_SIZE)) {
				/* out of space */
				return(Z_BUF_ERROR);
			}

			ut_ad(*externs == c_stream->next_out
			      + c_stream->avail_out
			      + 1/* end of modif. log */);

			c_stream->next_in
				+= BTR_EXTERN_FIELD_REF_SIZE;

			/* Skip deleted records. */
			if (UNIV_LIKELY_NULL
			    (page_zip_dir_find_low(
				    storage, deleted,
				    page_offset(rec)))) {
				continue;
			}

			(*n_blobs)++;
			c_stream->avail_out
				-= BTR_EXTERN_FIELD_REF_SIZE;
			*externs -= BTR_EXTERN_FIELD_REF_SIZE;

			/* Copy the BLOB pointer */
			memcpy(*externs, c_stream->next_in
			       - BTR_EXTERN_FIELD_REF_SIZE,
			       BTR_EXTERN_FIELD_REF_SIZE);
		}
	}

	return(Z_OK);
}

/**********************************************************************//**
Compress the records of a leaf node of a clustered index.
@return Z_OK, or a zlib error code */
static
int
page_zip_compress_clust(
/*====================*/
	FILE_LOGFILE
	z_stream*	c_stream,	/*!< in/out: compressed page stream */
	const rec_t**	recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		n_blobs,	/*!< in: 0; out: number of
					externally stored columns */
	ulint		trx_id_col,	/*!< index of the trx_id column */
	byte*		deleted,	/*!< in: dense directory entry pointing
					to the head of the free list */
	byte*		storage,	/*!< in: end of dense page directory */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int	err		= Z_OK;
	rec_offs* offsets		= NULL;
	/* BTR_EXTERN_FIELD_REF storage */
	byte*	externs		= storage - n_dense
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	ut_ad(*n_blobs == 0);

	do {
		const rec_t*	rec = *recs++;

		offsets = rec_get_offsets(rec, index, offsets, index->n_fields,
					  ULINT_UNDEFINED, &heap);
		ut_ad(rec_offs_n_fields(offsets)
		      == dict_index_get_n_fields(index));
		MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
		MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
				  rec_offs_extra_size(offsets));

		/* Compress the extra bytes. */
		c_stream->avail_in = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES
			- c_stream->next_in);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {

				goto func_exit;
			}
		}
		ut_ad(!c_stream->avail_in);
		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);

		/* Compress the data bytes. */

		c_stream->next_in = (byte*) rec;

		/* Check if there are any externally stored columns.
		For each externally stored column, store the
		BTR_EXTERN_FIELD_REF separately. */
		if (rec_offs_any_extern(offsets)) {
			ut_ad(dict_index_is_clust(index));

			err = page_zip_compress_clust_ext(
				LOGFILE
				c_stream, rec, offsets, trx_id_col,
				deleted, storage, &externs, n_blobs);

			if (UNIV_UNLIKELY(err != Z_OK)) {

				goto func_exit;
			}
		} else {
			ulint		len;
			const byte*	src;

			/* Store trx_id and roll_ptr in uncompressed form. */
			src = rec_get_nth_field(rec, offsets,
						trx_id_col, &len);
			ut_ad(src + DATA_TRX_ID_LEN
			      == rec_get_nth_field(rec, offsets,
						   trx_id_col + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);
			MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
			MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
					  rec_offs_extra_size(offsets));

			/* Compress any preceding bytes. */
			c_stream->avail_in = static_cast<uInt>(
				src - c_stream->next_in);

			if (c_stream->avail_in) {
				err = deflate(c_stream, Z_NO_FLUSH);
				if (UNIV_UNLIKELY(err != Z_OK)) {

					return(err);
				}
			}

			ut_ad(!c_stream->avail_in);
			ut_ad(c_stream->next_in == src);

			memcpy(storage
			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
			       * (rec_get_heap_no_new(rec) - 1),
			       c_stream->next_in,
			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			c_stream->next_in
				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

			/* Skip also roll_ptr */
			ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
		}

		/* Compress the last bytes of the record. */
		c_stream->avail_in = static_cast<uInt>(
			rec + rec_offs_data_size(offsets) - c_stream->next_in);

		if (c_stream->avail_in) {
			err = deflate(c_stream, Z_NO_FLUSH);
			if (UNIV_UNLIKELY(err != Z_OK)) {

				goto func_exit;
			}
		}
		ut_ad(!c_stream->avail_in);
	} while (--n_dense);

func_exit:
	return(err);
}

/** Attempt to compress a ROW_FORMAT=COMPRESSED page.
@retval true on success
@retval false on failure; block->page.zip will be left intact. */
bool
page_zip_compress(
	buf_block_t*		block,	/*!< in/out: buffer block */
	dict_index_t*		index,	/*!< in: index of the B-tree node */
	ulint			level,	/*!< in: compression level */
	mtr_t*			mtr)	/*!< in/out: mini-transaction */
{
	z_stream		c_stream;
	int			err;
	byte*			fields;		/*!< index field information */
	byte*			buf;		/*!< compressed payload of the
						page */
	byte*			buf_end;	/* end of buf */
	ulint			n_dense;
	ulint			slot_size;	/* amount of uncompressed bytes
						per record */
	const rec_t**		recs;		/*!< dense page directory,
						sorted by address */
	mem_heap_t*		heap;
	ulint			trx_id_col = ULINT_UNDEFINED;
	ulint			n_blobs	= 0;
	byte*			storage;	/* storage of uncompressed
						columns */
	const ulonglong		ns = my_interval_timer();
#ifdef PAGE_ZIP_COMPRESS_DBG
	FILE*			logfile = NULL;
#endif
	/* A local copy of srv_cmp_per_index_enabled to avoid reading that
	variable multiple times in this function, since it can be changed
	at any time. */
	my_bool			cmp_per_index_enabled;
	cmp_per_index_enabled	= srv_cmp_per_index_enabled;

	page_t* page = block->page.frame;
	page_zip_des_t* page_zip = &block->page.zip;

	ut_a(page_is_comp(page));
	ut_a(fil_page_index_page_check(page));
	ut_ad(page_simple_validate_new((page_t*) page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(dict_table_is_comp(index->table));

	MEM_CHECK_DEFINED(page, srv_page_size);

	/* Check the data that will be omitted. */
	ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
		     infimum_extra, sizeof infimum_extra));
	ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
		     infimum_data, sizeof infimum_data));
	ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
	     /* info_bits == 0, n_owned <= max */
	     <= PAGE_DIR_SLOT_MAX_N_OWNED);
	ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
		     supremum_extra_data, sizeof supremum_extra_data));

	if (page_is_empty(page)) {
		ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
		     == PAGE_NEW_SUPREMUM);
	}

	const ulint n_fields = page_is_leaf(page)
		? dict_index_get_n_fields(index)
		: dict_index_get_n_unique_in_tree_nonleaf(index);
	index_id_t ind_id = index->id;

	/* The dense directory excludes the infimum and supremum records. */
	n_dense = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;
#ifdef PAGE_ZIP_COMPRESS_DBG
	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
		ib::info() << "compress "
			<< static_cast<void*>(page_zip) << " "
			<< static_cast<const void*>(page) << " "
			<< page_is_leaf(page) << " "
			<< n_fields << " " << n_dense;
	}

	if (UNIV_UNLIKELY(page_zip_compress_log)) {
		/* Create a log file for every compression attempt. */
		char	logfilename[9];
		snprintf(logfilename, sizeof logfilename,
			 "%08x", page_zip_compress_log++);
		logfile = fopen(logfilename, "wb");

		if (logfile) {
			/* Write the uncompressed page to the log. */
			if (fwrite(page, 1, srv_page_size, logfile)
			    != srv_page_size) {
				perror("fwrite");
			}
			/* Record the compressed size as zero.
			This will be overwritten at successful exit. */
			putc(0, logfile);
			putc(0, logfile);
			putc(0, logfile);
			putc(0, logfile);
		}
	}
#endif /* PAGE_ZIP_COMPRESS_DBG */
	page_zip_stat[page_zip->ssize - 1].compressed++;
	if (cmp_per_index_enabled) {
		mysql_mutex_lock(&page_zip_stat_per_index_mutex);
		page_zip_stat_per_index[ind_id].compressed++;
		mysql_mutex_unlock(&page_zip_stat_per_index_mutex);
	}

	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
			  >= page_zip_get_size(page_zip))) {

		goto err_exit;
	}

	MONITOR_INC(MONITOR_PAGE_COMPRESS);

	heap = mem_heap_create(page_zip_get_size(page_zip)
			       + n_fields * (2 + sizeof(ulint))
			       + REC_OFFS_HEADER_SIZE
			       + n_dense * ((sizeof *recs)
					    - PAGE_ZIP_DIR_SLOT_SIZE)
			       + srv_page_size * 4
			       + (512 << MAX_MEM_LEVEL));

	recs = static_cast<const rec_t**>(
		mem_heap_zalloc(heap, n_dense * sizeof *recs));

	fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));

	buf = static_cast<byte*>(
		mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));

	buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;

	/* Compress the data payload. */
	page_zip_set_alloc(&c_stream, heap);

	err = deflateInit2(&c_stream, static_cast<int>(level),
			   Z_DEFLATED, static_cast<int>(srv_page_size_shift),
			   MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
	ut_a(err == Z_OK);

	c_stream.next_out = buf;

	/* Subtract the space reserved for uncompressed data. */
	/* Page header and the end marker of the modification log */
	c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);

	/* Dense page directory and uncompressed columns, if any */
	if (page_is_leaf(page)) {
		if (dict_index_is_clust(index)) {
			trx_id_col = index->db_trx_id();

			slot_size = PAGE_ZIP_DIR_SLOT_SIZE
				+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;

		} else {
			/* Signal the absence of trx_id
			in page_zip_fields_encode() */
			trx_id_col = 0;
			slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
		}
	} else {
		slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
		trx_id_col = ULINT_UNDEFINED;
	}

	if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
			  + 6/* sizeof(zlib header and footer) */)) {
		goto zlib_error;
	}

	c_stream.avail_out -= uInt(n_dense * slot_size);
	c_stream.avail_in = uInt(page_zip_fields_encode(n_fields, index,
							trx_id_col, fields));
	c_stream.next_in = fields;

	if (UNIV_LIKELY(!trx_id_col)) {
		trx_id_col = ULINT_UNDEFINED;
	}

	MEM_CHECK_DEFINED(c_stream.next_in, c_stream.avail_in);
	err = deflate(&c_stream, Z_FULL_FLUSH);
	if (err != Z_OK) {
		goto zlib_error;
	}

	ut_ad(!c_stream.avail_in);

	page_zip_dir_encode(page, buf_end, recs);

	c_stream.next_in = (byte*) page + PAGE_ZIP_START;

	storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;

	/* Compress the records in heap_no order. */
	if (UNIV_UNLIKELY(!n_dense)) {
	} else if (!page_is_leaf(page)) {
		/* This is a node pointer page. */
		err = page_zip_compress_node_ptrs(LOGFILE
						  &c_stream, recs, n_dense,
						  index, storage, heap);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
		/* This is a leaf page in a secondary index. */
		err = page_zip_compress_sec(LOGFILE
					    &c_stream, recs, n_dense);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	} else {
		/* This is a leaf page in a clustered index. */
		err = page_zip_compress_clust(LOGFILE
					      &c_stream, recs, n_dense,
					      index, &n_blobs, trx_id_col,
					      buf_end - PAGE_ZIP_DIR_SLOT_SIZE
					      * page_get_n_recs(page),
					      storage, heap);
		if (UNIV_UNLIKELY(err != Z_OK)) {
			goto zlib_error;
		}
	}

	/* Finish the compression. */
	ut_ad(!c_stream.avail_in);
	/* Compress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list,
	or the data of the last record from page_zip_compress_sec(). */
	c_stream.avail_in = static_cast<uInt>(
		page_header_get_field(page, PAGE_HEAP_TOP)
		- (c_stream.next_in - page));
	ut_a(c_stream.avail_in <= srv_page_size - PAGE_ZIP_START - PAGE_DIR);

	MEM_CHECK_DEFINED(c_stream.next_in, c_stream.avail_in);
	err = deflate(&c_stream, Z_FINISH);

	if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
zlib_error:
		deflateEnd(&c_stream);
		mem_heap_free(heap);
err_exit:
#ifdef PAGE_ZIP_COMPRESS_DBG
		if (logfile) {
			fclose(logfile);
		}
#endif /* PAGE_ZIP_COMPRESS_DBG */
		if (page_is_leaf(page)) {
			dict_index_zip_failure(index);
		}

		const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
		page_zip_stat[page_zip->ssize - 1].compressed_usec
			+= time_diff;
		if (cmp_per_index_enabled) {
			mysql_mutex_lock(&page_zip_stat_per_index_mutex);
			page_zip_stat_per_index[ind_id].compressed_usec
				+= time_diff;
			mysql_mutex_unlock(&page_zip_stat_per_index_mutex);
		}
		return false;
	}

	err = deflateEnd(&c_stream);
	ut_a(err == Z_OK);

	ut_ad(buf + c_stream.total_out == c_stream.next_out);
	ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);

#if defined HAVE_valgrind && !__has_feature(memory_sanitizer)
	/* Valgrind believes that zlib does not initialize some bits
	in the last 7 or 8 bytes of the stream.  Make Valgrind happy. */
	MEM_MAKE_DEFINED(buf, c_stream.total_out);
#endif /* HAVE_valgrind && !memory_sanitizer */

	/* Zero out the area reserved for the modification log.
	Space for the end marker of the modification log is not
	included in avail_out. */
	memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);

#ifdef UNIV_DEBUG
	page_zip->m_start =
#endif /* UNIV_DEBUG */
		page_zip->m_end = uint16_t(PAGE_DATA + c_stream.total_out);
	page_zip->m_nonempty = FALSE;
	page_zip->n_blobs = unsigned(n_blobs) & ((1U << 12) - 1);
	/* Copy those header fields that will not be written
	in buf_flush_init_for_writing() */
	memcpy_aligned<8>(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
			  FIL_PAGE_LSN - FIL_PAGE_PREV);
	memcpy_aligned<2>(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
			  2);
	memcpy_aligned<2>(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
			  PAGE_DATA - FIL_PAGE_DATA);
	/* Copy the rest of the compressed page */
	memcpy_aligned<2>(page_zip->data + PAGE_DATA, buf,
			  page_zip_get_size(page_zip) - PAGE_DATA);
	mem_heap_free(heap);
#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */

	page_zip_compress_write_log(block, index, mtr);

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));

#ifdef PAGE_ZIP_COMPRESS_DBG
	if (logfile) {
		/* Record the compressed size of the block. */
		byte sz[4];
		mach_write_to_4(sz, c_stream.total_out);
		fseek(logfile, srv_page_size, SEEK_SET);
		if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) {
			perror("fwrite");
		}
		fclose(logfile);
	}
#endif /* PAGE_ZIP_COMPRESS_DBG */
	const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
	page_zip_stat[page_zip->ssize - 1].compressed_ok++;
	page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
	if (cmp_per_index_enabled) {
		mysql_mutex_lock(&page_zip_stat_per_index_mutex);
		page_zip_stat_per_index[ind_id].compressed_ok++;
		page_zip_stat_per_index[ind_id].compressed_usec += time_diff;
		mysql_mutex_unlock(&page_zip_stat_per_index_mutex);
	}

	if (page_is_leaf(page)) {
		dict_index_zip_success(index);
	}

	return true;
}
| 
 | |
| /**********************************************************************//**
 | |
| Deallocate the index information initialized by page_zip_fields_decode(). */
 | |
| static
 | |
| void
 | |
| page_zip_fields_free(
 | |
| /*=================*/
 | |
| 	dict_index_t*	index)	/*!< in: dummy index to be freed */
 | |
| {
 | |
| 	if (index) {
 | |
| 		dict_table_t*	table = index->table;
 | |
| 		index->zip_pad.mutex.~mutex();
 | |
| 		mem_heap_free(index->heap);
 | |
| 
 | |
| 		dict_mem_table_free(table);
 | |
| 	}
 | |
| }
 | |
| 
 | |
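/* The field encoding read below by page_zip_fields_decode() uses one
or two bytes per field: a value with the high bit set is a two-byte
encoding of a fixed-length column longer than 62 bytes; 126 and 127
denote variable-length columns whose maximum length exceeds 255 bytes;
0 and 1 denote variable-length columns of at most 255 bytes; any other
value is a fixed-length column with len = val >> 1.  In each case the
low bit carries the NOT NULL flag.  One final encoded value follows
the fields: on clustered index leaf pages the position of the
DB_TRX_ID column (0 denoting a secondary index), otherwise the number
of nullable fields. */
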
/**********************************************************************//**
Read the index information for the compressed page.
@return own: dummy index describing the page, or NULL on error */
static
dict_index_t*
page_zip_fields_decode(
/*===================*/
	const byte*	buf,	/*!< in: index information */
	const byte*	end,	/*!< in: end of buf */
	ulint*		trx_id_col,/*!< in: NULL for non-leaf pages;
				for leaf pages, pointer to where to store
				the position of the trx_id column */
	bool		is_spatial)/*!< in: is spatial index or not */
{
	const byte*	b;
	ulint		n;
	ulint		i;
	ulint		val;
	dict_table_t*	table;
	dict_index_t*	index;

	/* Determine the number of fields. */
	for (b = buf, n = 0; b < end; n++) {
		if (*b++ & 0x80) {
			b++; /* skip the second byte */
		}
	}

	n--; /* n_nullable or trx_id */

	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {

		page_zip_fail(("page_zip_fields_decode: n = %lu\n",
			       (ulong) n));
		return(NULL);
	}

	if (UNIV_UNLIKELY(b > end)) {

		page_zip_fail(("page_zip_fields_decode: %p > %p\n",
			       (const void*) b, (const void*) end));
		return(NULL);
	}

	table = dict_table_t::create({C_STRING_WITH_LEN("ZIP_DUMMY")},
				     nullptr, n, 0, DICT_TF_COMPACT, 0);
	index = dict_mem_index_create(table, "ZIP_DUMMY", 0, n);
	index->n_uniq = static_cast<unsigned>(n) & dict_index_t::MAX_N_FIELDS;
	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	index->cached = TRUE;

	/* Initialize the fields. */
	for (b = buf, i = 0; i < n; i++) {
		ulint	mtype;
		ulint	len;

		val = *b++;

		if (UNIV_UNLIKELY(val & 0x80)) {
			/* fixed length > 62 bytes */
			val = (val & 0x7f) << 8 | *b++;
			len = val >> 1;
			mtype = DATA_FIXBINARY;
		} else if (UNIV_UNLIKELY(val >= 126)) {
			/* variable length with max > 255 bytes */
			len = 0x7fff;
			mtype = DATA_BINARY;
		} else if (val <= 1) {
			/* variable length with max <= 255 bytes */
			len = 0;
			mtype = DATA_BINARY;
		} else {
			/* fixed length <= 62 bytes */
			len = val >> 1;
			mtype = DATA_FIXBINARY;
		}

		dict_mem_table_add_col(table, NULL, NULL, mtype,
				       val & 1 ? DATA_NOT_NULL : 0, len);
		dict_index_add_col(index, table,
				   dict_table_get_nth_col(table, i), 0);
	}

	val = *b++;
	if (UNIV_UNLIKELY(val & 0x80)) {
		val = (val & 0x7f) << 8 | *b++;
	}

	/* Decode the position of the trx_id column. */
	if (trx_id_col) {
		if (!val) {
			val = ULINT_UNDEFINED;
		} else if (UNIV_UNLIKELY(val >= n)) {
fail:
			page_zip_fields_free(index);
			return NULL;
		} else {
			index->type = DICT_CLUSTERED;
		}

		*trx_id_col = val;
	} else {
		/* Decode the number of nullable fields. */
		if (UNIV_UNLIKELY(index->n_nullable > val)) {
			goto fail;
		} else {
			index->n_nullable = static_cast<unsigned>(val)
				& dict_index_t::MAX_N_FIELDS;
		}
	}

	/* ROW_FORMAT=COMPRESSED does not support instant ADD COLUMN */
	index->n_core_fields = index->n_fields;
	index->n_core_null_bytes = static_cast<uint8_t>(
		UT_BITS_IN_BYTES(unsigned(index->n_nullable)));

	ut_ad(b == end);

	if (is_spatial) {
		index->type |= DICT_SPATIAL;
	}

	return(index);
}

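/* The dense page directory at the end of the compressed page stores
one 16-bit entry per user record: the low PAGE_ZIP_DIR_SLOT_MASK bits
are the byte offset of the record in the uncompressed page, while the
PAGE_ZIP_DIR_SLOT_OWNED and PAGE_ZIP_DIR_SLOT_DEL bits flag records
that own a sparse directory slot and deleted (free-list) records.
The first page_get_n_recs() entries follow the record list order; the
remaining entries make up the free list.  page_zip_dir_decode() below
rebuilds the sparse directory of the uncompressed page from these
entries. */
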
/**********************************************************************//**
Populate the sparse page directory from the dense directory.
@return TRUE on success, FALSE on failure */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
page_zip_dir_decode(
/*================*/
	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
					compressed page */
	page_t*			page,	/*!< in: compact page with valid header;
					out: trailer and sparse page directory
					filled in */
	rec_t**			recs,	/*!< out: dense page directory sorted by
					ascending address (and heap_no) */
	ulint			n_dense)/*!< in: number of user records, and
					size of recs[] */
{
	ulint	i;
	ulint	n_recs;
	byte*	slot;

	n_recs = page_get_n_recs(page);

	if (UNIV_UNLIKELY(n_recs > n_dense)) {
		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
			       (ulong) n_recs, (ulong) n_dense));
		return(FALSE);
	}

	/* Traverse the list of stored records in the sorting order,
	starting from the first user record. */

	slot = page + (srv_page_size - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
	UNIV_PREFETCH_RW(slot);

	/* Zero out the page trailer. */
	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);

	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
	slot -= PAGE_DIR_SLOT_SIZE;
	UNIV_PREFETCH_RW(slot);

	/* Initialize the sparse directory and copy the dense directory. */
	for (i = 0; i < n_recs; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
			slot -= PAGE_DIR_SLOT_SIZE;
			UNIV_PREFETCH_RW(slot);
		}

		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_recs,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
	}

	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
	{
		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
			page, page_dir_get_n_slots(page) - 1U);

		if (UNIV_UNLIKELY(slot != last_slot)) {
			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
				       (const void*) slot,
				       (const void*) last_slot));
			return(FALSE);
		}
	}

	/* Copy the rest of the dense directory. */
	for (; i < n_dense; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_dense,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + offs;
	}

	std::sort(recs, recs + n_dense);
	return(TRUE);
}

/**********************************************************************//**
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_set_extra_bytes(
/*=====================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	page_t*			page,	/*!< in/out: uncompressed page */
	ulint			info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
{
	ulint	n;
	ulint	i;
	ulint	n_owned = 1;
	ulint	offs;
	rec_t*	rec;

	n = page_get_n_recs(page);
	rec = page + PAGE_NEW_INFIMUM;

	for (i = 0; i < n; i++) {
		offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
			info_bits |= REC_INFO_DELETED_FLAG;
		}
		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
			info_bits |= n_owned;
			n_owned = 1;
		} else {
			n_owned++;
		}
		offs &= PAGE_ZIP_DIR_SLOT_MASK;
		if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
				  + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_set_extra_bytes 1:"
				       " %u %u %lx\n",
				       (unsigned) i, (unsigned) n,
				       (ulong) offs));
			return(FALSE);
		}

		rec_set_next_offs_new(rec, offs);
		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
		info_bits = 0;
	}

	/* Set the next pointer of the last user record. */
	rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);

	/* Set n_owned of the supremum record. */
	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;

	/* The dense directory excludes the infimum and supremum records. */
	n = ulint(page_dir_get_n_heap(page)) - PAGE_HEAP_NO_USER_LOW;

	if (i >= n) {
		if (UNIV_LIKELY(i == n)) {
			return(TRUE);
		}

		page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
			       (unsigned) i, (unsigned) n));
		return(FALSE);
	}

	offs = page_zip_dir_get(page_zip, i);

	/* Set the extra bytes of deleted records on the free list. */
	for (;;) {
		if (UNIV_UNLIKELY(!offs)
		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {

			page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
				       (ulong) offs));
			return(FALSE);
		}

		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */

		if (++i == n) {
			break;
		}

		offs = page_zip_dir_get(page_zip, i);
		rec_set_next_offs_new(rec, offs);
	}

	/* Terminate the free list. */
	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
	rec_set_next_offs_new(rec, 0);

	return(TRUE);
}

/**********************************************************************//**
Apply the modification log to a record containing externally stored
columns.  Do not copy the fields that are stored separately.
@return pointer to modification log, or NULL on failure */
static
const byte*
page_zip_apply_log_ext(
/*===================*/
	rec_t*		rec,		/*!< in/out: record */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
	const byte*	data,		/*!< in: modification log */
	const byte*	end)		/*!< in: end of modification log */
{
	ulint	i;
	ulint	len;
	byte*	next_out = rec;

	/* Check if there are any externally stored columns.
	For each externally stored column, skip the
	BTR_EXTERN_FIELD_REF. */

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			if (UNIV_UNLIKELY(dst - next_out >= end - data)
			    || UNIV_UNLIKELY
			    (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
			    || rec_offs_nth_extern(offsets, i)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " trx_id len %lu,"
					       " %p - %p >= %p - %p\n",
					       (ulong) len,
					       (const void*) dst,
					       (const void*) next_out,
					       (const void*) end,
					       (const void*) data));
				return(NULL);
			}

			memcpy(next_out, data, ulint(dst - next_out));
			data += ulint(dst - next_out);
			next_out = dst + (DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN);
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets,
						i, &len);
			ut_ad(len
			      >= BTR_EXTERN_FIELD_REF_SIZE);

			len += ulint(dst - next_out)
				- BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log_ext:"
					       " ext %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(next_out, data, len);
			data += len;
			next_out += len
				+ BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	/* Copy the last bytes of the record. */
	len = ulint(rec_get_end(rec, offsets) - next_out);
	if (UNIV_UNLIKELY(data + len >= end)) {
		page_zip_fail(("page_zip_apply_log_ext:"
			       " last %p+%lu >= %p\n",
			       (const void*) data,
			       (ulong) len,
			       (const void*) end));
		return(NULL);
	}
	memcpy(next_out, data, len);
	data += len;

	return(data);
}

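/* Each entry of the modification log parsed by page_zip_apply_log()
starts with (heap_no - 1) << 1 | deleted, in one byte, or in two bytes
(with the high bit of the first byte set) if the value does not fit in
seven bits.  A zero byte terminates the log.  If the deleted flag is
clear, the record follows: the extra bytes in reverse order, then the
data bytes, omitting the columns that are stored separately at the end
of the compressed page (node pointer, DB_TRX_ID and DB_ROLL_PTR, or
BTR_EXTERN_FIELD_REF). */
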
/**********************************************************************//**
Apply the modification log to an uncompressed page.
Do not copy the fields that are stored separately.
@return pointer to end of modification log, or NULL on failure */
static
const byte*
page_zip_apply_log(
/*===============*/
	const byte*	data,	/*!< in: modification log */
	ulint		size,	/*!< in: maximum length of the log, in bytes */
	rec_t**		recs,	/*!< in: dense page directory,
				sorted by address (indexed by
				heap_no - PAGE_HEAP_NO_USER_LOW) */
	ulint		n_dense,/*!< in: size of recs[] */
	ulint		n_core,	/*!< in: index->n_fields, or 0 for non-leaf */
	ulint		trx_id_col,/*!< in: column number of trx_id in the index,
				or ULINT_UNDEFINED if none */
	ulint		heap_status,
				/*!< in: heap_no and status bits for
				the next record to uncompress */
	dict_index_t*	index,	/*!< in: index of the page */
	rec_offs*	offsets)/*!< in/out: work area for
				rec_get_offsets_reverse() */
{
	const byte* const end = data + size;

	for (;;) {
		ulint	val;
		rec_t*	rec;
		ulint	len;
		ulint	hs;

		val = *data++;
		if (UNIV_UNLIKELY(!val)) {
			return(data - 1);
		}
		if (val & 0x80) {
			val = (val & 0x7f) << 8 | *data++;
			if (UNIV_UNLIKELY(!val)) {
				page_zip_fail(("page_zip_apply_log:"
					       " invalid val %x%x\n",
					       data[-2], data[-1]));
				return(NULL);
			}
		}
		if (UNIV_UNLIKELY(data >= end)) {
			page_zip_fail(("page_zip_apply_log: %p >= %p\n",
				       (const void*) data,
				       (const void*) end));
			return(NULL);
		}
		if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
			page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
				       (ulong) val, (ulong) n_dense));
			return(NULL);
		}

		/* Determine the heap number and status bits of the record. */
		rec = recs[(val >> 1) - 1];

		hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
		hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);

		/* This may either be an old record that is being
		overwritten (updated in place, or allocated from
		the free list), or a new record, with the next
		available heap_no. */
		if (UNIV_UNLIKELY(hs > heap_status)) {
			page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
				       (ulong) hs, (ulong) heap_status));
			return(NULL);
		} else if (hs == heap_status) {
			/* A new record was allocated from the heap. */
			if (UNIV_UNLIKELY(val & 1)) {
				/* Only existing records may be cleared. */
				page_zip_fail(("page_zip_apply_log:"
					       " attempting to create"
					       " deleted rec %lu\n",
					       (ulong) hs));
				return(NULL);
			}
			heap_status += 1 << REC_HEAP_NO_SHIFT;
		}

		mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);

		if (val & 1) {
			/* Clear the data bytes of the record. */
			mem_heap_t*	heap	= NULL;
			rec_offs*	offs;
			offs = rec_get_offsets(rec, index, offsets, n_core,
					       ULINT_UNDEFINED, &heap);
			memset(rec, 0, rec_offs_data_size(offs));

			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			continue;
		}

		compile_time_assert(REC_STATUS_NODE_PTR == TRUE);
		rec_get_offsets_reverse(data, index,
					hs & REC_STATUS_NODE_PTR,
					offsets);
		/* Silence a debug assertion in rec_offs_make_valid().
		This will be overwritten in page_zip_set_extra_bytes(),
		called by page_zip_decompress_low(). */
		ut_d(rec[-REC_NEW_INFO_BITS] = 0);
		rec_offs_make_valid(rec, index, n_core != 0, offsets);

		/* Copy the extra bytes (backwards). */
		{
			byte*	start	= rec_get_start(rec, offsets);
			byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
			while (b != start) {
				*--b = *data++;
			}
		}

		/* Copy the data bytes. */
		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
			/* Non-leaf nodes should not contain any
			externally stored columns. */
			if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
				page_zip_fail(("page_zip_apply_log:"
					       " %lu&REC_STATUS_NODE_PTR\n",
					       (ulong) hs));
				return(NULL);
			}

			data = page_zip_apply_log_ext(
				rec, offsets, trx_id_col, data, end);

			if (UNIV_UNLIKELY(!data)) {
				return(NULL);
			}
		} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
			len = rec_offs_data_size(offsets)
				- REC_NODE_PTR_SIZE;
			/* Copy the data bytes, except node_ptr. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " node_ptr %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(rec, data, len);
			data += len;
		} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
			len = rec_offs_data_size(offsets);

			/* Copy all data bytes of
			a record in a secondary index. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " sec %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(rec, data, len);
			data += len;
		} else {
			/* Skip DB_TRX_ID and DB_ROLL_PTR. */
			ulint	l = rec_get_nth_field_offs(offsets,
							   trx_id_col, &len);
			byte*	b;

			if (UNIV_UNLIKELY(data + l >= end)
			    || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
						    + DATA_ROLL_PTR_LEN))) {
				page_zip_fail(("page_zip_apply_log:"
					       " trx_id %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) l,
					       (const void*) end));
				return(NULL);
			}

			/* Copy any preceding data bytes. */
			memcpy(rec, data, l);
			data += l;

			/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
			b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			len = ulint(rec_get_end(rec, offsets) - b);
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log:"
					       " clust %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(b, data, len);
			data += len;
		}
	}
}

/**********************************************************************//**
Set the heap_no in a record, and skip the fixed-size record header
that is not included in the d_stream.
@return TRUE on success, FALSE if d_stream does not end at rec */
static
ibool
page_zip_decompress_heap_no(
/*========================*/
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t*		rec,		/*!< in/out: record */
	ulint&		heap_status)	/*!< in/out: heap_no and status bits */
{
	if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
		/* n_dense has grown since the page was last compressed. */
		return(FALSE);
	}

	/* Skip the REC_N_NEW_EXTRA_BYTES. */
	d_stream->next_out = rec;

	/* Set heap_no and the status bits. */
	mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
	heap_status += 1 << REC_HEAP_NO_SHIFT;
	return(TRUE);
}

/**********************************************************************//**
Decompress the records of a node pointer page.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_node_ptrs(
/*==========================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	rec_offs*	offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	ulint		heap_status = REC_STATUS_NODE_PTR
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint		slot;
	const byte*	storage;

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= static_cast<uInt>(
		n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE));

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		d_stream->avail_out = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		ut_ad(d_stream->avail_out < srv_page_size
		      - PAGE_ZIP_START - PAGE_DIR);
		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets, 0,
					  ULINT_UNDEFINED, &heap);

		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Decompress the data bytes, except node_ptr. */
		d_stream->avail_out = static_cast<uInt>(
			rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		/* Clear the node pointer in case the record
		will be deleted and the space will be reallocated
		to a smaller record. */
		memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
		d_stream->next_out += REC_NODE_PTR_SIZE;

		ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = uint16_t(PAGE_DATA + d_stream->total_in);
#endif /* UNIV_DEBUG */

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense, 0,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = uint16_t(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	if (UNIV_UNLIKELY
	    (page_zip_get_trailer_len(page_zip,
				      dict_index_is_clust(index))
	     + page_zip->m_end >= page_zip_get_size(page_zip))) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " %lu + %lu >= %lu, %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, dict_index_is_clust(index)),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip),
			       (ulong) dict_index_is_clust(index)));
		return(FALSE);
	}

	/* Restore the uncompressed columns in heap_no order. */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*		rec	= recs[slot];

		offsets = rec_get_offsets(rec, index, offsets, 0,
					  ULINT_UNDEFINED, &heap);
		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));
		storage -= REC_NODE_PTR_SIZE;

		memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
		       storage, REC_NODE_PTR_SIZE);
	}

	return(TRUE);
}

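/* The trailer of the compressed page stores some columns
uncompressed, growing downwards from the dense page directory: child
page numbers on node pointer pages; DB_TRX_ID and DB_ROLL_PTR,
followed by any BTR_EXTERN_FIELD_REF, on clustered index leaf pages.
Leaf pages of secondary indexes only store the directory itself.
This is why each decompressor subtracts the reserved space from
d_stream->avail_in before inflating the record data, and restores the
uncompressed columns only after applying the modification log. */
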
/**********************************************************************//**
Decompress the records of a leaf node of a secondary index.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_sec(
/*====================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	rec_offs*	offsets)	/*!< in/out: temporary offsets */
{
	ulint	heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint	slot;

	ut_a(!dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= static_cast<uint>(
		n_dense * PAGE_ZIP_DIR_SLOT_SIZE);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Decompress everything up to this record. */
		d_stream->avail_out = static_cast<uint>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		if (UNIV_LIKELY(d_stream->avail_out)) {
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
				page_zip_decompress_heap_no(
					d_stream, rec, heap_status);
				goto zlib_done;
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_sec:"
					       " inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}
	}

	/* Decompress the data of the last record and any trailing garbage,
	in case the last record was allocated from an originally longer space
	on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_sec:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_sec:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

	ut_d(page_zip->m_start = uint16_t(PAGE_DATA + d_stream->total_in));

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense,
						 index->n_fields,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = uint16_t(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, FALSE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* There are no uncompressed columns on leaf pages of
	secondary indexes. */

	return(TRUE);
}

/**********************************************************************//**
Decompress a record of a leaf node of a clustered index that contains
externally stored columns.
@return TRUE on success */
static
ibool
page_zip_decompress_clust_ext(
/*==========================*/
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t*		rec,		/*!< in/out: record */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec) */
	ulint		trx_id_col)	/*!< in: position of DB_TRX_ID */
{
	ulint	i;

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		ulint	len;
		byte*	dst;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			/* Skip trx_id and roll_ptr */
			dst = rec_get_nth_field(rec, offsets, i, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " len[%lu] = %lu\n",
					       (ulong) i, (ulong) len));
				return(FALSE);
			}

			if (rec_offs_nth_extern(offsets, i)) {

				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " DB_TRX_ID at %lu is ext\n",
					       (ulong) i));
				return(FALSE);
			}

			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		} else if (rec_offs_nth_extern(offsets, i)) {
			dst = rec_get_nth_field(rec, offsets, i, &len);
			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust_ext:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				return(FALSE);
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear the BLOB pointer in case
			the record will be deleted and the
			space will not be reused.  Note that
			the final initialization of the BLOB
			pointers (copying from "externs"
			or clearing) will have to take place
			only after the page modification log
			has been applied.  Otherwise, we
			could end up with an uninitialized
			BLOB pointer when a record is deleted,
			reallocated and deleted. */
			memset(d_stream->next_out, 0,
			       BTR_EXTERN_FIELD_REF_SIZE);
			d_stream->next_out
				+= BTR_EXTERN_FIELD_REF_SIZE;
		}
	}

	return(TRUE);
}

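/* BLOB pointers are restored in two phases: page_zip_decompress_clust_ext()
above zeroes them out while inflating, and page_zip_decompress_clust()
below copies them back from the "externs" area of the compressed page
(or leaves them zero for deleted records) only after the modification
log has been applied, so that a record which was deleted, reallocated
and deleted again cannot retain a stale, uninitialized pointer. */
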
/**********************************************************************//**
Decompress the records of a leaf node of a clustered index.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_clust(
/*======================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint		trx_id_col,	/*!< in: index of the trx_id column */
	rec_offs*	offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int		err;
	ulint		slot;
	ulint		heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	const byte*	storage;
	const byte*	externs;

	ut_a(dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= static_cast<uInt>(n_dense)
			    * (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE);

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec	= recs[slot];

		d_stream->avail_out = static_cast<uInt>(
			rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);

		ut_ad(d_stream->avail_out < srv_page_size
		      - PAGE_ZIP_START - PAGE_DIR);
		err = inflate(d_stream, Z_SYNC_FLUSH);
		switch (err) {
		case Z_STREAM_END:
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (UNIV_LIKELY(!d_stream->avail_out)) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets, index->n_fields,
					  ULINT_UNDEFINED, &heap);

		/* This is a leaf page in a clustered index. */

		/* Check if there are any externally stored columns.
		For each externally stored column, restore the
		BTR_EXTERN_FIELD_REF separately. */

		if (rec_offs_any_extern(offsets)) {
			if (UNIV_UNLIKELY
			    (!page_zip_decompress_clust_ext(
				    d_stream, rec, offsets, trx_id_col))) {

				goto zlib_error;
			}
		} else {
			/* Skip trx_id and roll_ptr */
			ulint	len;
			byte*	dst = rec_get_nth_field(rec, offsets,
							trx_id_col, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust:"
					       " len = %lu\n", (ulong) len));
				goto zlib_error;
			}

			d_stream->avail_out = static_cast<uInt>(
				dst - d_stream->next_out);

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		}

		/* Decompress the last bytes of the record. */
		d_stream->avail_out = static_cast<uInt>(
			rec_get_end(rec, offsets) - d_stream->next_out);

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = static_cast<uInt>(
		page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out));
	if (UNIV_UNLIKELY(d_stream->avail_out > srv_page_size
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_clust:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_clust:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       ulint(page_dir_get_nth_slot(page,
						   page_dir_get_n_slots(page)
						   - 1U)
			     - d_stream->next_out));
	}

	ut_d(page_zip->m_start = uint16_t(PAGE_DATA + d_stream->total_in));

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense,
						 index->n_fields,
						 trx_id_col, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = uint16_t(mod_log_ptr - page_zip->data);
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, TRUE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	storage = page_zip_dir_start_low(page_zip, n_dense);

	externs = storage - n_dense
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	/* Restore the uncompressed columns in heap_no order. */

	for (slot = 0; slot < n_dense; slot++) {
		ulint	i;
		ulint	len;
		byte*	dst;
		rec_t*	rec	= recs[slot];
		bool	exists	= !page_zip_dir_find_free(
			page_zip, page_offset(rec));
		offsets = rec_get_offsets(rec, index, offsets, index->n_fields,
					  ULINT_UNDEFINED, &heap);

		dst = rec_get_nth_field(rec, offsets,
					trx_id_col, &len);
		ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		memcpy(dst, storage,
		       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

		/* Check if there are any externally stored
		columns in this record.  For each externally
		stored column, restore or clear the
		BTR_EXTERN_FIELD_REF. */
		if (!rec_offs_any_extern(offsets)) {
			continue;
		}

		for (i = 0; i < rec_offs_n_fields(offsets); i++) {
			if (!rec_offs_nth_extern(offsets, i)) {
				continue;
			}
			dst = rec_get_nth_field(rec, offsets, i, &len);

			if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
				page_zip_fail(("page_zip_decompress_clust:"
					       " %lu < 20\n",
					       (ulong) len));
				return(FALSE);
			}

			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_LIKELY(exists)) {
				/* Existing record:
				restore the BLOB pointer */
				externs -= BTR_EXTERN_FIELD_REF_SIZE;

				if (UNIV_UNLIKELY
				    (externs < page_zip->data
				     + page_zip->m_end)) {
					page_zip_fail(("page_zip_"
						       "decompress_clust:"
						       " %p < %p + %lu\n",
						       (const void*) externs,
						       (const void*)
						       page_zip->data,
						       (ulong)
						       page_zip->m_end));
					return(FALSE);
				}

				memcpy(dst, externs,
				       BTR_EXTERN_FIELD_REF_SIZE);

				page_zip->n_blobs++;
			} else {
				/* Deleted record:
				clear the BLOB pointer */
				memset(dst, 0,
				       BTR_EXTERN_FIELD_REF_SIZE);
			}
		}
	}

	return(TRUE);
}

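/* page_zip_decompress_low() below drives the whole operation: copy or
verify the page header, rebuild the sparse page directory from the
dense one, re-create the infimum and supremum records, inflate the
field encoding and build a dummy index via page_zip_fields_decode(),
inflate the user records with one of the three helpers above, and
finally apply the modification log and restore the uncompressed
columns. */
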
/**********************************************************************//**
Decompress a page.  This function should tolerate errors on the compressed
page.  Instead of letting assertions fail, it will return FALSE if an
inconsistency is detected.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_low(
/*====================*/
	page_zip_des_t*	page_zip,/*!< in: data, ssize;
				out: m_start, m_end, m_nonempty, n_blobs */
	page_t*		page,	/*!< out: uncompressed page, may be trashed */
	ibool		all)	/*!< in: TRUE=decompress the whole page;
				FALSE=verify but do not copy some
				page header fields that should not change
				after page creation */
{
	z_stream	d_stream;
	dict_index_t*	index	= NULL;
	rec_t**		recs;	/*!< dense page directory, sorted by address */
	ulint		n_dense;/* number of user records on the page */
	ulint		trx_id_col = ULINT_UNDEFINED;
	mem_heap_t*	heap;
	rec_offs*	offsets;

	ut_ad(page_zip_simple_validate(page_zip));
	MEM_CHECK_ADDRESSABLE(page, srv_page_size);
	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));

	/* The dense directory excludes the infimum and supremum records. */
	n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
			  >= page_zip_get_size(page_zip))) {
		page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
			       (ulong) n_dense,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	heap = mem_heap_create(n_dense * (3 * sizeof *recs) + srv_page_size);

	recs = static_cast<rec_t**>(
		mem_heap_alloc(heap, n_dense * sizeof *recs));

	if (all) {
		/* Copy the page header. */
		memcpy_aligned<2>(page, page_zip->data, PAGE_DATA);
	} else {
		/* Check that the bytes that we skip are identical. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
		ut_a(!memcmp(FIL_PAGE_TYPE + page,
			     FIL_PAGE_TYPE + page_zip->data,
			     PAGE_HEADER - FIL_PAGE_TYPE));
		ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
			     PAGE_HEADER + PAGE_LEVEL + page_zip->data,
			     PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

		/* Copy the mutable parts of the page header. */
		memcpy_aligned<8>(page, page_zip->data, FIL_PAGE_TYPE);
		memcpy_aligned<2>(PAGE_HEADER + page,
				  PAGE_HEADER + page_zip->data,
				  PAGE_LEVEL - PAGE_N_DIR_SLOTS);

#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
		/* Check that the page headers match after copying. */
		ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
	}

#ifdef UNIV_ZIP_DEBUG
	/* Clear the uncompressed page, except the header. */
	memset(PAGE_DATA + page, 0x55, srv_page_size - PAGE_DATA);
#endif /* UNIV_ZIP_DEBUG */
	MEM_UNDEFINED(PAGE_DATA + page, srv_page_size - PAGE_DATA);

	/* Copy the page directory. */
	if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
					       n_dense))) {
zlib_error:
		mem_heap_free(heap);
		return(FALSE);
	}

	/* Copy the infimum and supremum records. */
	memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
	       infimum_extra, sizeof infimum_extra);
	if (page_is_empty(page)) {
		rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
				      PAGE_NEW_SUPREMUM);
	} else {
		rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
				      page_zip_dir_get(page_zip, 0)
				      & PAGE_ZIP_DIR_SLOT_MASK);
	}
	memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
	memcpy_aligned<4>(PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1
			  + page, supremum_extra_data,
			  sizeof supremum_extra_data);

	page_zip_set_alloc(&d_stream, heap);

	d_stream.next_in = page_zip->data + PAGE_DATA;
	/* Subtract the space reserved for
	the page header and the end marker of the modification log. */
	d_stream.avail_in = static_cast<uInt>(
		page_zip_get_size(page_zip) - (PAGE_DATA + 1));
	d_stream.next_out = page + PAGE_ZIP_START;
	d_stream.avail_out = uInt(srv_page_size - PAGE_ZIP_START);

	if (UNIV_UNLIKELY(inflateInit2(&d_stream, int(srv_page_size_shift))
			  != Z_OK)) {
		ut_error;
	}

	/* Decode the zlib header and the index information. */
	if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {

		page_zip_fail(("page_zip_decompress:"
			       " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {

		page_zip_fail(("page_zip_decompress:"
			       " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
		goto zlib_error;
	}

	index = page_zip_fields_decode(
		page + PAGE_ZIP_START, d_stream.next_out,
		page_is_leaf(page) ? &trx_id_col : NULL,
		fil_page_get_type(page) == FIL_PAGE_RTREE);

	if (UNIV_UNLIKELY(!index)) {

		goto zlib_error;
	}

	/* Decompress the user records. */
	page_zip->n_blobs = 0;
	d_stream.next_out = page + PAGE_ZIP_START;

	{
		/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
		ulint	n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
			+ dict_index_get_n_fields(index);

		offsets = static_cast<rec_offs*>(
			mem_heap_alloc(heap, n * sizeof(ulint)));

		rec_offs_set_n_alloc(offsets, n);
	}

	/* Decompress the records in heap_no order. */
	if (!page_is_leaf(page)) {
		/* This is a node pointer page. */
		ulint	info_bits;

		if (UNIV_UNLIKELY
		    (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
						    recs, n_dense, index,
						    offsets, heap))) {
			goto err_exit;
		}

		info_bits = page_has_prev(page) ? 0 : REC_INFO_MIN_REC_FLAG;

		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
							    info_bits))) {
			goto err_exit;
		}
	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
		/* This is a leaf page in a secondary index. */
		if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
							   recs, n_dense,
							   index, offsets))) {
			goto err_exit;
		}

		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
							    page, 0))) {
err_exit:
			page_zip_fields_free(index);
			mem_heap_free(heap);
			return(FALSE);
		}
	} else {
		/* This is a leaf page in a clustered index. */
		if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
							     &d_stream, recs,
							     n_dense, index,
							     trx_id_col,
							     offsets, heap))) {
			goto err_exit;
		}

		if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
							    page, 0))) {
			goto err_exit;
		}
	}

	ut_a(page_is_comp(page));
	MEM_CHECK_DEFINED(page, srv_page_size);

	page_zip_fields_free(index);
	mem_heap_free(heap);

	return(TRUE);
}

/**********************************************************************//**
Decompress a page.  This function should tolerate errors on the compressed
page.  Instead of letting assertions fail, it will return FALSE if an
inconsistency is detected.
@return TRUE on success, FALSE on failure */
ibool
page_zip_decompress(
/*================*/
	page_zip_des_t*	page_zip,/*!< in: data, ssize;
				out: m_start, m_end, m_nonempty, n_blobs */
	page_t*		page,	/*!< out: uncompressed page, may be trashed */
	ibool		all)	/*!< in: TRUE=decompress the whole page;
				FALSE=verify but do not copy some
				page header fields that should not change
				after page creation */
{
	const ulonglong ns = my_interval_timer();

	if (!page_zip_decompress_low(page_zip, page, all)) {
		return(FALSE);
	}

	const uint64_t time_diff = (my_interval_timer() - ns) / 1000;
	page_zip_stat[page_zip->ssize - 1].decompressed++;
	page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;

	index_id_t	index_id = btr_page_get_index_id(page);

	if (srv_cmp_per_index_enabled) {
		mysql_mutex_lock(&page_zip_stat_per_index_mutex);
		page_zip_stat_per_index[index_id].decompressed++;
		page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
		mysql_mutex_unlock(&page_zip_stat_per_index_mutex);
	}

	/* Update the stat counter for LRU policy. */
	buf_LRU_stat_inc_unzip();

	MONITOR_INC(MONITOR_PAGE_DECOMPRESS);

	return(TRUE);
}

| #ifdef UNIV_ZIP_DEBUG
 | |
| /**********************************************************************//**
 | |
| Dump a block of memory on the standard error stream. */
 | |
| static
 | |
| void
 | |
| page_zip_hexdump_func(
 | |
| /*==================*/
 | |
| 	const char*	name,	/*!< in: name of the data structure */
 | |
| 	const void*	buf,	/*!< in: data */
 | |
| 	ulint		size)	/*!< in: length of the data, in bytes */
 | |
| {
 | |
| 	const byte*	s	= static_cast<const byte*>(buf);
 | |
| 	ulint		addr;
 | |
| 	const ulint	width	= 32; /* bytes per line */
 | |
| 
 | |
| 	fprintf(stderr, "%s:\n", name);
 | |
| 
 | |
| 	for (addr = 0; addr < size; addr += width) {
 | |
| 		ulint	i;
 | |
| 
 | |
| 		fprintf(stderr, "%04lx ", (ulong) addr);
 | |
| 
 | |
| 		i = ut_min(width, size - addr);
 | |
| 
 | |
| 		while (i--) {
 | |
| 			fprintf(stderr, "%02x", *s++);
 | |
| 		}
 | |
| 
 | |
| 		putc('\n', stderr);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /** Dump a block of memory on the standard error stream.
 | |
| @param buf in: data
 | |
| @param size in: length of the data, in bytes */
 | |
| #define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
 | |
| 
 | |
| /** Flag: make page_zip_validate() compare page headers only */
 | |
| bool	page_zip_validate_header_only;
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Check that the compressed and decompressed pages match.
 | |
| @return TRUE if valid, FALSE if not */
 | |
| ibool
 | |
| page_zip_validate_low(
 | |
| /*==================*/
 | |
| 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
 | |
| 	const page_t*		page,	/*!< in: uncompressed page */
 | |
| 	const dict_index_t*	index,	/*!< in: index of the page, if known */
 | |
| 	ibool			sloppy)	/*!< in: FALSE=strict,
 | |
| 					TRUE=ignore the MIN_REC_FLAG */
 | |
| {
 | |
| 	ibool		valid;
 | |
| 
 | |
| 	if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
 | |
| 		   FIL_PAGE_LSN - FIL_PAGE_PREV)
 | |
| 	    || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
 | |
| 	    || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
 | |
| 		      PAGE_ROOT_AUTO_INC)
 | |
| 	    /* The PAGE_ROOT_AUTO_INC can be updated while holding an SX-latch
 | |
| 	    on the clustered index root page (page number 3 in .ibd files).
 | |
| 	    That allows concurrent readers (holding buf_block_t::lock S-latch).
 | |
| 	    Because we do not know what type of a latch our caller is holding,
 | |
| 	    we will ignore the field on clustered index root pages in order
 | |
| 	    to avoid false positives. */
 | |
| 	    || (page_get_page_no(page) != 3/* clustered index root page */
 | |
| 		&& memcmp(&page_zip->data[FIL_PAGE_DATA + PAGE_ROOT_AUTO_INC],
 | |
| 			  &page[FIL_PAGE_DATA + PAGE_ROOT_AUTO_INC], 8))
 | |
| 	    || memcmp(&page_zip->data[FIL_PAGE_DATA + PAGE_HEADER_PRIV_END],
 | |
| 		      &page[FIL_PAGE_DATA + PAGE_HEADER_PRIV_END],
 | |
| 		      PAGE_DATA - FIL_PAGE_DATA - PAGE_HEADER_PRIV_END)) {
 | |
| 		page_zip_fail(("page_zip_validate: page header\n"));
 | |
| 		page_zip_hexdump(page_zip, sizeof *page_zip);
 | |
| 		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
 | |
| 		page_zip_hexdump(page, srv_page_size);
 | |
| 		return(FALSE);
 | |
| 	}
 | |
| 
 | |
| 	ut_a(page_is_comp(page));
 | |
| 
 | |
| 	if (page_zip_validate_header_only) {
 | |
| 		return(TRUE);
 | |
| 	}
 | |
| 
 | |
| 	/* page_zip_decompress() expects the uncompressed page to be
 | |
| 	srv_page_size aligned. */
 | |
| 	page_t* temp_page = static_cast<byte*>(aligned_malloc(srv_page_size,
 | |
| 							      srv_page_size));
 | |
| 
 | |
| 	MEM_CHECK_DEFINED(page, srv_page_size);
 | |
| 	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
 | |
| 
 | |
| 	page_zip_des_t temp_page_zip(*page_zip);
 | |
| 	valid = page_zip_decompress_low(&temp_page_zip, temp_page, TRUE);
 | |
| 	if (!valid) {
 | |
| 		fputs("page_zip_validate(): failed to decompress\n", stderr);
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 	if (page_zip->n_blobs != temp_page_zip.n_blobs) {
 | |
| 		page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
 | |
| 			       page_zip->n_blobs, temp_page_zip.n_blobs));
 | |
| 		valid = FALSE;
 | |
| 	}
 | |
| #ifdef UNIV_DEBUG
 | |
| 	if (page_zip->m_start != temp_page_zip.m_start) {
 | |
| 		page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
 | |
| 			       page_zip->m_start, temp_page_zip.m_start));
 | |
| 		valid = FALSE;
 | |
| 	}
 | |
| #endif /* UNIV_DEBUG */
 | |
| 	if (page_zip->m_end != temp_page_zip.m_end) {
 | |
| 		page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
 | |
| 			       page_zip->m_end, temp_page_zip.m_end));
 | |
| 		valid = FALSE;
 | |
| 	}
 | |
| 	if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
 | |
| 		page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
 | |
| 			       page_zip->m_nonempty,
 | |
| 			       temp_page_zip.m_nonempty));
 | |
| 		valid = FALSE;
 | |
| 	}
 | |
| 	if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
 | |
| 		   srv_page_size - PAGE_HEADER - FIL_PAGE_DATA_END)) {
 | |
| 
 | |
| 		/* In crash recovery, the "minimum record" flag may be
 | |
| 		set incorrectly until the mini-transaction is
 | |
| 		committed.  Let us tolerate that difference when we
 | |
| 		are performing a sloppy validation. */
 | |
| 
 | |
| 		rec_offs*	offsets;
 | |
| 		mem_heap_t*	heap;
 | |
| 		const rec_t*	rec;
 | |
| 		const rec_t*	trec;
 | |
| 		byte		info_bits_diff;
 | |
| 		ulint		offset
 | |
| 			= rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
 | |
| 		ut_a(offset >= PAGE_NEW_SUPREMUM);
 | |
| 		offset -= 5/*REC_NEW_INFO_BITS*/;
 | |
| 
 | |
| 		info_bits_diff = page[offset] ^ temp_page[offset];
 | |
| 
 | |
| 		if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
 | |
| 			temp_page[offset] = page[offset];
 | |
| 
 | |
| 			if (!memcmp(page + PAGE_HEADER,
 | |
| 				    temp_page + PAGE_HEADER,
 | |
| 				    srv_page_size - PAGE_HEADER
 | |
| 				    - FIL_PAGE_DATA_END)) {
 | |
| 
 | |
| 				/* Only the minimum record flag
 | |
| 				differed.  Let us ignore it. */
 | |
| 				page_zip_fail(("page_zip_validate:"
 | |
| 					       " min_rec_flag"
 | |
| 					       " (%s" UINT32PF "," UINT32PF
 | |
| 					       ",0x%02x)\n",
 | |
| 					       sloppy ? "ignored, " : "",
 | |
| 					       page_get_space_id(page),
 | |
| 					       page_get_page_no(page),
 | |
| 					       page[offset]));
 | |
				/* We don't check for spatial index, since
				the "minimum record" could be deleted when
				doing rtr_update_mbr_field().
				GIS_FIXME: need to validate why
				rtr_update_mbr_field() could affect this */
				if (index && dict_index_is_spatial(index)) {
					valid = true;
				} else {
					valid = sloppy;
				}
				goto func_exit;
			}
		}

		/* Compare the pointers in the PAGE_FREE list. */
		rec = page_header_get_ptr(page, PAGE_FREE);
		trec = page_header_get_ptr(temp_page, PAGE_FREE);

		while (rec || trec) {
			if (page_offset(rec) != page_offset(trec)) {
				page_zip_fail(("page_zip_validate:"
					       " PAGE_FREE list: %u!=%u\n",
					       (unsigned) page_offset(rec),
					       (unsigned) page_offset(trec)));
				valid = FALSE;
				goto func_exit;
			}

			rec = page_rec_next_get<true>(page, rec);
			trec = page_rec_next_get<true>(temp_page, trec);
		}

		/* Compare the records. */
		heap = NULL;
		offsets = NULL;
		rec = page_rec_next_get<true>(page, page + PAGE_NEW_INFIMUM);
		trec = page_rec_next_get<true>(temp_page,
					       temp_page + PAGE_NEW_INFIMUM);
		const ulint n_core = (index && page_is_leaf(page))
			? index->n_fields : 0;

		do {
			if (page_offset(rec) != page_offset(trec)) {
				page_zip_fail(("page_zip_validate:"
					       " record list: 0x%02x!=0x%02x\n",
					       (unsigned) page_offset(rec),
					       (unsigned) page_offset(trec)));
				valid = FALSE;
				break;
			}

			if (index) {
				/* Compare the data. */
				offsets = rec_get_offsets(
					rec, index, offsets, n_core,
					ULINT_UNDEFINED, &heap);

				if (memcmp(rec - rec_offs_extra_size(offsets),
					   trec - rec_offs_extra_size(offsets),
					   rec_offs_size(offsets))) {
					page_zip_fail(
						("page_zip_validate:"
						 " record content: 0x%02x",
						 (unsigned) page_offset(rec)));
					valid = FALSE;
					break;
				}
			}

			rec = page_rec_next_get<true>(page, rec);
			trec = page_rec_next_get<true>(temp_page, trec);
		} while (rec || trec);

		if (heap) {
			mem_heap_free(heap);
		}
	}

func_exit:
	if (!valid) {
		page_zip_hexdump(page_zip, sizeof *page_zip);
		page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
		page_zip_hexdump(page, srv_page_size);
		page_zip_hexdump(temp_page, srv_page_size);
	}
	aligned_free(temp_page);
	return(valid);
}

/**********************************************************************//**
Check that the compressed and decompressed pages match.
@return TRUE if valid, FALSE if not */
ibool
page_zip_validate(
/*==============*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	const page_t*		page,	/*!< in: uncompressed page */
	const dict_index_t*	index)	/*!< in: index of the page, if known */
{
	return(page_zip_validate_low(page_zip, page, index,
				     recv_recovery_is_on()));
}
#endif /* UNIV_ZIP_DEBUG */

#ifdef UNIV_DEBUG
/**********************************************************************//**
Assert that the compressed and decompressed page headers match.
@return TRUE */
static
ibool
page_zip_header_cmp(
/*================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	const byte*		page)	/*!< in: uncompressed page */
{
	ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
		      FIL_PAGE_LSN - FIL_PAGE_PREV));
	ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
		      2));
	ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
		      PAGE_DATA - FIL_PAGE_DATA));

	return(TRUE);
}
#endif /* UNIV_DEBUG */

/**********************************************************************//**
Write a record on the compressed page that contains externally stored
columns.  The data must already have been written to the uncompressed page.
@return end of modification log */
static
byte*
page_zip_write_rec_ext(
/*===================*/
	buf_block_t*	block,		/*!< in/out: compressed page */
	const byte*	rec,		/*!< in: record being written */
	const dict_index_t*index,	/*!< in: record descriptor */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	ulint		create,		/*!< in: nonzero=insert, zero=update */
	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
	ulint		heap_no,	/*!< in: heap number of rec */
	byte*		storage,	/*!< in: end of dense page directory */
	byte*		data,		/*!< in: end of modification log */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	const byte*	start	= rec;
	ulint		i;
	ulint		len;
	byte*		externs	= storage;
	ulint		n_ext	= rec_offs_n_extern(offsets);
	const page_t* const page = block->page.frame;
	page_zip_des_t* const page_zip = &block->page.zip;

	ut_ad(rec_offs_validate(rec, index, offsets));
	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));

	externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
		* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);

	/* Note that this will not take into account
	the BLOB columns of rec if create==TRUE. */
	ut_ad(data + rec_offs_data_size(offsets)
	      - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
	      - n_ext * FIELD_REF_SIZE
	      < externs - FIELD_REF_SIZE * page_zip->n_blobs);

	if (n_ext) {
		ulint	blob_no = page_zip_get_n_prev_extern(
			page_zip, rec, index);
		byte*	ext_end = externs - page_zip->n_blobs * FIELD_REF_SIZE;
		ut_ad(blob_no <= page_zip->n_blobs);
		externs -= blob_no * FIELD_REF_SIZE;

		if (create) {
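			/* n_blobs is declared as a 12-bit bitfield in
			page_zip_des_t; the mask below merely keeps the
			assignment within that range (the sum is assumed
			not to overflow in practice). */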
			page_zip->n_blobs = (page_zip->n_blobs + n_ext)
				& ((1U << 12) - 1);
			ASSERT_ZERO_BLOB(ext_end - n_ext * FIELD_REF_SIZE);
			if (ulint len = ulint(externs - ext_end)) {
				byte* ext_start = ext_end
					- n_ext * FIELD_REF_SIZE;
				memmove(ext_start, ext_end, len);
				mtr->memmove(*block,
					     ext_start - page_zip->data,
					     ext_end - page_zip->data, len);
			}
		}

		ut_a(blob_no + n_ext <= page_zip->n_blobs);
	}

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		const byte*	src;

		if (UNIV_UNLIKELY(i == trx_id_col)) {
			ut_ad(!rec_offs_nth_extern(offsets,
						   i));
			ut_ad(!rec_offs_nth_extern(offsets,
						   i + 1));
			/* Locate trx_id and roll_ptr. */
			src = rec_get_nth_field(rec, offsets,
						i, &len);
			ut_ad(len == DATA_TRX_ID_LEN);
			ut_ad(src + DATA_TRX_ID_LEN
			      == rec_get_nth_field(
				      rec, offsets,
				      i + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);

			/* Log the preceding fields. */
			ASSERT_ZERO(data, src - start);
			memcpy(data, start, ulint(src - start));
			data += src - start;
			start = src + (DATA_TRX_ID_LEN
				       + DATA_ROLL_PTR_LEN);

			/* Store trx_id and roll_ptr. */
			constexpr ulint sys_len = DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
			byte* sys = storage - sys_len * (heap_no - 1);
			memcpy(sys, src, sys_len);
			i++; /* skip also roll_ptr */
			mtr->zmemcpy(*block, sys - page_zip->data, sys_len);
		} else if (rec_offs_nth_extern(offsets, i)) {
			src = rec_get_nth_field(rec, offsets,
						i, &len);

			ut_ad(dict_index_is_clust(index));
			ut_ad(len >= FIELD_REF_SIZE);
			src += len - FIELD_REF_SIZE;

			ASSERT_ZERO(data, src - start);
			memcpy(data, start, ulint(src - start));
			data += src - start;
			start = src + FIELD_REF_SIZE;

			/* Store the BLOB pointer. */
			externs -= FIELD_REF_SIZE;
			ut_ad(data < externs);
			memcpy(externs, src, FIELD_REF_SIZE);
			mtr->zmemcpy(*block, externs - page_zip->data,
				     FIELD_REF_SIZE);
		}
	}

	/* Log the last bytes of the record. */
	len = rec_offs_data_size(offsets) - ulint(start - rec);

	ASSERT_ZERO(data, len);
	memcpy(data, start, len);
	data += len;

	return(data);
}

/** Write an entire record to the ROW_FORMAT=COMPRESSED page.
The data must already have been written to the uncompressed page.
@param[in,out]	block		ROW_FORMAT=COMPRESSED page
@param[in]	rec		record in the uncompressed page
@param[in]	index		the index that the page belongs to
@param[in]	offsets		rec_get_offsets(rec, index)
@param[in]	create		nonzero=insert, zero=update
@param[in,out]	mtr		mini-transaction */
void page_zip_write_rec(buf_block_t *block, const byte *rec,
                        const dict_index_t *index, const rec_offs *offsets,
                        ulint create, mtr_t *mtr)
{
	const page_t* const page = block->page.frame;
	page_zip_des_t* const page_zip = &block->page.zip;
	byte*		data;
	byte*		storage;
	ulint		heap_no;
	byte*		slot;

	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_comp(offsets));
	ut_ad(rec_offs_validate(rec, index, offsets));

	ut_ad(page_zip->m_start >= PAGE_DATA);

	ut_ad(page_zip_header_cmp(page_zip, page));
	ut_ad(page_simple_validate_new((page_t*) page));

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));

	slot = page_zip_dir_find(page_zip, page_offset(rec));
	ut_a(slot);
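	/* The dense directory entry is a big-endian 16-bit value, and
	*slot addresses its most significant byte; hence the 16-bit
	flag constants below are shifted right by 8. */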
	byte s = *slot;
	/* Copy the delete mark. */
	if (rec_get_deleted_flag(rec, TRUE)) {
		/* In delete-marked records, DB_TRX_ID must
		always refer to an existing undo log record.
		On non-leaf pages, the delete-mark flag is garbage. */
		ut_ad(!index->is_primary() || !page_is_leaf(page)
		      || row_get_rec_trx_id(rec, index, offsets));
		s |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
	} else {
		s &= byte(~(PAGE_ZIP_DIR_SLOT_DEL >> 8));
	}

	if (s != *slot) {
		*slot = s;
		mtr->zmemcpy(*block, slot - page_zip->data, 1);
	}

	ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
	ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + srv_page_size
	      - PAGE_DIR - PAGE_DIR_SLOT_SIZE
	      * page_dir_get_n_slots(page));

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
	ut_ad(heap_no < page_dir_get_n_heap(page));

	/* Append to the modification log. */
	data = page_zip->data + page_zip->m_end;
	ut_ad(!*data);

	/* Identify the record by writing its heap number - 1.
	0 is reserved to indicate the end of the modification log. */
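	/* The value heap_no - 1 is written shifted left by one bit;
	values that do not fit in 7 bits get a leading byte that holds
	the high bits and the 0x80 flag.  Worked example: heap_no == 3
	is encoded as the single byte 0x04, while heap_no == 100
	(heap_no - 1 == 99) becomes the two bytes 0x80, 0xc6. */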

	if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
		*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
		ut_ad(!*data);
	}
	*data++ = (byte) ((heap_no - 1) << 1);
	ut_ad(!*data);

	{
		const byte*	start	= rec - rec_offs_extra_size(offsets);
		const byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;

		/* Write the extra bytes backwards, so that
		rec_offs_extra_size() can be easily computed in
		page_zip_apply_log() by invoking
		rec_get_offsets_reverse(). */

		while (b != start) {
			*data++ = *--b;
			ut_ad(!*data);
		}
	}

	/* Write the data bytes.  Store the uncompressed bytes separately. */
	storage = page_zip_dir_start(page_zip);

	if (page_is_leaf(page)) {
		if (dict_index_is_clust(index)) {
			/* Store separately trx_id, roll_ptr and
			the BTR_EXTERN_FIELD_REF of each BLOB column. */
			if (rec_offs_any_extern(offsets)) {
				data = page_zip_write_rec_ext(
					block,
					rec, index, offsets, create,
					index->db_trx_id(), heap_no,
					storage, data, mtr);
			} else {
				/* Locate trx_id and roll_ptr. */
				ulint len;
				const byte*	src
					= rec_get_nth_field(rec, offsets,
							    index->db_trx_id(),
							    &len);
				ut_ad(len == DATA_TRX_ID_LEN);
				ut_ad(src + DATA_TRX_ID_LEN
				      == rec_get_nth_field(
					      rec, offsets,
					      index->db_roll_ptr(), &len));
				ut_ad(len == DATA_ROLL_PTR_LEN);

				/* Log the preceding fields. */
				ASSERT_ZERO(data, src - rec);
				memcpy(data, rec, ulint(src - rec));
				data += src - rec;

				/* Store trx_id and roll_ptr. */
				constexpr ulint sys_len
					= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
				byte* sys = storage - sys_len * (heap_no - 1);
				memcpy(sys, src, sys_len);

				src += sys_len;
				mtr->zmemcpy(*block, sys - page_zip->data,
					     sys_len);
				/* Log the last bytes of the record. */
				len = rec_offs_data_size(offsets)
					- ulint(src - rec);

				ASSERT_ZERO(data, len);
				memcpy(data, src, len);
				data += len;
			}
		} else {
			/* Leaf page of a secondary index:
			no externally stored columns */
			ut_ad(!rec_offs_any_extern(offsets));

			/* Log the entire record. */
			ulint len = rec_offs_data_size(offsets);

			ASSERT_ZERO(data, len);
			memcpy(data, rec, len);
			data += len;
		}
	} else {
		/* This is a node pointer page. */
		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Copy the data bytes, except node_ptr. */
		ulint len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
		ut_ad(data + len < storage - REC_NODE_PTR_SIZE
		      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
		ASSERT_ZERO(data, len);
		memcpy(data, rec, len);
		data += len;

		/* Copy the node pointer to the uncompressed area. */
		byte* node_ptr = storage - REC_NODE_PTR_SIZE * (heap_no - 1);
		mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, node_ptr,
					       rec + len, REC_NODE_PTR_SIZE);
	}

	ut_a(!*data);
	ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
	mtr->zmemcpy(*block, page_zip->m_end,
		     data - page_zip->data - page_zip->m_end);
	page_zip->m_end = uint16_t(data - page_zip->data);
	page_zip->m_nonempty = TRUE;

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page_align(rec), index));
#endif /* UNIV_ZIP_DEBUG */
}

/**********************************************************************//**
Write a BLOB pointer of a record on the leaf page of a clustered index.
The information must already have been updated on the uncompressed page. */
void
page_zip_write_blob_ptr(
/*====================*/
	buf_block_t*	block,	/*!< in/out: ROW_FORMAT=COMPRESSED page */
	const byte*	rec,	/*!< in/out: record whose data is being
				written */
	dict_index_t*	index,	/*!< in: index of the page */
	const rec_offs*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ulint		n,	/*!< in: column index */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	const byte*	field;
	byte*		externs;
	const page_t* const page = block->page.frame;
	page_zip_des_t* const page_zip = &block->page.zip;
	ulint		blob_no;
	ulint		len;

	ut_ad(page_align(rec) == page);
	ut_ad(index != NULL);
	ut_ad(offsets != NULL);
	ut_ad(page_simple_validate_new((page_t*) page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_comp(offsets));
	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(rec_offs_any_extern(offsets));
	ut_ad(rec_offs_nth_extern(offsets, n));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	ut_ad(page_is_leaf(page));
	ut_ad(dict_index_is_clust(index));

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));

	blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
		+ rec_get_n_extern_new(rec, index, n);
	ut_a(blob_no < page_zip->n_blobs);

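	/* Sketch of the assumed trailer layout on a clustered index
	leaf page: the dense page directory is at the very end of the
	compressed page and each record also owns an uncompressed
	(DB_TRX_ID, DB_ROLL_PTR) pair there, PAGE_ZIP_CLUST_LEAF_SLOT_SIZE
	bytes per record in total; the BLOB pointer array grows
	downwards from just below that area. */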
	externs = page_zip->data + page_zip_get_size(page_zip)
		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
		* PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;

	field = rec_get_nth_field(rec, offsets, n, &len);

	externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
	field += len - BTR_EXTERN_FIELD_REF_SIZE;

	mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, externs, field,
				       BTR_EXTERN_FIELD_REF_SIZE);

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}

/**********************************************************************//**
Write the node pointer of a record on a non-leaf compressed page. */
void
page_zip_write_node_ptr(
/*====================*/
	buf_block_t*	block,	/*!< in/out: compressed page */
	byte*		rec,	/*!< in/out: record */
	ulint		size,	/*!< in: data size of rec */
	ulint		ptr,	/*!< in: node pointer */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	byte*	field;
	byte*	storage;
	page_zip_des_t* const page_zip = &block->page.zip;

	ut_d(const page_t* const page = block->page.frame);
	ut_ad(page_simple_validate_new(page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(page_rec_is_comp(rec));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	ut_ad(!page_is_leaf(page));

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
	MEM_CHECK_DEFINED(rec, size);

	storage = page_zip_dir_start(page_zip)
		- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
	field = rec + size - REC_NODE_PTR_SIZE;

#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
	compile_time_assert(REC_NODE_PTR_SIZE == 4);
	mach_write_to_4(field, ptr);
	mtr->zmemcpy(*block, storage, field, REC_NODE_PTR_SIZE);
}

/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
@param[in,out]	block		ROW_FORMAT=COMPRESSED page
@param[in,out]	rec		record
@param[in]	offsets		rec_get_offsets(rec, index)
@param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
@param[in]	trx_id		DB_TRX_ID value (transaction identifier)
@param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
@param[in,out]	mtr		mini-transaction */
void
page_zip_write_trx_id_and_roll_ptr(
	buf_block_t*	block,
	byte*		rec,
	const rec_offs*	offsets,
	ulint		trx_id_col,
	trx_id_t	trx_id,
	roll_ptr_t	roll_ptr,
	mtr_t*		mtr)
{
	page_zip_des_t* const page_zip = &block->page.zip;

	ut_d(const page_t* const page = block->page.frame);
	ut_ad(page_align(rec) == page);
	ut_ad(page_simple_validate_new(page));
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip_get_size(page_zip)
	      > PAGE_DATA + page_zip_dir_size(page_zip));
	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(rec_offs_comp(offsets));

	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_zip_header_cmp(page_zip, page));

	ut_ad(page_is_leaf(page));

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));

	constexpr ulint sys_len = DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
	const ulint heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
	byte* storage = page_zip_dir_start(page_zip) - (heap_no - 1) * sys_len;

	compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
	ulint len;
	byte* field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
	ut_ad(len == DATA_TRX_ID_LEN);
	ut_ad(field + DATA_TRX_ID_LEN
	      == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
	ut_ad(len == DATA_ROLL_PTR_LEN);
	compile_time_assert(DATA_TRX_ID_LEN == 6);
	mach_write_to_6(field, trx_id);
	compile_time_assert(DATA_ROLL_PTR_LEN == 7);
	mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
	len = 0;
	if (heap_no > PAGE_HEAP_NO_USER_LOW) {
		byte* prev = storage + sys_len;
		for (; len < sys_len && prev[len] == field[len]; len++);
		if (len > 4) {
			/* We save space by replacing a single record

			WRITE,offset(storage),byte[13]

			with up to two records:

			MEMMOVE,offset(storage),len(1 byte),+13(1 byte),

			WRITE|0x80,0,byte[13-len]

			The single WRITE record would be x+13 bytes long (x>2).
			The MEMMOVE record would be x+1+1 = x+2 bytes, and
			the second WRITE would be 1+1+13-len = 15-len bytes.

			The total size is: x+13 versus x+2+15-len = x+17-len.
			To save space, we must have len>4. */
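			/* Worked example (not in the original source):
			with sys_len = 13 and len = 5 matching bytes,
			the MEMMOVE+WRITE pair costs x+17-5 = x+12 bytes
			instead of x+13, saving one byte; at len = 4 it
			would merely break even. */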
			memcpy(storage, prev, len);
			mtr->memmove(*block, ulint(storage - page_zip->data),
				     ulint(storage - page_zip->data) + sys_len,
				     len);
			storage += len;
			field += len;
			if (UNIV_LIKELY(len < sys_len)) {
				goto write;
			}
		} else {
			len = 0;
			goto write;
		}
	} else {
write:
		mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, storage, field,
					       sys_len - len);
	}
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
	ut_a(!memcmp(storage - len, field - len, sys_len));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));
	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
}

/**********************************************************************//**
Clear an area on the uncompressed and compressed page.
Do not clear the data payload, as that would grow the modification log. */
static
void
page_zip_clear_rec(
/*===============*/
	buf_block_t*	block,		/*!< in/out: compressed page */
	byte*		rec,		/*!< in: record to clear */
	const dict_index_t*	index,	/*!< in: index of rec */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint	heap_no;
	byte*	storage;
	byte*	field;
	ulint	len;

	ut_ad(page_align(rec) == block->page.frame);
	page_zip_des_t* const page_zip = &block->page.zip;

	/* page_zip_validate() would fail here if a record
	containing externally stored columns is being deleted. */
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
	ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
	ut_ad(page_zip_header_cmp(page_zip, block->page.frame));

	heap_no = rec_get_heap_no_new(rec);
	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
	MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
	MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
			  rec_offs_extra_size(offsets));

	if (!page_is_leaf(block->page.frame)) {
		/* Clear node_ptr. On the compressed page,
		there is an array of node_ptr immediately before the
		dense page directory, at the very end of the page. */
		storage	= page_zip_dir_start(page_zip);
		ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) ==
		      rec_offs_n_fields(offsets) - 1);
		field	= rec_get_nth_field(rec, offsets,
					    rec_offs_n_fields(offsets) - 1,
					    &len);
		ut_ad(len == REC_NODE_PTR_SIZE);
		ut_ad(!rec_offs_any_extern(offsets));
		memset(field, 0, REC_NODE_PTR_SIZE);
		storage -= (heap_no - 1) * REC_NODE_PTR_SIZE;
		len = REC_NODE_PTR_SIZE;
clear_page_zip:
		memset(storage, 0, len);
		mtr->memset(*block, storage - page_zip->data, len, 0);
	} else if (index->is_clust()) {
		/* Clear trx_id and roll_ptr. On the compressed page,
		there is an array of these fields immediately before the
		dense page directory, at the very end of the page. */
		const ulint	trx_id_pos
			= dict_col_get_clust_pos(
			dict_table_get_sys_col(
				index->table, DATA_TRX_ID), index);
		field	= rec_get_nth_field(rec, offsets, trx_id_pos, &len);
		ut_ad(len == DATA_TRX_ID_LEN);
		memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

		if (rec_offs_any_extern(offsets)) {
			ulint	i;

			for (i = rec_offs_n_fields(offsets); i--; ) {
				/* Clear all BLOB pointers in order to make
				page_zip_validate() pass. */
				if (rec_offs_nth_extern(offsets, i)) {
					field = rec_get_nth_field(
						rec, offsets, i, &len);
					ut_ad(len
					      == BTR_EXTERN_FIELD_REF_SIZE);
					memset(field + len
					       - BTR_EXTERN_FIELD_REF_SIZE,
					       0, BTR_EXTERN_FIELD_REF_SIZE);
				}
			}
		}

		len = DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		storage = page_zip_dir_start(page_zip)
			- (heap_no - 1)
			* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		goto clear_page_zip;
	} else {
		ut_ad(!rec_offs_any_extern(offsets));
	}
}

/** Modify the delete-mark flag of a ROW_FORMAT=COMPRESSED record.
@param[in,out]  block   buffer block
@param[in,out]  rec     record on a physical index page
@param[in]      flag    the value of the delete-mark flag
@param[in,out]  mtr     mini-transaction  */
void page_zip_rec_set_deleted(buf_block_t *block, rec_t *rec, bool flag,
                              mtr_t *mtr)
{
  ut_ad(page_align(rec) == block->page.frame);
  byte *slot= page_zip_dir_find(&block->page.zip, page_offset(rec));
  byte b= *slot;
  if (flag)
    b|= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
  else
    b&= byte(~(PAGE_ZIP_DIR_SLOT_DEL >> 8));
  mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, slot, &b, 1);
#ifdef UNIV_ZIP_DEBUG
  ut_a(page_zip_validate(&block->page.zip, block->page.frame, nullptr));
#endif /* UNIV_ZIP_DEBUG */
}

/**********************************************************************//**
Write the "owned" flag of a record on a compressed page.  The n_owned field
must already have been written on the uncompressed page. */
void
page_zip_rec_set_owned(
/*===================*/
	buf_block_t*	block,	/*!< in/out: ROW_FORMAT=COMPRESSED page */
	const byte*	rec,	/*!< in: record on the uncompressed page */
	ulint		flag,	/*!< in: the owned flag (nonzero=TRUE) */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
  ut_ad(page_align(rec) == block->page.frame);
  page_zip_des_t *const page_zip= &block->page.zip;
  byte *slot= page_zip_dir_find(page_zip, page_offset(rec));
  MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
  byte b= *slot;
  if (flag)
    b|= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
  else
    b&= byte(~(PAGE_ZIP_DIR_SLOT_OWNED >> 8));
  mtr->zmemcpy<mtr_t::MAYBE_NOP>(*block, slot, &b, 1);
}

/**********************************************************************//**
Insert a record to the dense page directory. */
void
page_zip_dir_insert(
/*================*/
	page_cur_t*	cursor,	/*!< in/out: page cursor */
	uint16_t	free_rec,/*!< in: record from which rec was
				allocated, or 0 */
	byte*		rec,	/*!< in: record to insert */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	ut_ad(page_align(cursor->rec) == cursor->block->page.frame);
	ut_ad(page_align(rec) == cursor->block->page.frame);
	page_zip_des_t *const page_zip= &cursor->block->page.zip;

	ulint	n_dense;
	byte*	slot_rec;
	byte*	slot_free;

	ut_ad(cursor->rec != rec);
	ut_ad(page_rec_get_next_const(cursor->rec) == rec);
	ut_ad(page_zip_simple_validate(page_zip));

	MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));

	if (page_rec_is_infimum(cursor->rec)) {
		/* Use the first slot. */
		slot_rec = page_zip->data + page_zip_get_size(page_zip);
	} else {
		byte*	end	= page_zip->data + page_zip_get_size(page_zip);
		byte*	start	= end - page_zip_dir_user_size(page_zip);

		if (UNIV_LIKELY(!free_rec)) {
			/* PAGE_N_RECS was already incremented
			in page_cur_insert_rec_zip(), but the
			dense directory slot at that position
			contains garbage.  Skip it. */
			start += PAGE_ZIP_DIR_SLOT_SIZE;
		}

		slot_rec = page_zip_dir_find_low(start, end,
						 page_offset(cursor->rec));
		ut_a(slot_rec);
	}

	/* Read the old n_dense (n_heap may have been incremented). */
	n_dense = page_dir_get_n_heap(page_zip->data)
		- (PAGE_HEAP_NO_USER_LOW + 1U);

	if (UNIV_UNLIKELY(free_rec)) {
		/* The record was allocated from the free list.
		Shift the dense directory only up to that slot.
		Note that in this case, n_dense is actually
		off by one, because page_cur_insert_rec_zip()
		did not increment n_heap. */
		ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
		      + PAGE_HEAP_NO_USER_LOW);
		ut_ad(page_offset(rec) >= free_rec);
		slot_free = page_zip_dir_find(page_zip, free_rec);
		ut_ad(slot_free);
		slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
	} else {
		/* The record was allocated from the heap.
		Shift the entire dense directory. */
		ut_ad(rec_get_heap_no_new(rec) == n_dense
		      + PAGE_HEAP_NO_USER_LOW);

		/* Shift to the end of the dense page directory. */
		slot_free = page_zip->data + page_zip_get_size(page_zip)
			- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
	}

	if (const ulint slot_len = ulint(slot_rec - slot_free)) {
		/* Shift the dense directory to allocate place for rec. */
		memmove_aligned<2>(slot_free - PAGE_ZIP_DIR_SLOT_SIZE,
				   slot_free, slot_len);
		mtr->memmove(*cursor->block, (slot_free - page_zip->data)
			     - PAGE_ZIP_DIR_SLOT_SIZE,
			     slot_free - page_zip->data, slot_len);
	}

	/* Write the entry for the inserted record.
	The "owned" flag must be zero. */
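	/* A dense directory entry packs the record offset into the low
	14 bits and keeps two flag bits on top: PAGE_ZIP_DIR_SLOT_DEL
	(0x8000) and PAGE_ZIP_DIR_SLOT_OWNED (0x4000). */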
	uint16_t offs = page_offset(rec);
	if (rec_get_deleted_flag(rec, true)) {
		offs |= PAGE_ZIP_DIR_SLOT_DEL;
	}

	mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, offs);
	mtr->zmemcpy(*cursor->block, slot_rec - page_zip->data
		     - PAGE_ZIP_DIR_SLOT_SIZE, PAGE_ZIP_DIR_SLOT_SIZE);
}

/** Shift the dense page directory and the array of BLOB pointers
when a record is deleted.
@param[in,out]  block   index page
@param[in,out]  rec     record being deleted
@param[in]      index   the index that the page belongs to
@param[in]      offsets rec_get_offsets(rec, index)
@param[in]      free    previous start of the free list
@param[in,out]  mtr     mini-transaction */
void page_zip_dir_delete(buf_block_t *block, byte *rec,
                         const dict_index_t *index, const rec_offs *offsets,
                         const byte *free, mtr_t *mtr)
{
  ut_ad(page_align(rec) == block->page.frame);
  page_zip_des_t *const page_zip= &block->page.zip;

  ut_ad(rec_offs_validate(rec, index, offsets));
  ut_ad(rec_offs_comp(offsets));

  MEM_CHECK_DEFINED(page_zip->data, page_zip_get_size(page_zip));
  MEM_CHECK_DEFINED(rec, rec_offs_data_size(offsets));
  MEM_CHECK_DEFINED(rec - rec_offs_extra_size(offsets),
		    rec_offs_extra_size(offsets));

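  /* In the ROW_FORMAT=COMPACT record header, REC_NEXT stores a 16-bit
  offset relative to the record itself, 0 meaning "none".  Link rec at
  the head of the PAGE_FREE list. */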
  mach_write_to_2(rec - REC_NEXT,
                  free ? static_cast<uint16_t>(free - rec) : 0);
  byte *page_free= my_assume_aligned<2>(PAGE_FREE + PAGE_HEADER +
                                        block->page.frame);
  mtr->write<2>(*block, page_free, page_offset(rec));
  byte *garbage= my_assume_aligned<2>(PAGE_GARBAGE + PAGE_HEADER +
                                      block->page.frame);
  mtr->write<2>(*block, garbage, rec_offs_size(offsets) +
                mach_read_from_2(garbage));
  compile_time_assert(PAGE_GARBAGE == PAGE_FREE + 2);
  memcpy_aligned<4>(PAGE_FREE + PAGE_HEADER + page_zip->data, page_free, 4);
  byte *slot_rec= page_zip_dir_find(page_zip, page_offset(rec));
  ut_a(slot_rec);
  uint16_t n_recs= page_get_n_recs(block->page.frame);
  ut_ad(n_recs);
  ut_ad(n_recs > 1 || page_get_page_no(block->page.frame) == index->page);
  /* This could not be done before page_zip_dir_find(). */
  byte *page_n_recs= my_assume_aligned<2>(PAGE_N_RECS + PAGE_HEADER +
                                          block->page.frame);
  mtr->write<2>(*block, page_n_recs, n_recs - 1U);
  memcpy_aligned<2>(PAGE_N_RECS + PAGE_HEADER + page_zip->data, page_n_recs,
                    2);

  byte *slot_free;

  if (UNIV_UNLIKELY(!free))
    /* Make the last slot the start of the free list. */
    slot_free= page_zip->data + page_zip_get_size(page_zip) -
      PAGE_ZIP_DIR_SLOT_SIZE * (page_dir_get_n_heap(page_zip->data) -
                                PAGE_HEAP_NO_USER_LOW);
  else
  {
    slot_free= page_zip_dir_find_free(page_zip, page_offset(free));
    ut_a(slot_free < slot_rec);
    /* Grow the free list by one slot by moving the start. */
    slot_free+= PAGE_ZIP_DIR_SLOT_SIZE;
  }

  const ulint slot_len= slot_rec > slot_free ? ulint(slot_rec - slot_free) : 0;
  if (slot_len)
  {
    memmove_aligned<2>(slot_free + PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
                       slot_len);
    mtr->memmove(*block, (slot_free - page_zip->data) + PAGE_ZIP_DIR_SLOT_SIZE,
                 slot_free - page_zip->data, slot_len);
  }

  /* Write the entry for the deleted record.
  The "owned" and "deleted" flags will be cleared. */
  mach_write_to_2(slot_free, page_offset(rec));
  mtr->zmemcpy(*block, slot_free - page_zip->data, 2);

  if (const ulint n_ext= rec_offs_n_extern(offsets))
  {
    ut_ad(index->is_primary());
    ut_ad(page_is_leaf(block->page.frame));

    /* Shift and zero fill the array of BLOB pointers. */
    ulint blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
    ut_a(blob_no + n_ext <= page_zip->n_blobs);

    byte *externs= page_zip->data + page_zip_get_size(page_zip) -
      (page_dir_get_n_heap(block->page.frame) - PAGE_HEAP_NO_USER_LOW) *
      PAGE_ZIP_CLUST_LEAF_SLOT_SIZE;
    byte *ext_end= externs - page_zip->n_blobs * FIELD_REF_SIZE;

    /* Shift and zero fill the array. */
    if (const ulint ext_len= ulint(page_zip->n_blobs - n_ext - blob_no) *
        BTR_EXTERN_FIELD_REF_SIZE)
    {
      memmove(ext_end + n_ext * FIELD_REF_SIZE, ext_end, ext_len);
      mtr->memmove(*block, (ext_end - page_zip->data) + n_ext * FIELD_REF_SIZE,
                   ext_end - page_zip->data, ext_len);
    }
    memset(ext_end, 0, n_ext * FIELD_REF_SIZE);
    mtr->memset(*block, ext_end - page_zip->data, n_ext * FIELD_REF_SIZE, 0);
    page_zip->n_blobs = (page_zip->n_blobs - n_ext) & ((1U << 12) - 1);
  }

  /* The compression algorithm expects info_bits and n_owned
  to be 0 for deleted records. */
  rec[-REC_N_NEW_EXTRA_BYTES]= 0; /* info_bits and n_owned */

  page_zip_clear_rec(block, rec, index, offsets, mtr);
}

/**********************************************************************//**
Reorganize and compress a page.  This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, redo log will be written.
The function btr_page_reorganize() should be preferred whenever possible.
@return error code
@retval DB_FAIL on overflow; the compressed page will be left intact */
dberr_t
page_zip_reorganize(
	buf_block_t*	block,	/*!< in/out: page with compressed page;
				on the compressed page, in: size;
				out: data, n_blobs,
				m_start, m_end, m_nonempty */
	dict_index_t*	index,	/*!< in: index of the B-tree node */
	ulint		z_level,/*!< in: compression level */
	mtr_t*		mtr,	/*!< in: mini-transaction */
	bool		restore)/*!< whether to restore on failure */
{
	page_t*		page		= buf_block_get_frame(block);
	buf_block_t*	temp_block;
	page_t*		temp_page;

	ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(block->page.zip.data);
	ut_ad(page_is_comp(page));
	ut_ad(!index->table->is_temporary());
	/* Note that page_zip_validate(page_zip, page, index) may fail here. */
	MEM_CHECK_DEFINED(page, srv_page_size);
	MEM_CHECK_DEFINED(buf_block_get_page_zip(block)->data,
			  page_zip_get_size(buf_block_get_page_zip(block)));

	/* Disable logging */
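	/* The intermediate page_create() and record copying need not be
	redo-logged: on success, page_zip_compress() is assumed to log
	the final compressed page image once logging is restored. */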
	mtr_log_t	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);

	temp_block = buf_block_alloc();
	btr_search_drop_page_hash_index(block, false);
	temp_page = temp_block->page.frame;

	/* Copy the old page to temporary space */
	memcpy_aligned<UNIV_PAGE_SIZE_MIN>(temp_page, block->page.frame,
					   srv_page_size);

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	page_create(block, mtr, true);
	if (index->is_spatial()) {
		mach_write_to_2(FIL_PAGE_TYPE + page, FIL_PAGE_RTREE);
		memcpy_aligned<2>(block->page.zip.data + FIL_PAGE_TYPE,
				  page + FIL_PAGE_TYPE, 2);
		memset(FIL_RTREE_SPLIT_SEQ_NUM + page, 0, 8);
		memset(FIL_RTREE_SPLIT_SEQ_NUM + block->page.zip.data, 0, 8);
	}

	/* Copy the records from the temporary space to the recreated page;
	do not copy the lock bits yet */

	dberr_t err = page_copy_rec_list_end_no_locks(
		block, temp_block, page_get_infimum_rec(temp_page),
		index, mtr);

	/* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */
	memcpy_aligned<8>(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
			  temp_page + (PAGE_HEADER + PAGE_MAX_TRX_ID), 8);
	/* PAGE_MAX_TRX_ID must be set on secondary index leaf pages. */
	ut_ad(err != DB_SUCCESS
	      || index->is_clust() || !page_is_leaf(temp_page)
	      || page_get_max_trx_id(page) != 0);
	/* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
	clustered index root pages. */
	ut_ad(err != DB_SUCCESS
	      || page_get_max_trx_id(page) == 0
	      || (index->is_clust()
		  ? !page_has_siblings(temp_page)
		  : page_is_leaf(temp_page)));

	/* Restore logging. */
	mtr_set_log_mode(mtr, log_mode);

	if (!page_zip_compress(block, index, z_level, mtr)) {
		if (restore) {
			/* Restore the old page and exit. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
			/* Check that the bytes that we skip are identical. */
			ut_a(!memcmp(page, temp_page, PAGE_HEADER));
			ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
				     PAGE_HEADER + PAGE_N_RECS + temp_page,
				     PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
			ut_a(!memcmp(srv_page_size - FIL_PAGE_DATA_END + page,
				     srv_page_size - FIL_PAGE_DATA_END
				     + temp_page,
				     FIL_PAGE_DATA_END));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

			memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
			       PAGE_N_RECS - PAGE_N_DIR_SLOTS);
			memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
			       srv_page_size - PAGE_DATA - FIL_PAGE_DATA_END);

#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
			ut_a(!memcmp(page, temp_page, srv_page_size));
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
		}

		err = DB_FAIL;
	} else {
		lock_move_reorganize_page(block, temp_block);
	}

	buf_block_free(temp_block);
	return err;
}

/**********************************************************************//**
Copy the records of a page byte for byte.  Do not copy the page header
or trailer, except those B-tree header fields that are directly
related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
NOTE: The caller must update the lock table and the adaptive hash index. */
void
page_zip_copy_recs(
	buf_block_t*		block,		/*!< in/out: buffer block */
	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
	const page_t*		src,		/*!< in: page */
	dict_index_t*		index,		/*!< in: index of the B-tree */
	mtr_t*			mtr)		/*!< in: mini-transaction */
{
	page_t* page = block->page.frame;
	page_zip_des_t* page_zip = &block->page.zip;

	ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr->memo_contains_page_flagged(src, MTR_MEMO_PAGE_X_FIX));
	ut_ad(!index->table->is_temporary());
#ifdef UNIV_ZIP_DEBUG
	/* The B-tree operations that call this function may set
	FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
	mismatch.  A strict page_zip_validate() will be executed later
	during the B-tree operations. */
	ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
#endif /* UNIV_ZIP_DEBUG */
	ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
	if (UNIV_UNLIKELY(src_zip->n_blobs)) {
		ut_a(page_is_leaf(src));
		ut_a(dict_index_is_clust(index));
	}

	MEM_CHECK_ADDRESSABLE(page, srv_page_size);
	MEM_CHECK_ADDRESSABLE(page_zip->data, page_zip_get_size(page_zip));
	MEM_CHECK_DEFINED(src, srv_page_size);
	MEM_CHECK_DEFINED(src_zip->data, page_zip_get_size(page_zip));

	/* Copy those B-tree page header fields that are related to
	the records stored in the page.  Also copy the field
	PAGE_MAX_TRX_ID.  Skip the rest of the page header and
	trailer.  On the compressed page, there is no trailer. */
	compile_time_assert(PAGE_MAX_TRX_ID + 8 == PAGE_HEADER_PRIV_END);
	memcpy_aligned<2>(PAGE_HEADER + page, PAGE_HEADER + src,
			  PAGE_HEADER_PRIV_END);
	memcpy_aligned<2>(PAGE_DATA + page, PAGE_DATA + src,
			  srv_page_size - (PAGE_DATA + FIL_PAGE_DATA_END));
	memcpy_aligned<2>(PAGE_HEADER + page_zip->data,
			  PAGE_HEADER + src_zip->data,
			  PAGE_HEADER_PRIV_END);
	memcpy_aligned<2>(PAGE_DATA + page_zip->data,
			  PAGE_DATA + src_zip->data,
			  page_zip_get_size(page_zip) - PAGE_DATA);

	if (dict_index_is_clust(index)) {
		/* Reset the PAGE_ROOT_AUTO_INC field when copying
		from a root page. */
		memset_aligned<8>(PAGE_HEADER + PAGE_ROOT_AUTO_INC
				  + page, 0, 8);
		memset_aligned<8>(PAGE_HEADER + PAGE_ROOT_AUTO_INC
				  + page_zip->data, 0, 8);
	} else {
		/* The PAGE_MAX_TRX_ID must be nonzero on leaf pages
		of secondary indexes, and 0 on others. */
		ut_ad(!page_is_leaf(src) == !page_get_max_trx_id(src));
	}

	/* Copy all fields of src_zip to page_zip, except the pointer
	to the compressed data page. */
	{
		page_zip_t*	data = page_zip->data;
		new (page_zip) page_zip_des_t(*src_zip, false);
		page_zip->data = data;
	}
	ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
	      + page_zip->m_end < page_zip_get_size(page_zip));

	if (!page_is_leaf(src)
	    && UNIV_UNLIKELY(!page_has_prev(src))
	    && UNIV_LIKELY(page_has_prev(page))) {
		/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
		ulint	offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
						 TRUE);
		if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
			rec_t*	rec = page + offs;
			ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
			     & REC_INFO_MIN_REC_FLAG);
			rec[-REC_N_NEW_EXTRA_BYTES]
				&= byte(~REC_INFO_MIN_REC_FLAG);
		}
	}

#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
	page_zip_compress_write_log(block, index, mtr);
}
#endif /* !UNIV_INNOCHECKSUM */

/** Calculate the compressed page checksum.
@param data		compressed page
@param size		size of compressed page
@param use_adler	whether to use Adler32 instead of a XOR of 3 CRC-32C
@return page checksum */
uint32_t page_zip_calc_checksum(const void *data, size_t size, bool use_adler)
{
	uLong		adler;
	const Bytef*	s = static_cast<const byte*>(data);

	/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
	and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
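	/* The excluded fields split the page into three runs that are
	checksummed separately below: [FIL_PAGE_OFFSET, FIL_PAGE_LSN),
	[FIL_PAGE_TYPE, FIL_PAGE_TYPE + 2), and
	[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, size). */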
	ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

	if (!use_adler) {
		return my_crc32c(0, s + FIL_PAGE_OFFSET,
				 FIL_PAGE_LSN - FIL_PAGE_OFFSET)
			^ my_crc32c(0, s + FIL_PAGE_TYPE, 2)
			^ my_crc32c(0, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
				    size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
	} else {
		adler = adler32(0L, s + FIL_PAGE_OFFSET,
				FIL_PAGE_LSN - FIL_PAGE_OFFSET);
		adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
		adler = adler32(
			adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
			static_cast<uInt>(size)
			- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		return(uint32_t(adler));
	}
}

/** Validate the checksum on a ROW_FORMAT=COMPRESSED page.
@param data    ROW_FORMAT=COMPRESSED page
@param size    size of the page, in bytes
@return whether the stored checksum matches innodb_checksum_algorithm */
bool page_zip_verify_checksum(const byte *data, size_t size)
{
	if (buf_is_zeroes(span<const byte>(data, size))) {
		return true;
	}

	const uint32_t stored = mach_read_from_4(
		data + FIL_PAGE_SPACE_OR_CHKSUM);

	uint32_t calc = page_zip_calc_checksum(data, size, false);

#ifdef UNIV_INNOCHECKSUM
	extern FILE* log_file;
	extern uint32_t cur_page_num;

	if (log_file) {
		fprintf(log_file, "page::" UINT32PF ";"
			" checksum: calculated = " UINT32PF ";"
			" recorded = " UINT32PF "\n", cur_page_num,
			calc, stored);
	}
#endif /* UNIV_INNOCHECKSUM */

	if (stored == calc) {
		return true;
	}

#ifndef UNIV_INNOCHECKSUM
	switch (srv_checksum_algorithm) {
	case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
		break;
	default:
		if (stored == BUF_NO_CHECKSUM_MAGIC) {
			return true;
		}

		return stored == page_zip_calc_checksum(data, size, true);
	}
#endif /* !UNIV_INNOCHECKSUM */

	return false;
}
