2013-12-19 14:36:38 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
|
|
|
|
Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
@file fil/fil0pagecompress.cc
|
|
|
|
Implementation for page compressed file spaces.
|
|
|
|
|
|
|
|
Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include "fil0fil.h"
|
|
|
|
#include "fil0pagecompress.h"
|
|
|
|
|
|
|
|
#include <debug_sync.h>
|
|
|
|
#include <my_dbug.h>
|
|
|
|
|
|
|
|
#include "mem0mem.h"
|
|
|
|
#include "hash0hash.h"
|
|
|
|
#include "os0file.h"
|
|
|
|
#include "mach0data.h"
|
|
|
|
#include "buf0buf.h"
|
|
|
|
#include "buf0flu.h"
|
|
|
|
#include "log0recv.h"
|
|
|
|
#include "fsp0fsp.h"
|
|
|
|
#include "srv0srv.h"
|
|
|
|
#include "srv0start.h"
|
|
|
|
#include "mtr0mtr.h"
|
|
|
|
#include "mtr0log.h"
|
|
|
|
#include "dict0dict.h"
|
|
|
|
#include "page0page.h"
|
|
|
|
#include "page0zip.h"
|
|
|
|
#include "trx0sys.h"
|
|
|
|
#include "row0mysql.h"
|
|
|
|
#ifndef UNIV_HOTBACKUP
|
|
|
|
# include "buf0lru.h"
|
|
|
|
# include "ibuf0ibuf.h"
|
|
|
|
# include "sync0sync.h"
|
|
|
|
# include "os0sync.h"
|
|
|
|
#else /* !UNIV_HOTBACKUP */
|
|
|
|
# include "srv0srv.h"
|
|
|
|
static ulint srv_data_read, srv_data_written;
|
|
|
|
#endif /* !UNIV_HOTBACKUP */
|
|
|
|
#include "zlib.h"
|
|
|
|
#ifdef __linux__
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <linux/falloc.h>
|
|
|
|
#endif
|
|
|
|
#include "row0mysql.h"
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
For page compressed pages compress the page before actual write
|
|
|
|
operation.
|
|
|
|
@return compressed page to be written*/
|
|
|
|
byte*
|
|
|
|
fil_compress_page(
|
|
|
|
/*==============*/
|
|
|
|
ulint space_id, /*!< in: tablespace id of the
|
|
|
|
table. */
|
|
|
|
byte* buf, /*!< in: buffer from which to write; in aio
|
|
|
|
this must be appropriately aligned */
|
|
|
|
byte* out_buf, /*!< out: compressed buffer */
|
|
|
|
ulint len, /*!< in: length of input buffer.*/
|
|
|
|
ulint* out_len) /*!< out: actual length of compressed page */
|
|
|
|
{
|
|
|
|
int err = Z_OK;
|
|
|
|
int level = 0;
|
|
|
|
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
|
|
|
|
ulint write_size=0;
|
|
|
|
|
|
|
|
ut_a(buf);
|
|
|
|
ut_a(out_buf);
|
|
|
|
ut_a(len);
|
|
|
|
ut_a(out_len);
|
|
|
|
|
|
|
|
level = fil_space_get_page_compression_level(space_id);
|
|
|
|
ut_a(fil_space_is_page_compressed(space_id));
|
|
|
|
|
|
|
|
fil_system_enter();
|
|
|
|
fil_space_t* space = fil_space_get_by_id(space_id);
|
|
|
|
fil_system_exit();
|
|
|
|
|
|
|
|
/* If no compression level was provided to this table, use system
|
|
|
|
default level */
|
|
|
|
if (level == 0) {
|
|
|
|
level = srv_compress_zlib_level;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n",
|
|
|
|
space_id, fil_space_name(space), len);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
write_size = UNIV_PAGE_SIZE - header_len;
|
|
|
|
err = compress2(out_buf+header_len, &write_size, buf, len, level);
|
|
|
|
|
|
|
|
if (err != Z_OK) {
|
|
|
|
/* If error we leave the actual page as it was */
|
|
|
|
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
|
|
|
|
space_id, fil_space_name(space), len, err, write_size);
|
|
|
|
|
|
|
|
*out_len = len;
|
|
|
|
return (buf);
|
|
|
|
} else {
|
|
|
|
/* Set up the page header */
|
|
|
|
memcpy(out_buf, buf, FIL_PAGE_DATA);
|
|
|
|
/* Set up the checksum */
|
|
|
|
mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
|
|
|
|
/* Set up the correct page type */
|
|
|
|
mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
|
|
|
|
/* Set up the flush lsn to be compression algorithm */
|
|
|
|
mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB);
|
|
|
|
/* Set up the actual payload lenght */
|
|
|
|
mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/* Verify */
|
|
|
|
ut_ad(fil_page_is_compressed(out_buf));
|
|
|
|
ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
|
|
|
|
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
|
|
|
|
ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
write_size+=header_len;
|
|
|
|
/* Actual write needs to be alligned on block size */
|
|
|
|
if (write_size % OS_FILE_LOG_BLOCK_SIZE) {
|
|
|
|
write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE)));
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n",
|
|
|
|
space_id, fil_space_name(space), len, write_size);
|
|
|
|
#endif
|
|
|
|
#define SECT_SIZE 512
|
|
|
|
srv_stats.page_compression_saved.add((len - write_size));
|
|
|
|
if ((len - write_size) > 0) {
|
|
|
|
srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE));
|
|
|
|
srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8)));
|
|
|
|
}
|
|
|
|
//srv_stats.page_compressed_trim_op.inc();
|
|
|
|
srv_stats.pages_page_compressed.inc();
|
|
|
|
*out_len = write_size;
|
|
|
|
|
|
|
|
return(out_buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
For page compressed pages decompress the page after actual read
|
|
|
|
operation. */
|
|
|
|
void
|
|
|
|
fil_decompress_page(
|
|
|
|
/*================*/
|
|
|
|
byte* page_buf, /*!< in: preallocated buffer or NULL */
|
|
|
|
byte* buf, /*!< out: buffer from which to read; in aio
|
|
|
|
this must be appropriately aligned */
|
|
|
|
ulint len) /*!< in: length of output buffer.*/
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
ulint actual_size = 0;
|
|
|
|
ulint compression_alg = 0;
|
|
|
|
byte *in_buf;
|
|
|
|
|
|
|
|
ut_a(buf);
|
|
|
|
ut_a(len);
|
|
|
|
|
|
|
|
/* Before actual decompress, make sure that page type is correct */
|
|
|
|
|
|
|
|
if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
|
|
|
|
mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Corruption: We try to uncompress corrupted page\n"
|
|
|
|
"InnoDB: CRC %lu type %lu.\n"
|
|
|
|
"InnoDB: len %lu\n",
|
|
|
|
mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
|
|
|
|
mach_read_from_2(buf+FIL_PAGE_TYPE), len);
|
|
|
|
|
|
|
|
fflush(stderr);
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get compression algorithm */
|
|
|
|
compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
|
|
|
|
|
|
|
|
if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) {
|
|
|
|
// If no buffer was given, we need to allocate temporal buffer
|
|
|
|
if (page_buf == NULL) {
|
|
|
|
in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE));
|
|
|
|
} else {
|
|
|
|
in_buf = page_buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the actual size of compressed page */
|
|
|
|
actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Note: Preparing for decompress for len %lu\n",
|
|
|
|
actual_size);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size);
|
|
|
|
|
|
|
|
|
|
|
|
/* If uncompress fails it means that page is corrupted */
|
|
|
|
if (err != Z_OK) {
|
|
|
|
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Corruption: Page is marked as compressed\n"
|
|
|
|
"InnoDB: but uncompress failed with error %d.\n"
|
|
|
|
"InnoDB: size %lu len %lu\n",
|
|
|
|
err, actual_size, len);
|
|
|
|
|
|
|
|
fflush(stderr);
|
|
|
|
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Note: Decompression succeeded for len %lu \n",
|
|
|
|
len);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Copy the uncompressed page to the buffer pool, not
|
|
|
|
really any other options. */
|
|
|
|
memcpy(buf, in_buf, len);
|
|
|
|
|
|
|
|
// Need to free temporal buffer if no buffer was given
|
|
|
|
if (page_buf == NULL) {
|
|
|
|
ut_free(in_buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
srv_stats.pages_page_decompressed.inc();
|
|
|
|
} else {
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Corruption: Page is marked as compressed\n"
|
|
|
|
"InnoDB: but compression algorithm %s\n"
|
|
|
|
"InnoDB: is not known.\n"
|
|
|
|
,fil_get_compression_alg_name(compression_alg));
|
|
|
|
|
|
|
|
fflush(stderr);
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Find out wheather the page is index page or not
|
|
|
|
@return true if page type index page, false if not */
|
|
|
|
ibool
|
|
|
|
fil_page_is_index_page(
|
|
|
|
/*===================*/
|
|
|
|
byte *buf) /*!< in: page */
|
|
|
|
{
|
|
|
|
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Find out wheather the page is page compressed
|
|
|
|
@return true if page is page compressed, false if not */
|
|
|
|
ibool
|
|
|
|
fil_page_is_compressed(
|
|
|
|
/*===================*/
|
|
|
|
byte *buf) /*!< in: page */
|
|
|
|
{
|
|
|
|
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Returns the page compression level of the space, or 0 if the space
|
|
|
|
is not compressed. The tablespace must be cached in the memory cache.
|
|
|
|
@return page compression level, ULINT_UNDEFINED if space not found */
|
|
|
|
ulint
|
|
|
|
fil_space_get_page_compression_level(
|
|
|
|
/*=================================*/
|
|
|
|
ulint id) /*!< in: space id */
|
|
|
|
{
|
|
|
|
ulint flags;
|
|
|
|
|
|
|
|
flags = fil_space_get_flags(id);
|
|
|
|
|
|
|
|
if (flags && flags != ULINT_UNDEFINED) {
|
|
|
|
|
|
|
|
return(fsp_flags_get_page_compression_level(flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
return(flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Extract the page compression from space.
|
|
|
|
@return true if space is page compressed, false if space is not found
|
|
|
|
or space is not page compressed. */
|
|
|
|
ibool
|
|
|
|
fil_space_is_page_compressed(
|
|
|
|
/*=========================*/
|
|
|
|
ulint id) /*!< in: space id */
|
|
|
|
{
|
|
|
|
ulint flags;
|
|
|
|
|
|
|
|
flags = fil_space_get_flags(id);
|
|
|
|
|
|
|
|
if (flags && flags != ULINT_UNDEFINED) {
|
|
|
|
|
|
|
|
return(fsp_flags_is_page_compressed(flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
return(flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Get the name of the compression algorithm used for page
|
|
|
|
compression.
|
|
|
|
@return compression algorithm name or "UNKNOWN" if not known*/
|
|
|
|
const char*
|
|
|
|
fil_get_compression_alg_name(
|
|
|
|
/*=========================*/
|
|
|
|
ulint comp_alg) /*!<in: compression algorithm number */
|
|
|
|
{
|
|
|
|
switch(comp_alg) {
|
|
|
|
case FIL_PAGE_COMPRESSION_ZLIB:
|
|
|
|
return ("ZLIB");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return("UNKNOWN");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Returns the atomic writes flag of the space, or false if the space
|
|
|
|
is not using atomic writes. The tablespace must be cached in the memory cache.
|
2014-01-10 12:11:36 +02:00
|
|
|
@return atomic writes table option value */
|
|
|
|
atomic_writes_t
|
2013-12-19 14:36:38 +02:00
|
|
|
fil_space_get_atomic_writes(
|
|
|
|
/*========================*/
|
|
|
|
ulint id) /*!< in: space id */
|
|
|
|
{
|
|
|
|
ulint flags;
|
|
|
|
|
|
|
|
flags = fil_space_get_flags(id);
|
|
|
|
|
|
|
|
if (flags && flags != ULINT_UNDEFINED) {
|
|
|
|
|
2014-01-10 12:11:36 +02:00
|
|
|
return((atomic_writes_t)fsp_flags_get_atomic_writes(flags));
|
2013-12-19 14:36:38 +02:00
|
|
|
}
|
|
|
|
|
2014-01-10 12:11:36 +02:00
|
|
|
return((atomic_writes_t)0);
|
2013-12-19 14:36:38 +02:00
|
|
|
}
|