mariadb/storage/innobase/fil/fil0pagecompress.cc
Jan Lindström 5e55d1ced5 Changes for Fusion-io multi-threaded flush, page compressed tables and
tables using atomic write/table.

This is work in progress and some parts are at most POC quality.
2013-12-19 14:36:38 +02:00

369 lines
11 KiB
C++

/*****************************************************************************
Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/******************************************************************//**
@file fil/fil0pagecompress.cc
Implementation for page compressed file spaces.
Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
***********************************************************************/
#include "fil0fil.h"
#include "fil0pagecompress.h"
#include <debug_sync.h>
#include <my_dbug.h>
#include "mem0mem.h"
#include "hash0hash.h"
#include "os0file.h"
#include "mach0data.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "dict0dict.h"
#include "page0page.h"
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
# include "sync0sync.h"
# include "os0sync.h"
#else /* !UNIV_HOTBACKUP */
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
#endif /* !UNIV_HOTBACKUP */
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <linux/falloc.h>
#endif
#include "row0mysql.h"
/****************************************************************//**
For page compressed pages compress the page before actual write
operation.
@return compressed page to be written*/
byte*
fil_compress_page(
/*==============*/
ulint space_id, /*!< in: tablespace id of the
table. */
byte* buf, /*!< in: buffer from which to write; in aio
this must be appropriately aligned */
byte* out_buf, /*!< out: compressed buffer */
ulint len, /*!< in: length of input buffer.*/
ulint* out_len) /*!< out: actual length of compressed page */
{
int err = Z_OK;
int level = 0;
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
ulint write_size=0;
ut_a(buf);
ut_a(out_buf);
ut_a(len);
ut_a(out_len);
level = fil_space_get_page_compression_level(space_id);
ut_a(fil_space_is_page_compressed(space_id));
fil_system_enter();
fil_space_t* space = fil_space_get_by_id(space_id);
fil_system_exit();
/* If no compression level was provided to this table, use system
default level */
if (level == 0) {
level = srv_compress_zlib_level;
}
#ifdef UNIV_DEBUG
fprintf(stderr,
"InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n",
space_id, fil_space_name(space), len);
#endif
write_size = UNIV_PAGE_SIZE - header_len;
err = compress2(out_buf+header_len, &write_size, buf, len, level);
if (err != Z_OK) {
/* If error we leave the actual page as it was */
fprintf(stderr,
"InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n",
space_id, fil_space_name(space), len, err, write_size);
*out_len = len;
return (buf);
} else {
/* Set up the page header */
memcpy(out_buf, buf, FIL_PAGE_DATA);
/* Set up the checksum */
mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
/* Set up the correct page type */
mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
/* Set up the flush lsn to be compression algorithm */
mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB);
/* Set up the actual payload lenght */
mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
#ifdef UNIV_DEBUG
/* Verify */
ut_ad(fil_page_is_compressed(out_buf));
ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB);
#endif
write_size+=header_len;
/* Actual write needs to be alligned on block size */
if (write_size % OS_FILE_LOG_BLOCK_SIZE) {
write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE)));
}
#ifdef UNIV_DEBUG
fprintf(stderr,
"InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n",
space_id, fil_space_name(space), len, write_size);
#endif
#define SECT_SIZE 512
srv_stats.page_compression_saved.add((len - write_size));
if ((len - write_size) > 0) {
srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE));
srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8)));
}
//srv_stats.page_compressed_trim_op.inc();
srv_stats.pages_page_compressed.inc();
*out_len = write_size;
return(out_buf);
}
}
/****************************************************************//**
For page compressed pages decompress the page after actual read
operation. */
void
fil_decompress_page(
/*================*/
byte* page_buf, /*!< in: preallocated buffer or NULL */
byte* buf, /*!< out: buffer from which to read; in aio
this must be appropriately aligned */
ulint len) /*!< in: length of output buffer.*/
{
int err = 0;
ulint actual_size = 0;
ulint compression_alg = 0;
byte *in_buf;
ut_a(buf);
ut_a(len);
/* Before actual decompress, make sure that page type is correct */
if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) {
fprintf(stderr,
"InnoDB: Corruption: We try to uncompress corrupted page\n"
"InnoDB: CRC %lu type %lu.\n"
"InnoDB: len %lu\n",
mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
mach_read_from_2(buf+FIL_PAGE_TYPE), len);
fflush(stderr);
ut_error;
}
/* Get compression algorithm */
compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) {
// If no buffer was given, we need to allocate temporal buffer
if (page_buf == NULL) {
in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE));
} else {
in_buf = page_buf;
}
/* Get the actual size of compressed page */
actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
#ifdef UNIV_DEBUG
fprintf(stderr,
"InnoDB: Note: Preparing for decompress for len %lu\n",
actual_size);
#endif
err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size);
/* If uncompress fails it means that page is corrupted */
if (err != Z_OK) {
fprintf(stderr,
"InnoDB: Corruption: Page is marked as compressed\n"
"InnoDB: but uncompress failed with error %d.\n"
"InnoDB: size %lu len %lu\n",
err, actual_size, len);
fflush(stderr);
ut_error;
}
#ifdef UNIV_DEBUG
fprintf(stderr,
"InnoDB: Note: Decompression succeeded for len %lu \n",
len);
#endif
/* Copy the uncompressed page to the buffer pool, not
really any other options. */
memcpy(buf, in_buf, len);
// Need to free temporal buffer if no buffer was given
if (page_buf == NULL) {
ut_free(in_buf);
}
srv_stats.pages_page_decompressed.inc();
} else {
fprintf(stderr,
"InnoDB: Corruption: Page is marked as compressed\n"
"InnoDB: but compression algorithm %s\n"
"InnoDB: is not known.\n"
,fil_get_compression_alg_name(compression_alg));
fflush(stderr);
ut_error;
}
}
/*******************************************************************//**
Find out wheather the page is index page or not
@return true if page type index page, false if not */
ibool
fil_page_is_index_page(
/*===================*/
byte *buf) /*!< in: page */
{
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX);
}
/*******************************************************************//**
Find out wheather the page is page compressed
@return true if page is page compressed, false if not */
ibool
fil_page_is_compressed(
/*===================*/
byte *buf) /*!< in: page */
{
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
}
/*******************************************************************//**
Returns the page compression level of the space, or 0 if the space
is not compressed. The tablespace must be cached in the memory cache.
@return page compression level, ULINT_UNDEFINED if space not found */
ulint
fil_space_get_page_compression_level(
/*=================================*/
ulint id) /*!< in: space id */
{
ulint flags;
flags = fil_space_get_flags(id);
if (flags && flags != ULINT_UNDEFINED) {
return(fsp_flags_get_page_compression_level(flags));
}
return(flags);
}
/*******************************************************************//**
Extract the page compression from space.
@return true if space is page compressed, false if space is not found
or space is not page compressed. */
ibool
fil_space_is_page_compressed(
/*=========================*/
ulint id) /*!< in: space id */
{
ulint flags;
flags = fil_space_get_flags(id);
if (flags && flags != ULINT_UNDEFINED) {
return(fsp_flags_is_page_compressed(flags));
}
return(flags);
}
/****************************************************************//**
Get the name of the compression algorithm used for page
compression.
@return compression algorithm name or "UNKNOWN" if not known*/
const char*
fil_get_compression_alg_name(
/*=========================*/
ulint comp_alg) /*!<in: compression algorithm number */
{
switch(comp_alg) {
case FIL_PAGE_COMPRESSION_ZLIB:
return ("ZLIB");
break;
default:
return("UNKNOWN");
break;
}
}
/*******************************************************************//**
Returns the atomic writes flag of the space, or false if the space
is not using atomic writes. The tablespace must be cached in the memory cache.
@return true if space using atomic writes, false if not */
ibool
fil_space_get_atomic_writes(
/*========================*/
ulint id) /*!< in: space id */
{
ulint flags;
flags = fil_space_get_flags(id);
if (flags && flags != ULINT_UNDEFINED) {
return(fsp_flags_get_atomic_writes(flags));
}
return(flags);
}