mariadb/storage/innobase/fil/fil0pagecompress.cc
Sergei Golubchik 5dffda3ccc Merge branch 'bb-10.1-jan-encryption' into bb-10.1-serg
With changes:

* update tests to pass (new encryption/encryption_key_id syntax).
* not merged the code that makes engine aware of the encryption mode
  (CRYPT_SCHEME_1_CBC, CRYPT_SCHEME_1_CTR, storing it on disk, etc),
  because now the encryption plugin is handling it.
* compression+encryption did not work in either branch before the
  merge - and it does not work after the merge. it might be more
  broken after the merge though - some of that code was not merged.
* page checksumming code was not moved (moving of page checksumming
  from fil_space_encrypt() to fil_space_decrypt was not merged).
* restored deleted lines in buf_page_get_frame(), otherwise
  innodb_scrub test failed.
2015-04-09 19:27:40 +02:00

618 lines
16 KiB
C++

/*****************************************************************************
Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/******************************************************************//**
@file fil/fil0pagecompress.cc
Implementation for page compressed file spaces.
Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
Updated 14/02/2015
***********************************************************************/
#include "fil0fil.h"
#include "fil0pagecompress.h"
#include <debug_sync.h>
#include <my_dbug.h>
#include "mem0mem.h"
#include "hash0hash.h"
#include "os0file.h"
#include "mach0data.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "dict0dict.h"
#include "page0page.h"
#include "page0zip.h"
#include "trx0sys.h"
#include "row0mysql.h"
#include "ha_prototypes.h" // IB_LOG_
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
# include "sync0sync.h"
# include "os0sync.h"
#else /* !UNIV_HOTBACKUP */
# include "srv0srv.h"
static ulint srv_data_read, srv_data_written;
#endif /* !UNIV_HOTBACKUP */
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#endif
#include "row0mysql.h"
#ifdef HAVE_LZ4
#include "lz4.h"
#endif
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
#ifdef HAVE_LZMA
#include "lzma.h"
#endif
#ifdef HAVE_BZIP2
#include "bzlib.h"
#endif
#ifdef HAVE_SNAPPY
#include "snappy-c.h"
#endif
/* Used for debugging */
//#define UNIV_PAGECOMPRESS_DEBUG 1
/****************************************************************//**
For page compressed pages compress the page before actual write
operation.
@return compressed page to be written*/
byte*
fil_compress_page(
/*==============*/
ulint space_id, /*!< in: tablespace id of the
table. */
byte* buf, /*!< in: buffer from which to write; in aio
this must be appropriately aligned */
byte* out_buf, /*!< out: compressed buffer */
ulint len, /*!< in: length of input buffer.*/
ulint compression_level, /* in: compression level */
ulint block_size, /*!< in: block size */
ulint* out_len, /*!< out: actual length of compressed
page */
byte* lzo_mem) /*!< in: temporal memory used by LZO */
{
int err = Z_OK;
int level = 0;
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
ulint write_size=0;
/* Cache to avoid change during function execution */
ulint comp_method = innodb_compression_algorithm;
ulint orig_page_type;
ut_ad(buf);
ut_ad(out_buf);
ut_ad(len);
ut_ad(out_len);
/* read original page type */
orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
/* Let's not compress file space header or
extent descriptor */
if (orig_page_type == 0 ||
orig_page_type == FIL_PAGE_TYPE_FSP_HDR ||
orig_page_type == FIL_PAGE_TYPE_XDES ||
orig_page_type == FIL_PAGE_PAGE_COMPRESSED) {
*out_len = len;
return (buf);
}
level = compression_level;
ut_ad(fil_space_is_page_compressed(space_id));
fil_system_enter();
fil_space_t* space = fil_space_get_by_id(space_id);
fil_system_exit();
/* If no compression level was provided to this table, use system
default level */
if (level == 0) {
level = page_zip_level;
}
#ifdef UNIV_PAGECOMPRESS_DEBUG
ib_logf(IB_LOG_LEVEL_INFO,
"Preparing for compress for space %lu name %s len %lu.",
space_id, fil_space_name(space), len);
#endif /* UNIV_PAGECOMPRESS_DEBUG */
write_size = UNIV_PAGE_SIZE - header_len;
switch(comp_method) {
#ifdef HAVE_LZ4
case PAGE_LZ4_ALGORITHM:
err = LZ4_compress_limitedOutput((const char *)buf,
(char *)out_buf+header_len, len, write_size);
write_size = err;
if (err == 0) {
/* If error we leave the actual page as it was */
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu rt %d write %lu.",
space_id, fil_space_name(space), len, err, write_size);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
break;
#endif /* HAVE_LZ4 */
#ifdef HAVE_LZO
case PAGE_LZO_ALGORITHM:
err = lzo1x_1_15_compress(
buf, len, out_buf+header_len, &write_size, lzo_mem);
if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) {
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu err %d write_size %lu.",
space_id, fil_space_name(space), len, err, write_size);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
break;
#endif /* HAVE_LZO */
#ifdef HAVE_LZMA
case PAGE_LZMA_ALGORITHM: {
size_t out_pos=0;
err = lzma_easy_buffer_encode(
compression_level,
LZMA_CHECK_NONE,
NULL, /* No custom allocator, use malloc/free */
reinterpret_cast<uint8_t*>(buf),
len,
reinterpret_cast<uint8_t*>(out_buf + header_len),
&out_pos,
(size_t)write_size);
if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) {
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu err %d write_size %lu",
space_id, fil_space_name(space), len, err, out_pos);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
write_size = out_pos;
break;
}
#endif /* HAVE_LZMA */
#ifdef HAVE_BZIP2
case PAGE_BZIP2_ALGORITHM: {
err = BZ2_bzBuffToBuffCompress(
(char *)(out_buf + header_len),
(unsigned int *)&write_size,
(char *)buf,
len,
1,
0,
0);
if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) {
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu err %d write_size %lu.",
space_id, fil_space_name(space), len, err, write_size);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
break;
}
#endif /* HAVE_BZIP2 */
#ifdef HAVE_SNAPPY
case PAGE_SNAPPY_ALGORITHM:
{
snappy_status cstatus;
cstatus = snappy_compress((const char *)buf, len, (char *)(out_buf+header_len), &write_size);
if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) {
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu err %d write_size %lu.",
space_id, fil_space_name(space), len, (int)cstatus, write_size);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
break;
}
#endif /* HAVE_SNAPPY */
case PAGE_ZLIB_ALGORITHM:
err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level);
if (err != Z_OK) {
/* If error we leave the actual page as it was */
if (space->printed_compression_failure == false) {
ib_logf(IB_LOG_LEVEL_WARN,
"Compression failed for space %lu name %s len %lu rt %d write %lu.",
space_id, fil_space_name(space), len, err, write_size);
space->printed_compression_failure = true;
}
srv_stats.pages_page_compression_error.inc();
*out_len = len;
return (buf);
}
break;
case PAGE_UNCOMPRESSED:
*out_len = len;
return (buf);
break;
default:
ut_error;
break;
}
/* Set up the page header */
memcpy(out_buf, buf, FIL_PAGE_DATA);
/* Set up the checksum */
mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
/* Set up the correct page type */
mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
/* Set up the flush lsn to be compression algorithm */
mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method);
/* Set up the actual payload lenght */
mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
#ifdef UNIV_DEBUG
/* Verify */
ut_ad(fil_page_is_compressed(out_buf));
ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method);
/* Verify that page can be decompressed */
{
byte *comp_page;
byte *uncomp_page;
comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
memcpy(comp_page, out_buf, UNIV_PAGE_SIZE);
fil_decompress_page(uncomp_page, comp_page, len, NULL);
if(buf_page_is_corrupted(false, uncomp_page, 0)) {
buf_page_print(uncomp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
ut_error;
}
ut_free(comp_page);
ut_free(uncomp_page);
}
#endif /* UNIV_DEBUG */
write_size+=header_len;
/* Actual write needs to be alligned on block size */
if (write_size % block_size) {
size_t tmp = write_size;
#ifdef UNIV_DEBUG
ut_a(block_size > 0);
#endif
write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size);
#ifdef UNIV_DEBUG
ut_a(write_size > 0 && ((write_size % block_size) == 0));
ut_a(write_size >= tmp);
#endif
}
#ifdef UNIV_PAGECOMPRESS_DEBUG
ib_logf(IB_LOG_LEVEL_INFO,
"Compression succeeded for space %lu name %s len %lu out_len %lu.",
space_id, fil_space_name(space), len, write_size);
#endif /* UNIV_PAGECOMPRESS_DEBUG */
srv_stats.page_compression_saved.add((len - write_size));
srv_stats.pages_page_compressed.inc();
/* If we do not persistently trim rest of page, we need to write it
all */
if (!srv_use_trim) {
write_size = len;
}
*out_len = write_size;
return(out_buf);
}
/****************************************************************//**
For page compressed pages decompress the page after actual read
operation. */
void
fil_decompress_page(
/*================*/
byte* page_buf, /*!< in: preallocated buffer or NULL */
byte* buf, /*!< out: buffer from which to read; in aio
this must be appropriately aligned */
ulong len, /*!< in: length of output buffer.*/
ulint* write_size) /*!< in/out: Actual payload size of
the compressed data. */
{
int err = 0;
ulint actual_size = 0;
ulint compression_alg = 0;
byte *in_buf;
ulint ptype;
ut_ad(buf);
ut_ad(len);
ptype = mach_read_from_2(buf+FIL_PAGE_TYPE);
/* Do not try to uncompressed pages that are not compressed */
if (ptype != FIL_PAGE_PAGE_COMPRESSED && ptype != FIL_PAGE_TYPE_COMPRESSED) {
return;
}
// If no buffer was given, we need to allocate temporal buffer
if (page_buf == NULL) {
in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*3));
} else {
in_buf = page_buf;
}
/* Before actual decompress, make sure that page type is correct */
if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: We try to uncompress corrupted page"
" CRC %lu type %lu len %lu.",
mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
mach_read_from_2(buf+FIL_PAGE_TYPE), len);
fflush(stderr);
ut_error;
}
/* Get compression algorithm */
compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
/* Get the actual size of compressed page */
actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
/* Check if payload size is corrupted */
if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: We try to uncompress corrupted page"
" actual size %lu compression %s.",
actual_size, fil_get_compression_alg_name(compression_alg));
fflush(stderr);
ut_error;
}
/* Store actual payload size of the compressed data. This pointer
points to buffer pool. */
if (write_size) {
*write_size = actual_size;
}
#ifdef UNIV_PAGECOMPRESS_DEBUG
ib_logf(IB_LOG_LEVEL_INFO,
"Preparing for decompress for len %lu\n",
actual_size);
#endif /* UNIV_PAGECOMPRESS_DEBUG */
switch(compression_alg) {
case PAGE_ZLIB_ALGORITHM:
err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size);
/* If uncompress fails it means that page is corrupted */
if (err != Z_OK) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but uncompress failed with error %d "
" size %lu len %lu.",
err, actual_size, len);
fflush(stderr);
ut_error;
}
break;
#ifdef HAVE_LZ4
case PAGE_LZ4_ALGORITHM:
err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, len);
if (err != (int)actual_size) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but decompression read only %d bytes "
" size %lu len %lu.",
err, actual_size, len);
fflush(stderr);
ut_error;
}
break;
#endif /* HAVE_LZ4 */
#ifdef HAVE_LZO
case PAGE_LZO_ALGORITHM:
{
ulint olen=0;
err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
actual_size,(unsigned char *)in_buf, &olen, NULL);
if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but decompression read only %ld bytes"
" size %lu len %lu.",
olen, actual_size, len);
fflush(stderr);
ut_error;
}
break;
}
#endif /* HAVE_LZO */
#ifdef HAVE_LZMA
case PAGE_LZMA_ALGORITHM: {
lzma_ret ret;
size_t src_pos = 0;
size_t dst_pos = 0;
uint64_t memlimit = UINT64_MAX;
ret = lzma_stream_buffer_decode(
&memlimit,
0,
NULL,
buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE,
&src_pos,
actual_size,
in_buf,
&dst_pos,
len);
if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but decompression read only %ld bytes"
" size %lu len %lu.",
dst_pos, actual_size, len);
fflush(stderr);
ut_error;
}
break;
}
#endif /* HAVE_LZMA */
#ifdef HAVE_BZIP2
case PAGE_BZIP2_ALGORITHM: {
unsigned int dst_pos = UNIV_PAGE_SIZE;
err = BZ2_bzBuffToBuffDecompress(
(char *)in_buf,
&dst_pos,
(char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
actual_size,
1,
0);
if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but decompression read only %du bytes"
" size %lu len %lu err %d.",
dst_pos, actual_size, len, err);
fflush(stderr);
ut_error;
}
break;
}
#endif /* HAVE_BZIP2 */
#ifdef HAVE_SNAPPY
case PAGE_SNAPPY_ALGORITHM:
{
snappy_status cstatus;
ulint olen = 0;
cstatus = snappy_uncompress(
(const char *)(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE),
actual_size,
(char *)in_buf,
&olen);
if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but decompression read only %lu bytes"
" size %lu len %lu err %d.",
olen, actual_size, len, (int)cstatus);
fflush(stderr);
ut_error;
}
break;
}
#endif /* HAVE_SNAPPY */
default:
ib_logf(IB_LOG_LEVEL_ERROR,
"Corruption: Page is marked as compressed"
" but compression algorithm %s"
" is not known."
,fil_get_compression_alg_name(compression_alg));
fflush(stderr);
ut_error;
break;
}
srv_stats.pages_page_decompressed.inc();
/* Copy the uncompressed page to the buffer pool, not
really any other options. */
memcpy(buf, in_buf, len);
// Need to free temporal buffer if no buffer was given
if (page_buf == NULL) {
ut_free(in_buf);
}
}