2013-04-17 00:00:59 -04:00
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
2013-04-16 23:59:37 -04:00
# ident "Copyright (c) 2011 Tokutek Inc. All rights reserved."
# ident "$Id$"
2013-04-17 00:01:00 -04:00
# include <toku_portability.h>
2013-04-16 23:59:37 -04:00
# include <zlib.h>
2013-04-17 00:00:14 -04:00
# include <lzma.h>
2013-04-16 23:59:37 -04:00
# include "compress.h"
# include "memory.h"
# include "quicklz.h"
# include "toku_assert.h"
2013-04-17 00:00:14 -04:00
static inline enum toku_compression_method
normalize_compression_method ( enum toku_compression_method method )
// Effect: resolve "friendly" names like "fast" and "small" into their real values.
{
switch ( method ) {
2013-04-17 00:00:17 -04:00
case TOKU_DEFAULT_COMPRESSION_METHOD :
2013-04-17 00:00:14 -04:00
case TOKU_FAST_COMPRESSION_METHOD :
return TOKU_QUICKLZ_METHOD ;
case TOKU_SMALL_COMPRESSION_METHOD :
return TOKU_LZMA_METHOD ;
default :
return method ; // everything else is fine
}
}
2013-04-16 23:59:37 -04:00
size_t toku_compress_bound ( enum toku_compression_method a , size_t size )
// See compress.h for the specification of this function.
{
2013-04-17 00:00:14 -04:00
a = normalize_compression_method ( a ) ;
2013-04-16 23:59:37 -04:00
switch ( a ) {
2013-04-16 23:59:52 -04:00
case TOKU_NO_COMPRESSION :
return size + 1 ;
2013-04-17 00:00:14 -04:00
case TOKU_LZMA_METHOD :
return 1 + lzma_stream_buffer_bound ( size ) ; // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level)
2013-04-16 23:59:37 -04:00
case TOKU_QUICKLZ_METHOD :
2013-04-16 23:59:52 -04:00
return size + 400 + 1 ; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL.
2013-04-16 23:59:37 -04:00
case TOKU_ZLIB_METHOD :
2013-04-16 23:59:52 -04:00
return compressBound ( size ) ;
2013-04-17 00:00:14 -04:00
default :
break ;
2013-04-16 23:59:37 -04:00
}
// fall through for bad enum (thus compiler can warn us if we didn't use all the enums
2013-04-16 23:59:39 -04:00
assert ( 0 ) ; return 0 ;
2013-04-16 23:59:37 -04:00
}
static const int zlib_compression_level = 5 ;
void toku_compress ( enum toku_compression_method a ,
2013-04-16 23:59:52 -04:00
// the following types and naming conventions come from zlib.h
Bytef * dest , uLongf * destLen ,
const Bytef * source , uLong sourceLen )
2013-04-16 23:59:37 -04:00
// See compress.h for the specification of this function.
{
2013-04-17 00:00:14 -04:00
a = normalize_compression_method ( a ) ;
2013-04-16 23:59:37 -04:00
assert ( sourceLen < ( 1LL < < 32 ) ) ;
switch ( a ) {
2013-04-16 23:59:52 -04:00
case TOKU_NO_COMPRESSION :
dest [ 0 ] = TOKU_NO_COMPRESSION ;
memcpy ( dest + 1 , source , sourceLen ) ;
* destLen = sourceLen + 1 ;
return ;
2013-04-16 23:59:37 -04:00
case TOKU_ZLIB_METHOD : {
2013-04-16 23:59:52 -04:00
int r = compress2 ( dest , destLen , source , sourceLen , zlib_compression_level ) ;
assert ( r = = Z_OK ) ;
assert ( ( dest [ 0 ] & 0xF ) = = TOKU_ZLIB_METHOD ) ;
return ;
2013-04-16 23:59:37 -04:00
}
case TOKU_QUICKLZ_METHOD : {
2013-04-16 23:59:52 -04:00
if ( sourceLen = = 0 ) {
// quicklz requires at least one byte, so we handle this ourselves
assert ( 1 < = * destLen ) ;
* destLen = 1 ;
} else {
2013-04-17 00:01:03 -04:00
qlz_state_compress * XCALLOC ( qsc ) ;
2013-04-16 23:59:52 -04:00
size_t actual_destlen = qlz_compress ( source , ( char * ) ( dest + 1 ) , sourceLen , qsc ) ;
assert ( actual_destlen + 1 < = * destLen ) ;
* destLen = actual_destlen + 1 ; // add one for the rfc1950-style header byte.
toku_free ( qsc ) ;
}
// Fill in that first byte
dest [ 0 ] = TOKU_QUICKLZ_METHOD + ( QLZ_COMPRESSION_LEVEL < < 4 ) ;
return ;
2013-04-17 00:00:14 -04:00
}
case TOKU_LZMA_METHOD : {
const int lzma_compression_level = 2 ;
if ( sourceLen = = 0 ) {
// lzma version 4.999 requires at least one byte, so we'll do it ourselves.
assert ( 1 < = * destLen ) ;
* destLen = 1 ;
} else {
size_t out_pos = 1 ;
lzma_ret r = lzma_easy_buffer_encode ( lzma_compression_level , LZMA_CHECK_CRC32 , NULL ,
source , sourceLen ,
dest , & out_pos , * destLen ) ;
assert ( out_pos < * destLen ) ;
2013-04-17 00:00:55 -04:00
if ( r ! = LZMA_OK ) {
fprintf ( stderr , " lzma_easy_buffer_encode() returned %d \n " , ( int ) r ) ;
}
2013-04-17 00:00:14 -04:00
assert ( r = = LZMA_OK ) ;
* destLen = out_pos ;
}
dest [ 0 ] = TOKU_LZMA_METHOD + ( lzma_compression_level < < 4 ) ;
return ;
}
default :
break ;
}
2013-04-16 23:59:37 -04:00
// default fall through to error.
assert ( 0 ) ;
}
void toku_decompress ( Bytef * dest , uLongf destLen ,
2013-04-16 23:59:52 -04:00
const Bytef * source , uLongf sourceLen )
2013-04-16 23:59:37 -04:00
// See compress.h for the specification of this function.
{
assert ( sourceLen > = 1 ) ; // need at least one byte for the RFC header.
switch ( source [ 0 ] & 0xF ) {
2013-04-16 23:59:52 -04:00
case TOKU_NO_COMPRESSION :
memcpy ( dest , source + 1 , sourceLen - 1 ) ;
return ;
2013-04-16 23:59:37 -04:00
case TOKU_ZLIB_METHOD : {
2013-04-16 23:59:52 -04:00
uLongf actual_destlen = destLen ;
int r = uncompress ( dest , & actual_destlen , source , sourceLen ) ;
assert ( r = = Z_OK ) ;
assert ( actual_destlen = = destLen ) ;
return ;
2013-04-16 23:59:37 -04:00
}
case TOKU_QUICKLZ_METHOD :
2013-04-16 23:59:52 -04:00
if ( sourceLen > 1 ) {
2013-04-17 00:01:03 -04:00
qlz_state_decompress * XCALLOC ( qsd ) ;
2013-04-16 23:59:52 -04:00
uLongf actual_destlen = qlz_decompress ( ( char * ) source + 1 , dest , qsd ) ;
assert ( actual_destlen = = destLen ) ;
toku_free ( qsd ) ;
} else {
// length 1 means there is no data, so do nothing.
assert ( destLen = = 0 ) ;
}
return ;
2013-04-17 00:00:14 -04:00
case TOKU_LZMA_METHOD : {
if ( sourceLen > 1 ) {
uint64_t memlimit = UINT64_MAX ;
size_t out_pos = 0 ;
size_t in_pos = 1 ;
lzma_ret r = lzma_stream_buffer_decode ( & memlimit , // memlimit, use UINT64_MAX to disable this check
0 , // flags
NULL , // allocator
source , & in_pos , sourceLen ,
dest , & out_pos , destLen ) ;
assert ( r = = LZMA_OK ) ;
assert ( out_pos = = destLen ) ;
} else {
// length 1 means there is no data, so do nothing.
assert ( destLen = = 0 ) ;
}
return ;
}
2013-04-16 23:59:37 -04:00
}
// default fall through to error.
assert ( 0 ) ;
}