branches/zip: Implement a more compact page format.

Add a hook to buf0flu.c for testing compression and decompression.
TODO: adapt page_zip_write() calls.
This commit is contained in:
marko 2005-11-24 14:13:10 +00:00
parent 554299f38e
commit b348fcafa5
9 changed files with 692 additions and 181 deletions

View file

@ -16,6 +16,9 @@ Created 11/11/1995 Heikki Tuuri
#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#if 1 /* testing */
# include "page0zip.h"
#endif
#include "fil0fil.h"
#include "buf0buf.h"
#include "buf0lru.h"
@ -451,6 +454,10 @@ buf_flush_init_for_writing(
ulint space, /* in: space id */
ulint page_no) /* in: page number */
{
#if 1 /* testing */
byte zip[16384];
page_zip_des_t page_zip = { zip, sizeof zip, 0, 0 };
#endif /* testing */
/* Write the newest modification lsn to the page header and trailer */
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
@ -475,6 +482,11 @@ buf_flush_init_for_writing(
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
srv_use_checksums ?
buf_calc_page_old_checksum(page) : BUF_NO_CHECKSUM_MAGIC);
#if 1 /* testing */
if (page_is_comp(page)) {
ut_a(page_zip_compress(&page_zip, page));
}
#endif /* testing */
}
/************************************************************************

View file

@ -280,14 +280,6 @@ page_rec_get_n_recs_before(
/* out: number of records */
rec_t* rec); /* in: the physical record */
/*****************************************************************
Gets the size of the page trailer. */
UNIV_INLINE
ulint
page_trailer_get_len(
/*=================*/
/* out: length of page trailer, in bytes */
const page_t* page); /* in: index page */
/*****************************************************************
Gets the number of records in the heap. */
UNIV_INLINE
ulint

View file

@ -438,7 +438,7 @@ page_dir_set_n_slots(
/* Ensure that the modification log will not be overwritten. */
ulint n_slots_old = page_dir_get_n_slots(page);
if (n_slots > n_slots_old) {
ut_ad(page_zip_available(page_zip,
ut_ad(page_zip_available_noninline(page_zip,
(n_slots - n_slots_old)
* PAGE_DIR_SLOT_SIZE));
}
@ -447,19 +447,6 @@ page_dir_set_n_slots(
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
/*****************************************************************
Gets the size of the page trailer. */
UNIV_INLINE
ulint
page_trailer_get_len(
/*=================*/
/* out: length of page trailer, in bytes */
const page_t* page) /* in: index page */
{
return(PAGE_DIR + PAGE_DIR_SLOT_SIZE
* page_dir_get_n_slots((page_t*) page));
}
/*****************************************************************
Gets the number of records in the heap. */
UNIV_INLINE
@ -552,10 +539,11 @@ page_dir_slot_set_rec(
ut_ad(page_rec_check(rec));
mach_write_to_2(slot, ut_align_offset(rec, UNIV_PAGE_SIZE));
#if 0 /* TODO */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_trailer(page_zip, slot, 2);
}
#endif
}
/*******************************************************************
@ -904,22 +892,20 @@ page_mem_free(
page_rec_set_next(rec, free, page_zip);
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
#if 0 /* It's better not to destroy the user's data. */
if (1||/* TODO: remove testing */UNIV_LIKELY_NULL(page_zip)) {
ut_ad(rec_offs_comp(offsets));
/* Clear the data bytes of the deleted record in order to improve
the compression ratio of the page and to make it easier to read
page dumps in corruption reports. The extra bytes of the record
cannot be cleared, because page_mem_alloc() needs them in order
to determine the size of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets));
/* If you enable this code, make sure that the callers of
page_mem_free() account for the increased usage of space. */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, page, rec, rec - page,
rec_offs_data_size(offsets));
/* The compression algorithm expects info_bits and n_owned
to be 0 for deleted records. */
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
/* Clear the data bytes of the deleted record in order
to improve the compression ratio of the page. The extra
bytes of the record cannot be cleared, because
page_mem_alloc() needs them in order to determine the size
of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets));
}
#endif
garbage = page_header_get_field(page, PAGE_GARBAGE);

View file

@ -59,18 +59,6 @@ page_zip_write_header(
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */

View file

@ -34,7 +34,7 @@ page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* out: compressed page */
page_zip_des_t* page_zip,/* in: size; out: compressed page */
const page_t* page); /* in: uncompressed page */
/**************************************************************************
@ -71,6 +71,45 @@ page_zip_validate(
const page_t* page); /* in: uncompressed page */
#endif /* UNIV_DEBUG */
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory). */
UNIV_INLINE
ulint
page_zip_dir_size(
/*==============*/
/* out: length of dense page
directory, in bytes */
const page_zip_des_t* page_zip) /* in: compressed page */
__attribute__((pure));
/*****************************************************************
Read a given slot in the dense page directory. */
UNIV_INLINE
ulint
page_zip_dir_get(
/*==============*/
/* out: record offset
on the uncompressed page,
possibly ORed with
PAGE_ZIP_DIR_SLOT_DEL or
PAGE_ZIP_DIR_SLOT_OWNED */
const page_zip_des_t* page_zip, /* in: compressed page */
ulint slot) /* in: slot
(0=first user record) */
__attribute__((pure));
/*****************************************************************
Write a given slot in the dense page directory.
NOTE: this function must not be declared __attribute__((pure)): it
returns void and is called only for its side effect (it stores offs in
the buffer at page_zip->data), so a "pure" annotation would allow the
compiler to discard the calls. */
UNIV_INLINE
void
page_zip_dir_set(
/*==============*/
	const page_zip_des_t*	page_zip,	/* in: compressed page
						descriptor; the bytes at
						page_zip->data are modified */
	ulint			slot,		/* in: slot
						(0=first user record) */
	ulint			offs)		/* in: offset, possibly
						ORed with
						PAGE_ZIP_DIR_SLOT_DEL or
						PAGE_ZIP_DIR_SLOT_OWNED */
	__attribute__((nonnull));
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
@ -144,7 +183,7 @@ page_zip_write(
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
__attribute__((nonnull, deprecated));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
@ -158,19 +197,6 @@ page_zip_write_header(
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL

View file

@ -16,20 +16,32 @@ Created June 2005 by Marko Makela
/* The format of compressed pages is as follows.
The header and trailer of the uncompressed pages, including the page
The header and trailer of the uncompressed pages, excluding the page
directory in the trailer, are copied as is to the header and trailer
of the compressed page. Immediately preceding the page trailer,
we store a 32-bit checksum of the compressed data.
of the compressed page.
The data between PAGE_DATA and the last page directory entry
will be written in compressed format, starting at offset PAGE_DATA.
At the end of the compressed page, there is a dense page directory
pointing to every user record contained on the page, including deleted
records on the free list. The dense directory is indexed by the
record heap number. The infimum and supremum records are excluded.
The two most significant bits of the entries are allocated for the
delete-mark and an n_owned flag indicating the last record in a chain
of records pointed to from the sparse page directory on the
uncompressed page.
The data between PAGE_ZIP_START and the last page directory entry will
be written in compressed format, starting at offset PAGE_DATA.
Infimum and supremum records are not stored. We exclude the
REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
from the dense page directory stored at the end of the compressed
page.
The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.
MODIFICATION LOG ENTRY FORMAT
- length (1..2 bytes), not zero
- offset - PAGE_DATA (1..2 bytes)
- offset - PAGE_ZIP_START (1..2 bytes)
- data bytes
The length and the offset are stored in a variable-length format:
@ -39,6 +51,17 @@ The length and the offset are stored in a variable-length format:
The end of the modification log is marked by length=0. */
/* Start offset of the area that will be compressed */
#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
/* Size of a compressed page directory entry */
#define PAGE_ZIP_DIR_SLOT_SIZE 2
/* Mask of record offsets */
#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff
/* 'owned' flag */
#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000
/* 'deleted' flag */
#define PAGE_ZIP_DIR_SLOT_DEL 0x8000
/**************************************************************************
Initialize a compressed page descriptor. */
UNIV_INLINE
@ -66,7 +89,7 @@ page_zip_ulint_size(
if (num < 16384) { /* 10xxxxxx xxxxxxxx: 0..16383 */
return(2);
}
ut_error;
ut_ad(0);
return(0);
}
@ -80,9 +103,10 @@ page_zip_entry_size(
ulint pos, /* in: offset of the uncompressed page */
ulint length) /* in: length of the data */
{
ut_ad(pos >= PAGE_DATA);
ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_DATA /* - trailer_len */);
return(page_zip_ulint_size(pos - PAGE_DATA)
ut_ad(pos >= PAGE_ZIP_START);
ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_ZIP_START
/* - trailer_len */);
return(page_zip_ulint_size(pos - PAGE_ZIP_START)
+ page_zip_ulint_size(length)
+ length);
}
@ -99,15 +123,73 @@ page_zip_simple_validate(
{
ut_ad(page_zip);
ut_ad(page_zip->data);
ut_ad(!(page_zip->size & (page_zip->size - 1)));
ut_ad(page_zip->size < UNIV_PAGE_SIZE);
ut_ad(page_zip->size > PAGE_DATA + PAGE_EMPTY_DIR_START);
ut_ad(!(page_zip->size & (page_zip->size - 1))); /* power of 2 */
ut_ad(page_zip->size <= UNIV_PAGE_SIZE);
ut_ad(page_zip->size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
ut_ad(page_zip->m_start >= PAGE_DATA);
ut_ad(page_zip->m_start <= page_zip->m_end);
ut_ad(page_zip->m_end < page_zip->size);
return(TRUE);
}
#endif /* UNIV_DEBUG */
/*****************************************************************
Gets the size of the compressed page trailer (the dense page directory).
The directory holds one PAGE_ZIP_DIR_SLOT_SIZE-byte entry for every heap
record except the first two (the infimum and the supremum). */
UNIV_INLINE
ulint
page_zip_dir_size(
/*==============*/
						/* out: length of dense page
						directory, in bytes */
	const page_zip_des_t*	page_zip)	/* in: compressed page */
{
	ulint	n_user_heap;
	ulint	dir_bytes;

	/* The heap record count is read from the page header that is
	stored at the start of the compressed page. */
	n_user_heap = page_dir_get_n_heap((page_t*) page_zip->data) - 2;
	dir_bytes = PAGE_ZIP_DIR_SLOT_SIZE * n_user_heap;

	/* The directory must not overlap the modification log. */
	ut_ad(page_zip->size > page_zip->m_end + dir_bytes);

	return(dir_bytes);
}
/*****************************************************************
Read a given slot in the dense page directory.
The directory grows downwards from the end of the compressed page:
slot 0 occupies the last PAGE_ZIP_DIR_SLOT_SIZE bytes of the buffer at
page_zip->data, slot 1 the bytes immediately before it, and so on. */
UNIV_INLINE
ulint
page_zip_dir_get(
/*==============*/
						/* out: record offset
						on the uncompressed page,
						possibly ORed with
						PAGE_ZIP_DIR_SLOT_DEL or
						PAGE_ZIP_DIR_SLOT_OWNED */
	const page_zip_des_t*	page_zip,	/* in: compressed page */
	ulint			slot)		/* in: slot
						(0=first user record) */
{
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(slot + 2 < page_dir_get_n_heap((page_t*) page_zip->data));

	/* Use (slot + 1), not slot: otherwise slot 0 would be read from
	page_zip->data + page_zip->size, which is past the end of the
	compressed page buffer. */
	return(mach_read_from_2(page_zip->data + page_zip->size
			- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
}

/*****************************************************************
Write a given slot in the dense page directory.
Uses the same layout as page_zip_dir_get(): slot 0 is stored in the
last PAGE_ZIP_DIR_SLOT_SIZE bytes of the compressed page. */
UNIV_INLINE
void
page_zip_dir_set(
/*==============*/
	const page_zip_des_t*	page_zip,	/* in: compressed page
						descriptor; the bytes at
						page_zip->data are modified */
	ulint			slot,		/* in: slot
						(0=first user record) */
	ulint			offs)		/* in: offset, possibly
						ORed with
						PAGE_ZIP_DIR_SLOT_DEL or
						PAGE_ZIP_DIR_SLOT_OWNED */
{
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(slot + 2 < page_dir_get_n_heap((page_t*) page_zip->data));

	/* Use (slot + 1), not slot: otherwise slot 0 would be written
	to page_zip->data + page_zip->size, overflowing the compressed
	page buffer by PAGE_ZIP_DIR_SLOT_SIZE bytes. */
	mach_write_to_2(page_zip->data + page_zip->size
			- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1),
			offs);
}
/**************************************************************************
Ensure that enough space is available in the modification log.
If not, try to compress the page. */
@ -122,14 +204,13 @@ page_zip_alloc(
const page_t* page, /* in: uncompressed page */
ulint size) /* in: size of modification log entries */
{
ulint trailer_len = page_trailer_get_len(page_zip->data);
ulint trailer_len = page_zip_dir_size(page_zip);
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip->m_end + trailer_len < page_zip->size);
ut_ad(size >= 3); /* modification log entries are >= 1+1+1 bytes */
ut_ad(size < page_zip->size);
if (size < page_zip->size - page_zip->m_end - trailer_len) {
if (size + page_zip->m_end + trailer_len < page_zip->size) {
return(TRUE);
}
@ -154,14 +235,13 @@ page_zip_available(
ulint size) /* in: requested size of
modification log entries */
{
ulint trailer_len = page_trailer_get_len(page_zip->data);
ulint trailer_len = page_zip_dir_size(page_zip);
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip->m_end + trailer_len < page_zip->size);
ut_ad(size < page_zip->size);
return(UNIV_LIKELY(
size < page_zip->size - page_zip->m_end - trailer_len));
size + page_zip->m_end + trailer_len < page_zip->size));
}
/**************************************************************************
@ -189,35 +269,6 @@ page_zip_write_header(
ut_ad(page_zip_validate(page_zip, str - pos));
}
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
{
ulint pos;
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
ut_ad(page_zip_simple_validate(page_zip));
pos = ut_align_offset(str, UNIV_PAGE_SIZE);
ut_ad(pos > PAGE_DATA);
ut_ad(pos < UNIV_PAGE_SIZE
- page_trailer_get_len(buf_frame_align((byte*) str)));
memcpy(page_zip->data + page_zip->size - (UNIV_PAGE_SIZE - pos),
str, length);
ut_ad(page_zip_validate(page_zip, str - pos));
}
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL

View file

@ -21,6 +21,9 @@ Created 5/30/1994 Heikki Tuuri
/* Flag denoting the predefined minimum record: this bit is ORed in the 4
info bits of a record */
#define REC_INFO_MIN_REC_FLAG 0x10UL
/* The deleted flag in info bits */
#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
record has been delete marked */
/* Number of extra bytes in an old-style record,
in addition to the data and the offsets */
@ -35,6 +38,16 @@ in addition to the data and the offsets */
#define REC_STATUS_INFIMUM 2
#define REC_STATUS_SUPREMUM 3
/* The following two constants are needed in page0zip.c in order to
efficiently access heap_no and status when compressing and
decompressing pages. */
/* The offset of heap_no in a compact record */
#define REC_NEW_HEAP_NO 4
/* The shift of heap_no in a compact record.
The status is stored in the low-order bits. */
#define REC_HEAP_NO_SHIFT 3
/* Number of elements that should be initially allocated for the
offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100

View file

@ -84,9 +84,11 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_NEW_STATUS_SHIFT 0
#define REC_OLD_HEAP_NO 5
#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_MASK 0xFFF8UL
#if 0 /* defined in rem0rec.h for use of page0zip.c */
#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_SHIFT 3
#endif
#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
@ -98,9 +100,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
/* The deleted flag in info bits */
#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
record has been delete marked */
/* The following masks are used to filter the SQL null bit from
one-byte and two-byte offsets */

View file

@ -14,8 +14,132 @@ Created June 2005 by Marko Makela
#undef THIS_MODULE
#include "page0page.h"
#include "mtr0log.h"
#include "ut0sort.h"
#include "zlib.h"
/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
static const byte infimum_extra[] = {
0x01, /* info_bits=0, n_owned=1 */
0x00, 0x02 /* heap_no=0, status=2 */
/* ?, ? */ /* next=(first user rec, or supremum) */
};
static const byte infimum_data[] = {
0x69, 0x6e, 0x66, 0x69,
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
};
static const byte supremum_extra_data[] = {
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
0x00, 0x0b, /* heap_no=1, status=3 */
0x00, 0x00, /* next=0 */
0x73, 0x75, 0x70, 0x72,
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
};
/**************************************************************************
Populate the dense page directory from the sparse directory.
The user records are visited first, in list order starting from the
infimum, and then the records on the PAGE_FREE list.  Every visited
record gets one dense directory slot (in visiting order) and is also
stored in recs[heap_no - 2].  The function asserts that every heap_no in
2..n_heap-1 is seen exactly once. */
static
void
page_zip_dir_encode(
/*================*/
	const page_t*	page,	/* in: compact page */
	page_zip_des_t*	page_zip,/* out: dense directory on compressed page */
	const rec_t**	recs)	/* in: array of 0, out: dense page directory
				sorted by ascending address (and heap_no) */
{
	byte*	rec;
	ulint	status;
	ulint	min_mark;
	ulint	heap_no;
	ulint	i;
	ulint	n_heap;
	ulint	offs;

	/* Determine the status bits that every record must carry, and
	whether the first user record is expected to carry
	REC_INFO_MIN_REC_FLAG (PAGE_LEVEL is nonzero and FIL_PAGE_PREV
	is FIL_NULL). */
	min_mark = 0;

	if (mach_read_from_2((page_t*) page + (PAGE_HEADER + PAGE_LEVEL))) {
		status = REC_STATUS_NODE_PTR;
		if (UNIV_UNLIKELY(mach_read_from_4((page_t*) page
				+ FIL_PAGE_PREV) == FIL_NULL)) {
			min_mark = REC_INFO_MIN_REC_FLAG;
		}
	} else {
		status = REC_STATUS_ORDINARY;
	}

	n_heap = page_dir_get_n_heap((page_t*) page);

	/* Traverse the list of stored records in the collation order,
	starting from the first user record. */

	rec = (page_t*) page + PAGE_NEW_INFIMUM;

	i = 0;

	for (;;) {
		ulint	info_bits;

		offs = rec_get_next_offs(rec, TRUE);
		if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
			break;
		}
		rec = (page_t*) page + offs;
		heap_no = rec_get_heap_no_new(rec);
		/* heap_no 0 and 1 are the infimum and the supremum;
		anything below 2 would make recs[heap_no - 2] below
		index out of bounds. */
		ut_a(heap_no >= 2);
		ut_a(heap_no < n_heap);
		ut_a(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
		ut_a(offs);

		if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
			offs |= PAGE_ZIP_DIR_SLOT_OWNED;
		}

		info_bits = rec_get_info_bits(rec, TRUE);
		if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
			info_bits &= ~REC_INFO_DELETED_FLAG;
			offs |= PAGE_ZIP_DIR_SLOT_DEL;
		}
		ut_a(info_bits == min_mark);
		/* Only the smallest user record can have
		REC_INFO_MIN_REC_FLAG set. */
		min_mark = 0;

		page_zip_dir_set(page_zip, i++, offs);

		/* Ensure that each heap_no occurs at most once. */
		ut_a(!recs[heap_no - 2]);
		recs[heap_no - 2] = rec;

		ut_a(rec_get_status(rec) == status);
	}

	offs = page_header_get_field((page_t*) page, PAGE_FREE);

	/* Traverse the free list (of deleted records). */
	while (offs) {
		/* The two most significant bits of a slot are flags;
		an offset that does not fit in the mask would be stored
		as a flagged entry.  Checked in all builds, like the
		corresponding check for user records above. */
		ut_a(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));

		rec = (page_t*) page + offs;

		heap_no = rec_get_heap_no_new(rec);
		ut_a(heap_no >= 2);
		ut_a(heap_no < n_heap);

		ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
		ut_a(rec_get_status(rec) == status);

		page_zip_dir_set(page_zip, i++, offs);

		/* Ensure that each heap_no occurs at most once. */
		ut_a(!recs[heap_no - 2]);
		recs[heap_no - 2] = rec;

		offs = rec_get_next_offs(rec, TRUE);
	}

	/* Ensure that each heap no occurs at least once. */
	ut_a(i + 2 == n_heap);
}
/**************************************************************************
Compress a page. */
@ -24,28 +148,58 @@ page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* out: compressed page */
page_zip_des_t* page_zip,/* in: size; out: compressed page */
const page_t* page) /* in: uncompressed page */
{
z_stream c_stream;
int err;
byte* buf;
ulint trailer_len;
ulint n_heap;
const byte* src;
const byte** recs; /* dense page directory, sorted by address */
mem_heap_t* heap;
ut_ad(page_zip_simple_validate(page_zip));
#ifdef UNIV_DEBUG
if (page_is_comp((page_t*) page)) {
ut_ad(page_simple_validate_new((page_t*) page));
} else {
ut_ad(page_simple_validate_old((page_t*) page));
}
#endif /* UNIV_DEBUG */
ut_a(page_is_comp((page_t*) page));
ut_ad(page_simple_validate_new((page_t*) page));
ut_ad(page_zip);
ut_ad(page_zip->data);
ut_ad(!(page_zip->size & (page_zip->size - 1))); /* power of 2 */
ut_ad(page_zip->size <= UNIV_PAGE_SIZE);
ut_ad(page_zip->size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
buf = mem_alloc(page_zip->size - PAGE_DATA);
/* Check the data that will be omitted. */
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
infimum_extra, sizeof infimum_extra));
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
infimum_data, sizeof infimum_data));
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
/* info_bits == 0, n_owned <= max */
<= PAGE_DIR_SLOT_MAX_N_OWNED);
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
supremum_extra_data, sizeof supremum_extra_data));
if (UNIV_UNLIKELY(!page_get_n_recs((page_t*) page))) {
ut_a(rec_get_next_offs((page_t*) page + PAGE_NEW_INFIMUM, TRUE)
== PAGE_NEW_SUPREMUM);
}/* else {
ut_a(rec_get_next_offs((page_t*) page + PAGE_NEW_INFIMUM, TRUE)
== (page_zip_dir_get(page_zip, 0)
& PAGE_ZIP_DIR_SLOT_MASK));
}*/
/* Determine the length of the page trailer. */
trailer_len = page_trailer_get_len(page);
ut_ad(trailer_len < UNIV_PAGE_SIZE - PAGE_DATA);
n_heap = page_dir_get_n_heap((page_t*) page) - 2;
ut_a(n_heap * PAGE_ZIP_DIR_SLOT_SIZE < page_zip->size);
heap = mem_heap_create(page_zip->size
+ n_heap * ((sizeof *recs) - PAGE_ZIP_DIR_SLOT_SIZE));
recs = mem_heap_alloc(heap, n_heap * sizeof *recs);
memset(recs, 0, n_heap * sizeof *recs);
buf = mem_heap_alloc(heap, page_zip->size
- PAGE_DATA - PAGE_ZIP_DIR_SLOT_SIZE * n_heap);
page_zip_dir_encode(page, page_zip, recs);
/* Compress the data payload. */
c_stream.zalloc = (alloc_func) 0;
@ -56,28 +210,52 @@ page_zip_compress(
ut_a(err == Z_OK);
c_stream.next_out = buf;
c_stream.next_in = (void*) (page + PAGE_DATA);
c_stream.avail_out = page_zip->size - (PAGE_DATA - 1) - trailer_len;
c_stream.avail_in = page_header_get_field((page_t*) page,
PAGE_HEAP_TOP) - PAGE_DATA;
c_stream.avail_out = page_zip->size - (PAGE_DATA - 1)
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE;
if (UNIV_LIKELY(n_heap > 0)
&& *recs == page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
src = page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES);
recs++;
} else {
src = page + PAGE_ZIP_START;
}
while (n_heap--) {
c_stream.next_in = (void*) src;
c_stream.avail_in = *recs - src - REC_N_NEW_EXTRA_BYTES;
err = deflate(&c_stream, Z_NO_FLUSH);
switch (err) {
case Z_OK:
case Z_STREAM_END:
break;
default:
goto zlib_error;
}
src = *recs++;
}
/* Compress the last record. */
c_stream.next_in = (void*) src;
c_stream.avail_in =
page_header_get_field((page_t*) page, PAGE_HEAP_TOP)
- ut_align_offset(src, UNIV_PAGE_SIZE);
ut_a(c_stream.avail_in < UNIV_PAGE_SIZE);
err = deflate(&c_stream, Z_FINISH);
if (err != Z_STREAM_END) {
zlib_error:
deflateEnd(&c_stream);
mem_free(buf);
mem_heap_free(heap);
return(FALSE);
}
err = deflateEnd(&c_stream);
ut_a(err == Z_OK);
ut_ad(c_stream.avail_in == page_header_get_field((page_t*) page,
PAGE_HEAP_TOP) - PAGE_DATA);
ut_ad(c_stream.avail_out == page_zip->size - (PAGE_DATA - 1)
- trailer_len);
ut_a(c_stream.total_in == (uLong) c_stream.avail_in);
ut_a(c_stream.total_out <= (uLong) c_stream.avail_out);
page_zip->m_end = page_zip->m_start = PAGE_DATA + c_stream.total_out;
/* Copy the page header */
memcpy(page_zip->data, page, PAGE_DATA);
@ -85,11 +263,8 @@ page_zip_compress(
memcpy(page_zip->data + PAGE_DATA, buf, c_stream.total_out);
/* Zero out the area reserved for the modification log */
memset(page_zip->data + PAGE_DATA + c_stream.total_out, 0,
page_zip->size - PAGE_DATA - trailer_len - c_stream.total_out);
/* Copy the page trailer */
memcpy(page_zip->data + page_zip->size - trailer_len,
page + UNIV_PAGE_SIZE - trailer_len, trailer_len);
mem_free(buf);
c_stream.avail_out + 1);
mem_heap_free(heap);
ut_ad(page_zip_validate(page_zip, page));
return(TRUE);
}
@ -142,6 +317,185 @@ page_zip_ulint_write(
return(0); /* 11xxxxxxx xxxxxxxx: reserved */
}
/**************************************************************************
Compare two page directory entries by the address of the record. */
UNIV_INLINE
ibool
page_zip_dir_cmp(
/*=============*/
				/* out: positive if rec1 > rec2 */
	const rec_t*	rec1,	/* in: rec1 */
	const rec_t*	rec2)	/* in: rec2 */
{
	/* Nonzero exactly when rec1 lies at a higher address. */
	return(rec2 < rec1);
}
/**************************************************************************
Sort the dense page directory by address (heap_no).
The whole body is generated by the UT_SORT_FUNCTION_BODY macro, which is
given this function's own name, the two arrays, the bounds and the
comparison function page_zip_dir_cmp (which orders records by address).
NOTE(review): the macro comes from ut0sort.h and is not visible here;
presumably it implements a recursive merge sort that treats "high" as an
exclusive upper bound -- confirm before changing callers. */
static
void
page_zip_dir_sort(
/*==============*/
rec_t** arr, /* in/out: dense page directory */
rec_t** aux_arr,/* in/out: work area */
ulint low, /* in: lower bound of the sorting area */
ulint high) /* in: upper bound of the sorting area */
{
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
page_zip_dir_cmp);
}
/**************************************************************************
Populate the sparse page directory from the dense directory.
The first page_get_n_recs(page) dense slots describe the user records;
every slot flagged PAGE_ZIP_DIR_SLOT_OWNED produces one sparse directory
slot.  The remaining dense slots (up to n_heap) must carry no flags.
All record pointers are collected in recs[] and sorted by address. */
static
ibool
page_zip_dir_decode(
/*================*/
					/* out: TRUE on success,
					FALSE on failure */
	const page_zip_des_t*	page_zip,/* in: dense page directory on
					compressed page */
	page_t*			page,	/* in: compact page with valid header;
					out: trailer and sparse page directory
					filled in */
	rec_t**			recs,	/* out: dense page directory sorted by
					ascending address (and heap_no) */
	rec_t**			recs_aux,/* in/out: scratch area */
	ulint			n_heap)	/* in: number of user records, and
					size of recs[] and recs_aux[] */
{
	ulint	i;
	ulint	n_recs;
	byte*	slot;

	n_recs = page_get_n_recs(page);

	if (UNIV_UNLIKELY(n_recs > n_heap)) {
		/* Inconsistent page header: filling recs[0..n_recs-1]
		below would overflow the array of n_heap elements. */
		return(FALSE);
	}

	/* Traverse the list of stored records in the sorting order,
	starting from the first user record. */

	slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
	UNIV_PREFETCH_RW(slot);

	/* Zero out the page trailer. */
	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);

	/* The first sparse directory slot points to the infimum. */
	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
	slot -= PAGE_DIR_SLOT_SIZE;
	UNIV_PREFETCH_RW(slot);

	/* Initialize the sparse directory and copy the dense directory. */
	for (i = 0; i < n_recs; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
			slot -= PAGE_DIR_SLOT_SIZE;
			UNIV_PREFETCH_RW(slot);
		}

		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
	}

	/* The last sparse directory slot points to the supremum. */
	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);

	/* The number of slots written must match the page header. */
	if (UNIV_UNLIKELY(slot != page_dir_get_nth_slot(page,
			page_dir_get_n_slots(page) - 1))) {
		return(FALSE);
	}

	/* Copy the rest of the dense directory.  Continue from
	i == n_recs: restarting from 0 would re-examine the user record
	slots and fail on any slot that carries an OWNED or DEL flag. */
	for (; i < n_heap; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			return(FALSE);
		}

		recs[i] = page + offs;
	}

	if (UNIV_LIKELY(n_heap > 1)) {
		/* NOTE(review): UT_SORT_FUNCTION_BODY (ut0sort.h) treats
		the upper bound as exclusive, so pass n_heap in order to
		include the last element in the sort -- confirm against
		ut0sort.h. */
		page_zip_dir_sort(recs, recs_aux, 0, n_heap);
	}

	return(TRUE);
}
/**************************************************************************
Restore the info_bits/n_owned byte and the next-record pointer of every
record from the dense page directory.  The first page_get_n_recs(page)
slots are the user records in list order; the remaining slots, up to
page_dir_get_n_heap(page) - 2, are chained into the free list. */
static
ibool
page_zip_set_extra_bytes(
/*=====================*/
					/* out: TRUE on success,
					FALSE on failure */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	page_t*			page,	/* in/out: uncompressed page */
	ulint			info_bits)/* in: REC_INFO_MIN_REC_FLAG or 0 */
{
	ulint	n;
	ulint	i;
	ulint	n_owned = 1;
	ulint	offs;
	rec_t*	rec;

	n = page_get_n_recs(page);
	rec = page + PAGE_NEW_INFIMUM;

	for (i = 0; i < n; i++) {
		offs = page_zip_dir_get(page_zip, i);

		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
			info_bits |= REC_INFO_DELETED_FLAG;
		}
		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
			info_bits |= n_owned;
			n_owned = 1;
		} else {
			n_owned++;
		}

		offs &= PAGE_ZIP_DIR_SLOT_MASK;
		if (UNIV_UNLIKELY(!offs)) {
			return(FALSE);
		}

		/* Link the previous record to this one ... */
		rec_set_next_offs_new(rec, NULL, offs);
		rec = page + offs;
		/* ... and only then store the flags: they were decoded
		from this record's own directory slot, so they belong to
		the record at offs, not to its predecessor. */
		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
		info_bits = 0;
	}

	/* Set the next pointer of the last user record. */
	rec_set_next_offs_new(rec, NULL, PAGE_NEW_SUPREMUM);

	/* Set n_owned of the supremum record. */
	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;

	/* The dense directory has n - 2 slots (the infimum and the
	supremum have none). */
	n = page_dir_get_n_heap(page);

	if (i + 2 >= n) {
		/* No deleted records on the free list. */
		return(UNIV_LIKELY(i + 2 == n));
	}

	offs = page_zip_dir_get(page_zip, i);

	/* Set the extra bytes of deleted records on the free list. */
	for (;;) {
		if (UNIV_UNLIKELY(!offs)
		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			return(FALSE);
		}

		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */

		if (++i + 2 == n) {
			/* That was the last slot: stop before reading
			past the end of the dense directory. */
			break;
		}

		offs = page_zip_dir_get(page_zip, i);
		rec_set_next_offs_new(rec, NULL, offs);
	}

	/* Terminate the free list. */
	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
	rec_set_next_offs_new(rec, NULL, 0);

	return(TRUE);
}
/**************************************************************************
Apply the modification log to an uncompressed page. */
static
@ -172,7 +526,7 @@ page_zip_apply_log(
|| UNIV_UNLIKELY(data + length >= end)) {
return(NULL);
}
/* TODO: determine offset from heap_no */
offset += PAGE_DATA;
ut_a(offset + length < UNIV_PAGE_SIZE);
@ -197,14 +551,48 @@ page_zip_decompress(
{
z_stream d_stream;
int err;
ulint trailer_len;
byte** recs; /* dense page directory, sorted by address */
byte* dst;
ulint heap_status;/* heap_no and status bits */
ulint n_heap;
mem_heap_t* heap;
ulint info_bits;
ut_ad(page_zip_simple_validate(page_zip));
trailer_len = page_trailer_get_len(page_zip->data);
ut_ad(trailer_len < page_zip->size - PAGE_DATA);
ut_ad(page_header_get_field((page_t*) page_zip->data, PAGE_HEAP_TOP)
<= UNIV_PAGE_SIZE - trailer_len);
n_heap = page_dir_get_n_heap(page_zip->data) - 2;
ut_a(n_heap * PAGE_ZIP_DIR_SLOT_SIZE < page_zip->size);
heap = mem_heap_create(n_heap * (2 * sizeof *recs));
recs = mem_heap_alloc(heap, n_heap * (2 * sizeof *recs));
/* Copy the page header. */
memcpy(page, page_zip->data, PAGE_DATA);
/* Copy the page directory. */
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page,
recs, recs + n_heap, n_heap))) {
mem_heap_free(heap);
return(FALSE);
}
/* Copy the infimum and supremum records. */
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
infimum_extra, sizeof infimum_extra);
if (UNIV_UNLIKELY(!page_get_n_recs((page_t*) page))) {
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
NULL, PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
NULL,
page_zip_dir_get(page_zip, 0)
& PAGE_ZIP_DIR_SLOT_MASK);
}
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
supremum_extra_data, sizeof supremum_extra_data);
/* Decompress the user records. */
d_stream.zalloc = (alloc_func) 0;
d_stream.zfree = (free_func) 0;
d_stream.opaque = (voidpf) 0;
@ -213,38 +601,90 @@ page_zip_decompress(
ut_a(err == Z_OK);
/* Compressed input starts right after the page header. */
d_stream.next_in = page_zip->data + PAGE_DATA;
d_stream.next_out = page + PAGE_DATA;
/* NOTE(review): avail_in is assigned twice in this listing; the later
assignment (based on n_heap) is the one that takes effect.  The first
one, and the avail_out assignment between them, look like pre-change
lines of the patch -- confirm. */
d_stream.avail_in = page_zip->size - trailer_len - (PAGE_DATA - 1);
d_stream.avail_out = page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- PAGE_DATA;
d_stream.avail_in = page_zip->size - n_heap - (PAGE_DATA + 1);
/* If the first entry of recs is the record located at
PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES, start writing there and skip
that entry; otherwise start writing at PAGE_ZIP_START. */
if (UNIV_LIKELY(n_heap > 0)
&& *recs == page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
dst = page + (PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES);
recs++;
} else {
dst = page + PAGE_ZIP_START;
}
/* Choose the status bits from the page level: a non-zero PAGE_LEVEL
gives REC_STATUS_NODE_PTR, and if FIL_PAGE_PREV is also FIL_NULL then
REC_INFO_MIN_REC_FLAG is passed on to page_zip_set_extra_bytes().
A zero level gives REC_STATUS_ORDINARY.  In both cases the initial
value encodes heap number 2 as 2 << REC_HEAP_NO_SHIFT. */
info_bits = 0;
if (mach_read_from_2((page_t*) page + (PAGE_HEADER + PAGE_LEVEL))) {
heap_status = REC_STATUS_NODE_PTR | 2 << REC_HEAP_NO_SHIFT;
if (UNIV_UNLIKELY(mach_read_from_4((page_t*) page
+ FIL_PAGE_PREV) == FIL_NULL)) {
info_bits = REC_INFO_MIN_REC_FLAG;
}
} else {
heap_status = REC_STATUS_ORDINARY | 2 << REC_HEAP_NO_SHIFT;
}
/* Inflate piecewise: each pass limits the output window so that it
ends REC_N_NEW_EXTRA_BYTES before the next record start (*recs).
n_heap wraps around when this loop ends; it is recomputed from the
page header further down before it is used again. */
while (n_heap--) {
d_stream.next_out = dst;
d_stream.avail_out = *recs - dst - REC_N_NEW_EXTRA_BYTES;
/* set heap_no and the status bits */
mach_write_to_2(dst - REC_NEW_HEAP_NO, heap_status);
/* NOTE(review): the initial value above encodes the heap number as
2 << REC_HEAP_NO_SHIFT, so advancing by one heap number would be
1 << REC_HEAP_NO_SHIFT.  Adding the shift count itself looks like a
bug -- confirm. */
heap_status += REC_HEAP_NO_SHIFT;
err = inflate(&d_stream, Z_NO_FLUSH);
/* Z_BUF_ERROR is tolerated only when the output window for this
record has been used up completely; any other error aborts. */
switch (err) {
case Z_OK:
case Z_STREAM_END:
break;
case Z_BUF_ERROR:
if (!d_stream.avail_out) {
break;
}
/* fall through */
default:
goto zlib_error;
}
dst = *recs++;
}
/* Decompress the last record. */
/* The final output window runs from dst up to PAGE_HEAP_TOP. */
d_stream.next_out = dst;
d_stream.avail_out =
page_header_get_field(page, PAGE_HEAP_TOP)
- ut_align_offset(dst, UNIV_PAGE_SIZE);
ut_a(d_stream.avail_out < UNIV_PAGE_SIZE);
err = inflate(&d_stream, Z_FINISH);
if (err != Z_STREAM_END) {
/* Shared failure exit (also reached by goto from the loop above):
release the inflate state and the pointer array. */
zlib_error:
inflateEnd(&d_stream);
mem_heap_free(heap);
return(FALSE);
}
err = inflateEnd(&d_stream);
ut_a(err == Z_OK);
/* NOTE(review): the two debug assertions below compare avail_in and
avail_out after inflation against expressions built from trailer_len
and PAGE_HEAP_TOP; they look like pre-change lines -- confirm. */
ut_ad(d_stream.avail_in
== page_zip->size - trailer_len - (PAGE_DATA - 1));
ut_ad(d_stream.avail_out
== page_header_get_field(page_zip->data, PAGE_HEAP_TOP) - PAGE_DATA);
/* NOTE(review): this compares the bytes consumed (total_in) with the
bytes still unread (avail_in), not with the initial input size --
confirm the intent. */
ut_a(d_stream.total_in <= (uLong) d_stream.avail_in);
/* NOTE(review): this compares total_out with itself and so can never
fail; the intended right-hand side is not visible here. */
ut_a(d_stream.total_out == d_stream.total_out);
/* recs must not be used after this point. */
mem_heap_free(heap);
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(
page_zip, page, info_bits))) {
return(FALSE);
}
/* Re-read the heap record count (the loop counter above was consumed). */
n_heap = page_dir_get_n_heap(page) - 2;
/* The modification log starts right after the compressed stream.
NOTE(review): m_start is assigned on both of the next two lines with
the same value; the first line looks like a pre-change line. */
page_zip->m_start = PAGE_DATA + d_stream.total_in;
page_zip->m_end = page_zip->m_start = PAGE_DATA + d_stream.total_in;
/* Copy the page header */
memcpy(page, page_zip->data, PAGE_DATA);
/* Copy the page trailer */
/* NOTE(review): the destination here is the compressed page
(page_zip->data) and the source is the uncompressed page -- confirm
that this direction is intended in a decompression routine. */
memcpy(page_zip->data + page_zip->size - trailer_len,
page + UNIV_PAGE_SIZE - trailer_len, trailer_len);
/* Apply the modification log. */
{
const byte* mod_log_ptr;
/* NOTE(review): the argument list below shows two alternative
(start, end) pairs, one bounded by trailer_len and one bounded by
n_heap * PAGE_ZIP_DIR_SLOT_SIZE; only one pair can belong to the
real call. */
mod_log_ptr = page_zip_apply_log(
page_zip->data + page_zip->m_end,
page_zip->data + page_zip->size - trailer_len,
page_zip->data + page_zip->m_start,
page_zip->data + page_zip->size
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE,
page);
if (UNIV_UNLIKELY(!mod_log_ptr)) {
return(FALSE);
@ -279,13 +719,18 @@ page_zip_validate(
const page_t* page) /* in: uncompressed page */
{
/* Checks that decompressing page_zip reproduces `page` byte for byte
(memcmp over UNIV_PAGE_SIZE).  Decompression is run on a copy of the
descriptor and into a scratch page, so neither argument is
modified. */
page_zip_des_t temp_page_zip = *page_zip;
/* NOTE(review): temp_page is declared twice in this listing (an
on-stack array and a buffer from buf_frame_alloc()); these look like
the before and after lines of the patch -- only one can be present in
the real file. */
page_t temp_page[UNIV_PAGE_SIZE];
page_t* temp_page = buf_frame_alloc();
ibool valid;
#if 0 /* disabled during testing hack in buf0flu.c */
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)page))
== page_zip);
#endif
/* NOTE(review): as listed, this return makes everything after it
unreachable, including buf_frame_free(); it looks like the pre-change
form of the three statements that follow. */
return(page_zip_decompress(&temp_page_zip, temp_page, NULL)
&& !memcmp(page, temp_page, UNIV_PAGE_SIZE));
valid = page_zip_decompress(&temp_page_zip, temp_page, NULL)
&& !memcmp(page, temp_page, UNIV_PAGE_SIZE);
/* Release the scratch frame before reporting the result. */
buf_frame_free(temp_page);
return(valid);
}
#endif /* UNIV_DEBUG */
@ -302,22 +747,21 @@ page_zip_write(
{
/* Byte offset of str within its UNIV_PAGE_SIZE-aligned page. */
ulint pos = ut_align_offset(str, UNIV_PAGE_SIZE);
#ifdef UNIV_DEBUG
/* NOTE(review): trailer_len is declared twice in this listing; the
two initializers look like the before and after lines of the patch.
The variable is only used in debug assertions. */
ulint trailer_len = page_trailer_get_len(page_zip->data);
ulint trailer_len = page_zip_dir_size(page_zip);
#endif /* UNIV_DEBUG */
/* Debug-only preconditions: str lies in the block that owns page_zip,
the descriptor is sane, and both page copies agree on the number of
directory slots. */
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
ut_ad(page_zip_simple_validate(page_zip));
ut_ad(page_zip->m_start >= PAGE_DATA);
/* NOTE(review): two alternative continuation lines follow the next
assertion (with and without the page_t* cast). */
ut_ad(page_dir_get_n_slots(ut_align_down((byte*) str, UNIV_PAGE_SIZE))
== page_dir_get_n_slots((page_t*) page_zip->data));
== page_dir_get_n_slots(page_zip->data));
/* The byte at m_end is expected to be zero before writing. */
ut_ad(!page_zip->data[page_zip->m_end]);
ut_ad(PAGE_DATA + trailer_len < page_zip->size);
/* The written range must lie after the page header and before the
end-of-page area; two alternative forms of the upper bound are
listed. */
ut_ad(pos >= PAGE_DATA);
ut_ad(pos + length <= UNIV_PAGE_SIZE - trailer_len);
ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE
* page_dir_get_n_slots(buf_frame_align((byte*)str)));
/* From here on pos is relative to PAGE_DATA. */
pos -= PAGE_DATA;
/* TODO: encode heap_no instead of pos */
ut_ad(page_zip_available(page_zip, page_zip_entry_size(pos, length)));
@ -329,7 +773,7 @@ page_zip_write(
/* Append the payload bytes at m_end and advance m_end past them. */
memcpy(&page_zip->data[page_zip->m_end], str, length);
page_zip->m_end += length;
/* The byte at the new m_end is expected to be zero. */
ut_ad(!page_zip->data[page_zip->m_end]);
/* NOTE(review): the next two assertions state the same bound in two
forms; they look like the before and after lines of the patch. */
ut_ad(page_zip->m_end < page_zip->size - trailer_len);
ut_ad(page_zip->m_end + trailer_len < page_zip->size);
/* Debug builds decompress and compare against the whole uncompressed
page after every write. */
ut_ad(page_zip_validate(page_zip,
ut_align_down((byte*) str, UNIV_PAGE_SIZE)));
}