2005-10-27 11:48:10 +00:00
|
|
|
/******************************************************
Compressed page interface

(c) 2005 Innobase Oy

Created June 2005 by Marko Makela
*******************************************************/
|
|
|
|
|
|
|
|
#ifdef UNIV_MATERIALIZE
|
|
|
|
# undef UNIV_INLINE
|
|
|
|
# define UNIV_INLINE
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "page0zip.h"
|
|
|
|
#include "page0page.h"
|
|
|
|
|
|
|
|
/* The format of compressed pages is as follows.

The header and trailer of the uncompressed pages, excluding the page
directory in the trailer, are copied as is to the header and trailer
of the compressed page.

At the end of the compressed page, there is a dense page directory
pointing to every user record contained on the page, including deleted
records on the free list.  The dense directory is indexed in the
collation order, i.e., in the order in which the record list is
linked on the uncompressed page.  The infimum and supremum records are
excluded.  The two most significant bits of the entries are allocated
for the delete-mark and an n_owned flag indicating the last record in
a chain of records pointed to from the sparse page directory on the
uncompressed page.

The data between PAGE_ZIP_START and the last page directory entry will
be written in compressed format, starting at offset PAGE_DATA.
Infimum and supremum records are not stored.  We exclude the
REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
from the dense page directory stored at the end of the compressed
page.

The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
externally stored columns are stored separately, in ascending order of
heap_no and column index, starting backwards from the dense page
directory.

The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.

MODIFICATION LOG ENTRY FORMAT
- write record:
  - (heap_no - 1) << 1 (1..2 bytes)
  - extra bytes backwards
  - data bytes
- clear record:
  - (heap_no - 1) << 1 | 1 (1..2 bytes)

The integer values are stored in a variable-length format:
- 0xxxxxxx: 0..127
- 1xxxxxxx xxxxxxxx: 0..32767

The end of the modification log is marked by a 0 byte.

In summary, the compressed page looks like this:

(1) Uncompressed page header (PAGE_DATA bytes)
(2) Compressed index information
(3) Compressed page data
(4) Page modification log (page_zip->m_start..page_zip->m_end)
(5) Empty zero-filled space
(6) BLOB pointers (on leaf pages)
  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
  - in descending collation order
(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
  - indexed by heap_no
  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
  - REC_NODE_PTR_SIZE for non-leaf pages
  - 0 otherwise
(8) dense page directory, stored backwards
  - n_dense = n_heap - 2
  - existing records in ascending collation order
  - deleted records (free list) in link order
*/
|
2005-10-27 11:48:10 +00:00
|
|
|
|
2005-11-24 14:13:10 +00:00
|
|
|
/* Offset at which the area that will be compressed begins */
#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
/* Size, in bytes, of one entry in the compressed page directory */
#define PAGE_ZIP_DIR_SLOT_SIZE	2
/* Bits of a directory entry that hold the record offset */
#define PAGE_ZIP_DIR_SLOT_MASK	0x3FFF
/* Bit of a directory entry that carries the 'owned' flag */
#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
/* Bit of a directory entry that carries the 'deleted' flag */
#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
|
|
|
|
|
2006-02-22 13:49:05 +00:00
|
|
|
/**************************************************************************
|
|
|
|
Determine if enough space is available for a page_zip_write_rec() call
|
|
|
|
in the modification log. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
page_zip_available(
|
|
|
|
/*===============*/
|
|
|
|
/* out: TRUE if page_zip_write_rec()
|
|
|
|
will succeed */
|
|
|
|
const page_zip_des_t* page_zip,/* in: compressed page */
|
2006-03-14 14:38:45 +00:00
|
|
|
dict_index_t* index, /* in: index of the B-tree node */
|
2006-02-22 13:49:05 +00:00
|
|
|
ulint length, /* in: combined size of the record */
|
|
|
|
ulint create) /* in: nonzero=add the record to
|
|
|
|
the heap */
|
|
|
|
__attribute__((warn_unused_result, nonnull, pure));
|
|
|
|
|
2005-10-27 11:48:10 +00:00
|
|
|
/**************************************************************************
|
|
|
|
Initialize a compressed page descriptor. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
page_zip_des_init(
|
|
|
|
/*==============*/
|
|
|
|
page_zip_des_t* page_zip) /* in/out: compressed page
|
|
|
|
descriptor */
|
|
|
|
{
|
|
|
|
memset(page_zip, 0, sizeof *page_zip);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
/**************************************************************************
Perform basic consistency checks on a compressed page descriptor.
Every check is a debug assertion; the function itself always
returns TRUE so that it can be wrapped in ut_ad(). */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
					/* out: TRUE if ok */
	const page_zip_des_t*	page_zip)/* in: compressed page descriptor */
{
	/* The descriptor and its data buffer must be present. */
	ut_ad(page_zip);
	ut_ad(page_zip->data);

	/* The size must be a power of 2, at most UNIV_PAGE_SIZE, and
	large enough for the page header and one directory slot. */
	ut_ad((page_zip->size & (page_zip->size - 1)) == 0);
	ut_ad(page_zip->size <= UNIV_PAGE_SIZE);
	ut_ad(page_zip->size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);

	/* The modification log bounds must be ordered and must lie
	within the compressed page. */
	ut_ad(page_zip->m_end >= page_zip->m_start);
	ut_ad(page_zip->m_end < page_zip->size);

	/* The number of BLOB pointers is bounded by the page size. */
	ut_ad(page_zip->n_blobs < page_zip->size / BTR_EXTERN_FIELD_REF_SIZE);

	return(TRUE);
}
#endif /* UNIV_DEBUG */
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
|
|
Ensure that enough space is available in the modification log.
|
|
|
|
If not, try to compress the page. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
page_zip_alloc(
|
|
|
|
/*===========*/
|
|
|
|
/* out: TRUE if enough space is available */
|
|
|
|
page_zip_des_t* page_zip,/* in/out: compressed page;
|
|
|
|
will only be modified if compression is needed
|
|
|
|
and successful */
|
|
|
|
const page_t* page, /* in: uncompressed page */
|
2006-02-10 15:06:17 +00:00
|
|
|
dict_index_t* index, /* in: index of the B-tree node */
|
|
|
|
mtr_t* mtr, /* in: mini-transaction handle,
|
|
|
|
or NULL if no logging is desired */
|
|
|
|
ulint length, /* in: combined size of the record */
|
|
|
|
ulint create) /* in: nonzero=add the record to the heap */
|
2005-10-27 11:48:10 +00:00
|
|
|
{
|
2006-02-10 15:06:17 +00:00
|
|
|
ut_ad(page_is_comp((page_t*) page));
|
|
|
|
ut_ad(page_zip_validate(page_zip, page));
|
2005-10-27 11:48:10 +00:00
|
|
|
|
2006-03-14 14:38:45 +00:00
|
|
|
if (page_zip_available(page_zip, index, length, create)) {
|
2005-10-27 11:48:10 +00:00
|
|
|
return(TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (page_zip->m_start == page_zip->m_end) {
|
|
|
|
/* The page has been freshly compressed, so
|
|
|
|
recompressing it will not help. */
|
|
|
|
return(FALSE);
|
|
|
|
}
|
|
|
|
|
2006-02-10 15:06:17 +00:00
|
|
|
if (!page_zip_compress(page_zip, page, index, mtr)) {
|
|
|
|
/* Unable to compress the page */
|
|
|
|
return(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if there is enough space available after compression. */
|
2006-03-14 14:38:45 +00:00
|
|
|
return(page_zip_available(page_zip, index, length, create));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
|
|
Determine if the length of the page trailer. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
page_zip_get_trailer_len(
|
|
|
|
/*=====================*/
|
|
|
|
/* out: length of the page trailer,
|
|
|
|
in bytes, not including the terminating
|
|
|
|
zero byte of the modification log */
|
|
|
|
const page_zip_des_t* page_zip,/* in: compressed page */
|
|
|
|
dict_index_t* index, /* in: index of the B-tree node */
|
|
|
|
ulint* entry_size)/* out: size of the uncompressed
|
|
|
|
portion of a user record */
|
|
|
|
{
|
|
|
|
ulint uncompressed_size;
|
|
|
|
|
|
|
|
ut_ad(page_zip_simple_validate(page_zip));
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(!page_is_leaf((page_t*) page_zip->data))) {
|
|
|
|
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
|
|
|
|
+ REC_NODE_PTR_SIZE;
|
|
|
|
} else if (dict_index_is_clust(index)) {
|
|
|
|
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
|
|
|
|
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
|
|
|
|
} else {
|
|
|
|
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (entry_size) {
|
|
|
|
*entry_size = uncompressed_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
return((page_dir_get_n_heap((page_t*) page_zip->data) - 2)
|
|
|
|
* uncompressed_size
|
|
|
|
+ page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
|
2005-10-27 11:48:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
|
|
Determine if enough space is available in the modification log. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
page_zip_available(
|
|
|
|
/*===============*/
|
|
|
|
/* out: TRUE if enough space
|
|
|
|
is available */
|
|
|
|
const page_zip_des_t* page_zip,/* in: compressed page */
|
2006-03-14 14:38:45 +00:00
|
|
|
dict_index_t* index, /* in: index of the B-tree node */
|
2006-02-10 15:06:17 +00:00
|
|
|
ulint length, /* in: combined size of the record */
|
|
|
|
ulint create) /* in: nonzero=add the record to
|
|
|
|
the heap */
|
2005-10-27 11:48:10 +00:00
|
|
|
{
|
2006-02-10 15:06:17 +00:00
|
|
|
ulint uncompressed_size;
|
|
|
|
ulint trailer_len;
|
2005-10-27 11:48:10 +00:00
|
|
|
|
2006-02-10 15:06:17 +00:00
|
|
|
ut_ad(length > REC_N_NEW_EXTRA_BYTES);
|
|
|
|
|
2006-03-14 14:38:45 +00:00
|
|
|
trailer_len = page_zip_get_trailer_len(page_zip, index,
|
|
|
|
&uncompressed_size);
|
2006-02-10 15:06:17 +00:00
|
|
|
|
|
|
|
/* Subtract the fixed extra bytes and add the maximum
|
|
|
|
space needed for identifying the record (encoded heap_no). */
|
|
|
|
length -= REC_N_NEW_EXTRA_BYTES - 2;
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(create)) {
|
|
|
|
/* When a record is created, a pointer may be added to
|
2006-02-27 10:26:59 +00:00
|
|
|
the dense directory.
|
2006-02-10 15:06:17 +00:00
|
|
|
Likewise, space for the columns that will not be
|
|
|
|
compressed will be allocated from the page trailer.
|
|
|
|
Also the BLOB pointers will be allocated from there, but
|
|
|
|
we may as well count them in the length of the record. */
|
|
|
|
|
|
|
|
trailer_len += PAGE_ZIP_DIR_SLOT_SIZE + uncompressed_size;
|
|
|
|
}
|
2005-10-27 11:48:10 +00:00
|
|
|
|
|
|
|
return(UNIV_LIKELY(
|
2006-02-10 15:06:17 +00:00
|
|
|
length
|
|
|
|
+ trailer_len
|
|
|
|
+ page_zip->m_end
|
|
|
|
< page_zip->size));
|
2005-10-27 11:48:10 +00:00
|
|
|
}
|
|
|
|
|
2006-02-22 13:02:40 +00:00
|
|
|
/**************************************************************************
|
|
|
|
Write a log record of writing to the uncompressed header portion of a page. */
|
|
|
|
|
|
|
|
void
|
|
|
|
page_zip_write_header_log(
|
|
|
|
/*======================*/
|
|
|
|
const page_zip_des_t* page_zip,/* in: compressed page */
|
|
|
|
ulint offset, /* in: offset to the data */
|
|
|
|
ulint length, /* in: length of the data */
|
|
|
|
mtr_t* mtr); /* in: mini-transaction */
|
|
|
|
|
2005-10-27 11:48:10 +00:00
|
|
|
/**************************************************************************
|
|
|
|
Write data to the uncompressed header portion of a page. The data must
|
2006-02-10 15:06:17 +00:00
|
|
|
already have been written to the uncompressed page.
|
|
|
|
However, the data portion of the uncompressed page may differ from
|
|
|
|
the compressed page when a record is being inserted in
|
|
|
|
page_cur_insert_rec_low(). */
|
2005-10-27 11:48:10 +00:00
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
page_zip_write_header(
|
|
|
|
/*==================*/
|
|
|
|
page_zip_des_t* page_zip,/* in/out: compressed page */
|
|
|
|
const byte* str, /* in: address on the uncompressed page */
|
2006-02-22 13:02:40 +00:00
|
|
|
ulint length, /* in: length of the data */
|
|
|
|
mtr_t* mtr) /* in: mini-transaction, or NULL */
|
2005-10-27 11:48:10 +00:00
|
|
|
{
|
|
|
|
ulint pos;
|
|
|
|
|
|
|
|
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
|
|
|
|
ut_ad(page_zip_simple_validate(page_zip));
|
|
|
|
|
|
|
|
pos = ut_align_offset(str, UNIV_PAGE_SIZE);
|
|
|
|
|
|
|
|
ut_ad(pos < PAGE_DATA);
|
|
|
|
|
2006-03-02 14:05:32 +00:00
|
|
|
memcpy(page_zip->data + pos, str, length);
|
2005-10-27 11:48:10 +00:00
|
|
|
|
2006-02-10 15:06:17 +00:00
|
|
|
/* The following would fail in page_cur_insert_rec_low(). */
|
|
|
|
/* ut_ad(page_zip_validate(page_zip, str - pos)); */
|
2006-02-22 13:02:40 +00:00
|
|
|
|
|
|
|
if (UNIV_LIKELY_NULL(mtr)) {
|
|
|
|
page_zip_write_header_log(page_zip, pos, length, mtr);
|
|
|
|
}
|
2005-10-27 11:48:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_MATERIALIZE
|
|
|
|
# undef UNIV_INLINE
|
|
|
|
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
|
|
|
|
#endif
|