branches/zip:

Initial import of the zip-like compression of B-tree pages.
This commit is contained in:
marko 2005-10-27 11:48:10 +00:00
parent d345f80435
commit 88e6d6863d
41 changed files with 3212 additions and 1230 deletions

View file

@ -14,6 +14,7 @@ Created 6/2/1994 Heikki Tuuri
#include "fsp0fsp.h"
#include "page0page.h"
#include "page0zip.h"
#include "btr0cur.h"
#include "btr0sea.h"
#include "btr0pcur.h"
@ -105,8 +106,9 @@ static
void
btr_page_empty(
/*===========*/
page_t* page, /* in: page to be emptied */
mtr_t* mtr); /* in: mtr */
page_t* page, /* in: page to be emptied */
page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Returns TRUE if the insert fits on the appropriate half-page
with the chosen split_rec. */
@ -258,7 +260,7 @@ btr_page_create(
{
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
page_create(page, mtr,
page_create(page, NULL, mtr,
UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp);
buf_block_align(page)->check_index_page_at_flush = TRUE;
@ -662,6 +664,7 @@ btr_create(
buf_frame_t* ibuf_hdr_frame;
buf_frame_t* frame;
page_t* page;
page_zip_des_t* page_zip;
/* Create the two new segments (one, in the case of an ibuf tree) for
the index tree; the segment headers are put on the allocated root page
@ -723,7 +726,7 @@ btr_create(
}
/* Create a new index page on the the allocated segment page */
page = page_create(frame, mtr, comp);
page = page_create(frame, NULL, mtr, comp);
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Set the index id of the page */
@ -748,6 +751,14 @@ btr_create(
ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(page_zip_compress(page_zip, page))) {
/* An empty page should always be compressible */
ut_error;
}
}
return(page_no);
}
@ -833,7 +844,8 @@ btr_page_reorganize_low(
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
page_t* page, /* in: page to be reorganized */
page_t* page, /* in/out: page to be reorganized */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */
{
@ -846,7 +858,7 @@ btr_page_reorganize_low(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(!!page_is_comp(page) == index->table->comp);
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
@ -863,25 +875,34 @@ btr_page_reorganize_low(
/* Copy the old page to temporary space */
buf_frame_copy(new_page, page);
if (!recovery) {
if (UNIV_LIKELY(!recovery)) {
btr_search_drop_page_hash_index(page);
}
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr, page_is_comp(page));
page_create(page, NULL, mtr, page_is_comp(page));
buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */
page_copy_rec_list_end_no_locks(page, new_page,
page_copy_rec_list_end_no_locks(page,
page_get_infimum_rec(new_page), index, mtr);
/* Copy max trx id to recreated page */
page_set_max_trx_id(page, page_get_max_trx_id(new_page));
if (!recovery) {
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page))) {
/* Reorganizing a page should reduce entropy,
making the compressed page occupy less space. */
ut_error;
}
}
if (UNIV_LIKELY(!recovery)) {
/* Update the record lock bitmaps */
lock_move_reorganize_page(page, new_page);
}
@ -889,7 +910,8 @@ btr_page_reorganize_low(
data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
if (UNIV_UNLIKELY(data_size1 != data_size2)
|| UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
buf_page_print(page);
buf_page_print(new_page);
fprintf(stderr,
@ -917,7 +939,9 @@ btr_page_reorganize(
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */
{
btr_page_reorganize_low(FALSE, page, index, mtr);
btr_page_reorganize_low(FALSE, page,
buf_block_get_page_zip(buf_block_align(page)),
index, mtr);
}
/***************************************************************
@ -938,8 +962,10 @@ btr_parse_page_reorganize(
/* The record is empty, except for the record initial part */
if (page) {
btr_page_reorganize_low(TRUE, page, index, mtr);
if (UNIV_LIKELY(page != NULL)) {
page_zip_des_t* page_zip = buf_block_get_page_zip(
buf_block_align(page));
btr_page_reorganize_low(TRUE, page, page_zip, index, mtr);
}
return(ptr);
@ -951,17 +977,20 @@ static
void
btr_page_empty(
/*===========*/
page_t* page, /* in: page to be emptied */
mtr_t* mtr) /* in: mtr */
page_t* page, /* in: page to be emptied */
page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr */
{
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_zip || page_zip_validate(page_zip, page));
btr_search_drop_page_hash_index(page);
/* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr, page_is_comp(page));
page_create(page, page_zip, mtr, page_is_comp(page));
buf_block_align(page)->check_index_page_at_flush = TRUE;
}
@ -993,6 +1022,7 @@ btr_root_raise_and_insert(
ulint level;
rec_t* node_ptr_rec;
page_cur_t* page_cursor;
page_zip_des_t* page_zip;
root = btr_cur_get_page(cursor);
tree = btr_cur_get_tree(cursor);
@ -1025,8 +1055,12 @@ btr_root_raise_and_insert(
/* Move the records from root to the new page */
page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
cursor->index, mtr);
page_zip = buf_block_get_page_zip(buf_block_align(new_page));
page_move_rec_list_end(new_page, page_zip,
page_get_infimum_rec(root), NULL,
cursor->index, mtr);
/* If this is a pessimistic insert which is actually done to
perform a pessimistic update then we have stored the lock
information of the record to be inserted on the infimum of the
@ -1046,7 +1080,7 @@ btr_root_raise_and_insert(
node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap,
level);
/* Reorganize the root to get free space */
btr_page_reorganize(root, cursor->index, mtr);
btr_page_reorganize_low(FALSE, root, NULL, cursor->index, mtr);
page_cursor = btr_cur_get_page_cur(cursor);
@ -1054,17 +1088,26 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
cursor->index, mtr);
node_ptr_rec = page_cur_tuple_insert(page_cursor, NULL,
node_ptr, cursor->index, mtr);
ut_ad(node_ptr_rec);
page_zip = buf_block_get_page_zip(buf_block_align(root));
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr);
btr_set_min_rec_mark(node_ptr_rec, NULL, mtr);
if (!UNIV_UNLIKELY(page_zip_compress(page_zip, root))) {
/* The root page should only contain the
node pointer to new_page at this point.
Thus, the data should fit. */
ut_error;
}
/* Free the memory heap */
mem_heap_free(heap);
@ -1564,15 +1607,13 @@ btr_page_split_and_insert(
mtr_t* mtr) /* in: mtr */
{
dict_tree_t* tree;
page_t* page;
ulint page_no;
byte direction;
ulint hint_page_no;
page_t* new_page;
rec_t* split_rec;
page_t* left_page;
page_t* right_page;
page_t* insert_page;
page_zip_des_t* left_page_zip;
page_zip_des_t* right_page_zip;
page_cur_t* page_cursor;
rec_t* first_rec;
byte* buf = 0; /* remove warning */
@ -1597,13 +1638,13 @@ func_start:
ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
page = btr_cur_get_page(cursor);
left_page = btr_cur_get_page(cursor);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
ut_ad(mtr_memo_contains(mtr, buf_block_align(left_page),
MTR_MEMO_PAGE_X_FIX));
ut_ad(page_get_n_recs(page) >= 2);
page_no = buf_frame_get_page_no(page);
ut_ad(page_get_n_recs(left_page) >= 2);
left_page_zip = buf_block_get_page_zip(buf_block_align(left_page));
/* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper
@ -1611,26 +1652,24 @@ func_start:
if (n_iterations > 0) {
direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = btr_page_get_sure_split_rec(cursor, tuple);
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
hint_page_no = page_no + 1;
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
direction = FSP_DOWN;
hint_page_no = page_no - 1;
} else {
direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = page_get_middle_rec(page);
split_rec = page_get_middle_rec(left_page);
}
/* 2. Allocate a new page to the tree */
new_page = btr_page_alloc(tree, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr);
btr_page_create(new_page, tree, mtr);
right_page = btr_page_alloc(tree,
buf_frame_get_page_no(left_page) + 1,
direction,
btr_page_get_level(left_page, mtr), mtr);
btr_page_create(right_page, tree, mtr);
/* 3. Calculate the first record on the upper half-page, and the
first record (move_limit) on original page which ends up on the
@ -1649,7 +1688,8 @@ func_start:
/* 4. Do first the modifications in the tree structure */
btr_attach_half_pages(tree, page, first_rec, new_page, direction, mtr);
btr_attach_half_pages(tree, left_page, first_rec, right_page,
direction, mtr);
if (split_rec == NULL) {
mem_free(buf);
@ -1667,34 +1707,31 @@ func_start:
insert_will_fit = btr_page_insert_fits(cursor,
split_rec, offsets, tuple, heap);
} else {
mem_free(buf);
insert_will_fit = btr_page_insert_fits(cursor,
NULL, NULL, tuple, heap);
}
if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
if (insert_will_fit && (btr_page_get_level(left_page, mtr) == 0)) {
mtr_memo_release(mtr, dict_tree_get_lock(tree),
MTR_MEMO_X_LOCK);
}
/* 5. Move then the records to the new page */
if (direction == FSP_DOWN) {
/* fputs("Split left\n", stderr); */
right_page_zip = buf_block_get_page_zip(buf_block_align(right_page));
page_move_rec_list_start(new_page, page, move_limit,
cursor->index, mtr);
left_page = new_page;
right_page = page;
page_move_rec_list_end(right_page, right_page_zip,
move_limit, left_page_zip,
cursor->index, mtr);
if (UNIV_UNLIKELY(direction == FSP_DOWN)) {
fputs("Split left\n", stderr); /* TODO: coverage test */
lock_update_split_left(right_page, left_page);
} else {
/* fputs("Split right\n", stderr); */
page_move_rec_list_end(new_page, page, move_limit,
cursor->index, mtr);
left_page = page;
right_page = new_page;
lock_update_split_right(right_page, left_page);
}
@ -1722,9 +1759,12 @@ func_start:
page_cur_search(insert_page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
rec = page_cur_tuple_insert(page_cursor, left_page_zip,
tuple, cursor->index, mtr);
if (rec != NULL) {
ut_ad(!left_page_zip || page_zip_validate(left_page_zip, left_page));
if (UNIV_LIKELY(rec != NULL)) {
/* Insert fit on the page: update the free bits for the
left and right pages in the same mtr */
@ -1744,14 +1784,16 @@ func_start:
page_cur_search(insert_page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
rec = page_cur_tuple_insert(page_cursor, left_page_zip,
tuple, cursor->index, mtr);
if (UNIV_UNLIKELY(rec == NULL)) {
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the
start of the function for a new split */
/* We play safe and reset the free bits for new_page */
ibuf_reset_free_bits(cursor->index, new_page);
/* We play safe and reset the free bits for right_page */
ibuf_reset_free_bits(cursor->index, right_page);
/* fprintf(stderr, "Split second round %lu\n",
buf_frame_get_page_no(page)); */
@ -1830,11 +1872,10 @@ void
btr_set_min_rec_mark_log(
/*=====================*/
rec_t* rec, /* in: record */
ulint comp, /* nonzero=compact record format */
byte type, /* in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
mtr_t* mtr) /* in: mtr */
{
mlog_write_initial_log_record(rec,
comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
mlog_write_initial_log_record(rec, type, mtr);
/* Write rec offset as a 2-byte ulint */
mlog_catenate_ulint(mtr, ut_align_offset(rec, UNIV_PAGE_SIZE),
@ -1863,11 +1904,14 @@ btr_parse_set_min_rec_mark(
}
if (page) {
page_zip_des_t* page_zip = buf_block_get_page_zip(
buf_block_align(page));
ut_a(!page_is_comp(page) == !comp);
rec = page + mach_read_from_2(ptr);
btr_set_min_rec_mark(rec, comp, mtr);
btr_set_min_rec_mark(rec, page_zip, mtr);
}
return(ptr + 2);
@ -1879,17 +1923,29 @@ Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr) /* in: mtr */
rec_t* rec, /* in: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
mtr_t* mtr) /* in: mtr */
{
ulint info_bits;
info_bits = rec_get_info_bits(rec, comp);
if (UNIV_LIKELY(page_rec_is_comp(rec))) {
info_bits = rec_get_info_bits(rec, TRUE);
rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
rec_set_info_bits_new(rec, page_zip,
info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, comp, mtr);
btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
} else {
ut_ad(!page_zip);
info_bits = rec_get_info_bits(rec, FALSE);
rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
}
}
/*****************************************************************
@ -1928,9 +1984,10 @@ btr_node_ptr_delete(
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height. */
static
void
ibool
btr_lift_page_up(
/*=============*/
/* out: TRUE on success */
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page which is the only on its level;
must not be empty: use
@ -1941,6 +1998,7 @@ btr_lift_page_up(
page_t* father_page;
ulint page_level;
dict_index_t* index;
page_zip_des_t* father_page_zip;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@ -1948,22 +2006,39 @@ btr_lift_page_up(
MTR_MEMO_PAGE_X_FIX));
father_page = buf_frame_align(
btr_page_get_father_node_ptr(tree, page, mtr));
father_page_zip = buf_block_get_page_zip(buf_block_align(father_page));
page_level = btr_page_get_level(page, mtr);
index = UT_LIST_GET_FIRST(tree->tree_indexes);
btr_search_drop_page_hash_index(page);
/* Make the father empty */
btr_page_empty(father_page, mtr);
btr_page_empty(father_page, NULL, mtr);
/* Move records to the father */
page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
index, mtr);
lock_update_copy_and_discard(father_page, page);
if (!page_copy_rec_list_end(father_page, NULL,
page_get_infimum_rec(page), index, mtr)) {
ut_error;
}
btr_page_set_level(father_page, page_level, mtr);
if (UNIV_LIKELY_NULL(father_page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(
father_page_zip, father_page))) {
/* Restore the old page from temporary space */
if (UNIV_UNLIKELY(!page_zip_decompress(
father_page_zip, father_page, mtr))) {
ut_error; /* probably memory corruption */
}
return(FALSE);
}
}
lock_update_copy_and_discard(father_page, page);
/* Free the file page */
btr_page_free(tree, page, mtr);
@ -1971,6 +2046,8 @@ btr_lift_page_up(
ibuf_reset_free_bits(index, father_page);
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(tree, father_page, mtr));
return(TRUE);
}
/*****************************************************************
@ -1981,12 +2058,12 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to the
brothers, if they exist. NOTE: it is assumed that the caller has reserved
enough free extents so that the compression will always succeed if done! */
brothers, if they exist. */
void
ibool
btr_compress(
/*=========*/
/* out: TRUE on success */
btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
@ -2001,20 +2078,16 @@ btr_compress(
page_t* father_page;
ibool is_left;
page_t* page;
rec_t* orig_pred;
rec_t* orig_succ;
rec_t* node_ptr;
ulint data_size;
ulint n_recs;
ulint max_ins_size;
ulint max_ins_size_reorg;
ulint level;
ulint comp;
page = btr_cur_get_page(cursor);
tree = btr_cur_get_tree(cursor);
comp = page_is_comp(page);
ut_a((ibool)!!comp == cursor->index->table->comp);
ut_a((ibool)!!page_is_comp(page) == cursor->index->table->comp);
ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
MTR_MEMO_X_LOCK));
@ -2030,34 +2103,33 @@ btr_compress(
right_page_no); */
node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
ut_ad(!page_is_comp(page)
|| rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
father_page = buf_frame_align(node_ptr);
ut_a(comp == page_is_comp(father_page));
ut_a(page_is_comp(page) == page_is_comp(father_page));
/* Decide the page to which we try to merge and which will inherit
the locks */
if (left_page_no != FIL_NULL) {
is_left = left_page_no != FIL_NULL;
if (is_left) {
is_left = TRUE;
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
mtr);
} else if (right_page_no != FIL_NULL) {
is_left = FALSE;
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
mtr);
} else {
/* The page is the only one on the level, lift the records
to the father */
btr_lift_page_up(tree, page, mtr);
return;
return(btr_lift_page_up(tree, page, mtr));
}
n_recs = page_get_n_recs(page);
data_size = page_get_data_size(page);
ut_a(page_is_comp(merge_page) == comp);
ut_a(page_is_comp(merge_page) == page_is_comp(page));
max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
merge_page, n_recs);
@ -2065,14 +2137,14 @@ btr_compress(
/* No space for merge */
return;
return(FALSE);
}
ut_ad(page_validate(merge_page, cursor->index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
if (data_size > max_ins_size) {
if (UNIV_UNLIKELY(data_size > max_ins_size)) {
/* We have to reorganize merge_page */
@ -2083,13 +2155,14 @@ btr_compress(
ut_ad(page_validate(merge_page, cursor->index));
ut_ad(page_get_max_insert_size(merge_page, n_recs)
== max_ins_size_reorg);
}
if (data_size > max_ins_size) {
if (UNIV_UNLIKELY(data_size > max_ins_size)) {
/* Add fault tolerance, though this should never happen */
/* Add fault tolerance, though this should
never happen */
return;
return(FALSE);
}
}
btr_search_drop_page_hash_index(page);
@ -2118,17 +2191,27 @@ btr_compress(
/* Move records to the merge page */
if (is_left) {
orig_pred = page_rec_get_prev(
rec_t* orig_pred = page_rec_get_prev(
page_get_supremum_rec(merge_page));
page_copy_rec_list_start(merge_page, page,
page_get_supremum_rec(page), cursor->index, mtr);
if (UNIV_UNLIKELY(!page_copy_rec_list_start(
merge_page, buf_block_get_page_zip(
buf_block_align(merge_page)),
page_get_supremum_rec(page),
cursor->index, mtr))) {
return(FALSE);
}
lock_update_merge_left(merge_page, orig_pred, page);
} else {
orig_succ = page_rec_get_next(
rec_t* orig_succ = page_rec_get_next(
page_get_infimum_rec(merge_page));
page_copy_rec_list_end(merge_page, page,
page_get_infimum_rec(page), cursor->index, mtr);
if (UNIV_UNLIKELY(!page_copy_rec_list_end(
merge_page, buf_block_get_page_zip(
buf_block_align(merge_page)),
page_get_infimum_rec(page),
cursor->index, mtr))) {
return(FALSE);
}
lock_update_merge_right(orig_succ, page);
}
@ -2143,6 +2226,7 @@ btr_compress(
btr_page_free(tree, page, mtr);
ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
return(TRUE);
}
/*****************************************************************
@ -2155,7 +2239,6 @@ btr_discard_only_page_on_level(
page_t* page, /* in: page which is the only on its level */
mtr_t* mtr) /* in: mtr */
{
rec_t* node_ptr;
page_t* father_page;
ulint page_level;
@ -2165,8 +2248,8 @@ btr_discard_only_page_on_level(
MTR_MEMO_PAGE_X_FIX));
btr_search_drop_page_hash_index(page);
node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
father_page = buf_frame_align(node_ptr);
father_page = buf_frame_align(
btr_page_get_father_node_ptr(tree, page, mtr));
page_level = btr_page_get_level(page, mtr);
@ -2177,10 +2260,13 @@ btr_discard_only_page_on_level(
/* Free the file page */
btr_page_free(tree, page, mtr);
if (buf_frame_get_page_no(father_page) == dict_tree_get_page(tree)) {
if (UNIV_UNLIKELY(buf_frame_get_page_no(father_page)
== dict_tree_get_page(tree))) {
/* The father is the root page */
btr_page_empty(father_page, mtr);
btr_page_empty(father_page,
buf_block_get_page_zip(buf_block_align(father_page)),
mtr);
/* We play safe and reset the free bits for the father */
ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
@ -2209,7 +2295,6 @@ btr_discard_page(
ulint left_page_no;
ulint right_page_no;
page_t* merge_page;
ibool is_left;
page_t* page;
rec_t* node_ptr;
@ -2229,11 +2314,9 @@ btr_discard_page(
right_page_no = btr_page_get_next(page, mtr);
if (left_page_no != FIL_NULL) {
is_left = TRUE;
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
mtr);
} else if (right_page_no != FIL_NULL) {
is_left = FALSE;
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
mtr);
} else {
@ -2249,12 +2332,21 @@ btr_discard_page(
/* We have to mark the leftmost node pointer on the right
side page as the predefined minimum record */
page_zip_des_t* merge_page_zip;
merge_page_zip = buf_block_get_page_zip(
buf_block_align(merge_page));
if (UNIV_LIKELY_NULL(merge_page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(
merge_page_zip, merge_page, 5))) {
ut_error; /* TODO: handle this gracefully */
}
node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
ut_ad(page_rec_is_user_rec(node_ptr));
btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr);
btr_set_min_rec_mark(node_ptr, merge_page_zip, mtr);
}
btr_node_ptr_delete(tree, page, mtr);
@ -2262,7 +2354,7 @@ btr_discard_page(
/* Remove the page from the level list */
btr_level_list_remove(tree, page, mtr);
if (is_left) {
if (left_page_no != FIL_NULL) {
lock_update_discard(page_get_supremum_rec(merge_page), page);
} else {
lock_update_discard(page_rec_get_next(

View file

@ -24,6 +24,7 @@ Created 10/16/1994 Heikki Tuuri
#endif
#include "page0page.h"
#include "page0zip.h"
#include "rem0rec.h"
#include "rem0cmp.h"
#include "btr0btr.h"
@ -115,6 +116,35 @@ btr_rec_get_externally_stored_len(
rec_t* rec, /* in: record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**********************************************************
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
ibool
btr_rec_set_deleted_flag(
/*=====================*/
/* out: TRUE on success;
FALSE on page_zip overflow */
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page (or NULL) */
ulint flag) /* in: nonzero if delete marked */
{
if (page_rec_is_comp(rec)) {
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE), 5))) {
rec_set_deleted_flag_new(rec, NULL, flag);
return(FALSE);
}
rec_set_deleted_flag_new(rec, page_zip, flag);
} else {
ut_ad(!page_zip);
rec_set_deleted_flag_old(rec, flag);
}
return(TRUE);
}
/*==================== B-TREE SEARCH =========================*/
/************************************************************************
@ -405,19 +435,6 @@ btr_cur_search_to_nth_level(
/* Loop and search until we arrive at the desired level */
for (;;) {
if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
rw_latch = latch_mode;
if (insert_planned && ibuf_should_try(index,
ignore_sec_unique)) {
/* Try insert to the insert buffer if the
page is not in the buffer pool */
buf_mode = BUF_GET_IF_IN_POOL;
}
}
retry_page_get:
page = buf_page_get_gen(space, page_no, rw_latch, guess,
buf_mode,
@ -460,7 +477,7 @@ retry_page_get:
ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page)));
if (height == ULINT_UNDEFINED) {
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
/* We are in the root node */
height = btr_page_get_level(page, mtr);
@ -522,6 +539,21 @@ retry_page_get:
ut_ad(height > 0);
height--;
if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
rw_latch = latch_mode;
if (insert_planned && ibuf_should_try(index,
ignore_sec_unique)) {
/* Try insert to the insert buffer if the
page is not in the buffer pool */
buf_mode = BUF_GET_IF_IN_POOL;
}
}
guess = NULL;
node_ptr = page_cur_get_rec(page_cursor);
@ -788,6 +820,7 @@ btr_cur_insert_if_possible(
else NULL */
btr_cur_t* cursor, /* in: cursor on page after which to insert;
cursor stays valid */
page_zip_des_t* page_zip,/* in: compressed page of cursor */
dtuple_t* tuple, /* in: tuple to insert; the size info need not
have been stored to tuple */
ibool* reorg, /* out: TRUE if reorganization occurred */
@ -808,9 +841,10 @@ btr_cur_insert_if_possible(
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
rec = page_cur_tuple_insert(page_cursor, page_zip,
tuple, cursor->index, mtr);
if (!rec) {
if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
btr_page_reorganize(page, cursor->index, mtr);
@ -820,8 +854,8 @@ btr_cur_insert_if_possible(
page_cur_search(page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple,
cursor->index, mtr);
rec = page_cur_tuple_insert(page_cursor, page_zip,
tuple, cursor->index, mtr);
}
return(rec);
@ -935,6 +969,7 @@ btr_cur_optimistic_insert(
dict_index_t* index;
page_cur_t* page_cursor;
page_t* page;
page_zip_des_t* page_zip;
ulint max_size;
rec_t* dummy_rec;
ulint level;
@ -1033,9 +1068,10 @@ calculate_sizes_again:
reorg = FALSE;
/* Now, try the insert */
page_zip = buf_block_get_page_zip(buf_block_align(page));
*rec = page_cur_insert_rec_low(page_cursor, entry, index,
NULL, NULL, mtr);
*rec = page_cur_insert_rec_low(page_cursor, page_zip,
entry, index, NULL, NULL, mtr);
if (UNIV_UNLIKELY(!(*rec))) {
/* If the record did not fit, reorganize */
btr_page_reorganize(page, index, mtr);
@ -1046,9 +1082,15 @@ calculate_sizes_again:
page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
*rec = page_cur_tuple_insert(page_cursor, page_zip,
entry, index, mtr);
if (UNIV_UNLIKELY(!*rec)) {
if (UNIV_LIKELY_NULL(page_zip)) {
/* Likely a compressed page overflow */
return(DB_FAIL);
}
fputs("InnoDB: Error: cannot insert tuple ", stderr);
dtuple_print(stderr, entry);
fputs(" into ", stderr);
@ -1343,7 +1385,8 @@ btr_cur_parse_update_in_place(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index) /* in: index corresponding to page */
{
ulint flags;
@ -1399,12 +1442,19 @@ btr_cur_parse_update_in_place(
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields_in_recovery(rec, offsets,
row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
pos, trx_id, roll_ptr);
}
row_upd_rec_in_place(rec, offsets, update);
if (UNIV_LIKELY_NULL(page_zip)) {
btr_cur_unmark_extern_fields(rec, NULL, offsets);
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
}
func_exit:
mem_heap_free(heap);
@ -1431,6 +1481,7 @@ btr_cur_update_in_place(
{
dict_index_t* index;
buf_block_t* block;
page_zip_des_t* page_zip;
ulint err;
rec_t* rec;
dulint roll_ptr = ut_dulint_zero;
@ -1465,8 +1516,12 @@ btr_cur_update_in_place(
}
block = buf_block_align(rec);
ut_ad(!!page_is_comp(buf_block_get_frame(block))
== index->table->comp);
page_zip = buf_block_get_page_zip(block);
if (UNIV_UNLIKELY(!page_zip_alloc(page_zip, buf_block_get_frame(block),
4 + rec_offs_size(offsets)))) {
return(DB_OVERFLOW);
}
if (block->is_hashed) {
/* The function row_upd_changes_ord_field_binary works only
@ -1484,7 +1539,8 @@ btr_cur_update_in_place(
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
row_upd_rec_sys_fields(rec, NULL,
index, offsets, trx, roll_ptr);
}
/* FIXME: in a mixed tree, all records may not have enough ordering
@ -1506,9 +1562,22 @@ btr_cur_update_in_place(
/* The new updated record owns its possible externally
stored fields */
if (UNIV_LIKELY_NULL(page_zip)) {
/* Do not log the btr_cur_unmark_extern_fields()
if the page is compressed. Do the operation in
crash recovery of MLOG_COMP_REC_UPDATE_IN_PLACE
in that case. */
mtr = NULL;
}
btr_cur_unmark_extern_fields(rec, mtr, offsets);
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@ -1543,7 +1612,10 @@ btr_cur_optimistic_update(
page_cur_t* page_cursor;
ulint err;
page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
rec_t* rec;
rec_t* orig_rec;
ulint max_size;
ulint new_rec_size;
ulint old_rec_size;
@ -1556,7 +1628,7 @@ btr_cur_optimistic_update(
ulint* offsets;
page = btr_cur_get_page(cursor);
rec = btr_cur_get_rec(cursor);
orig_rec = rec = btr_cur_get_rec(cursor);
index = cursor->index;
ut_ad(!!page_rec_is_comp(rec) == index->table->comp);
@ -1663,7 +1735,18 @@ btr_cur_optimistic_update(
btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(page_cursor, index, offsets, mtr);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(page_cursor, index, offsets, page_zip_used, mtr);
page_cur_move_to_prev(page_cursor);
@ -1676,7 +1759,8 @@ btr_cur_optimistic_update(
trx->id);
}
rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
rec = btr_cur_insert_if_possible(cursor, page_zip_used,
new_entry, &reorganized, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
@ -1689,6 +1773,22 @@ btr_cur_optimistic_update(
btr_cur_unmark_extern_fields(rec, mtr, offsets);
}
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
if (!page_zip_compress(page_zip, page)) {
if (UNIV_UNLIKELY(!page_zip_decompress(
page_zip, page, mtr))) {
ut_error;
}
/* TODO: is this correct? */
lock_rec_restore_from_page_infimum(orig_rec, page);
mem_heap_free(heap);
return(DB_OVERFLOW);
}
}
/* Restore the old explicit lock state on the record */
lock_rec_restore_from_page_infimum(rec, page);
@ -1768,6 +1868,7 @@ btr_cur_pessimistic_update(
big_rec_t* dummy_big_rec;
dict_index_t* index;
page_t* page;
page_zip_des_t* page_zip;
dict_tree_t* tree;
rec_t* rec;
page_cur_t* page_cursor;
@ -1790,6 +1891,7 @@ btr_cur_pessimistic_update(
*big_rec = NULL;
page = btr_cur_get_page(cursor);
page_zip = buf_block_get_page_zip(buf_block_align(page));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
tree = index->tree;
@ -1906,11 +2008,11 @@ btr_cur_pessimistic_update(
btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(page_cursor, index, offsets, mtr);
page_cur_delete_rec(page_cursor, index, offsets, page_zip, mtr);
page_cur_move_to_prev(page_cursor);
rec = btr_cur_insert_if_possible(cursor, new_entry,
rec = btr_cur_insert_if_possible(cursor, page_zip, new_entry,
&dummy_reorganized, mtr);
ut_a(rec || optim_err != DB_UNDERFLOW);
@ -2045,8 +2147,9 @@ btr_cur_parse_del_mark_set_clust_rec(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: index corresponding to page */
page_t* page) /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index) /* in: index corresponding to page */
{
ulint flags;
ulint val;
@ -2087,13 +2190,25 @@ btr_cur_parse_del_mark_set_clust_rec(
if (page) {
rec = page + offset;
/* We do not need to reserve btr_search_latch, as the page
is only being recovered, and there cannot be a hash index to
it. */
if (UNIV_UNLIKELY(!btr_rec_set_deleted_flag(rec,
page_zip, val))) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_CLUST_DELETE_MARK */
ut_error;
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
row_upd_rec_sys_fields_in_recovery(rec,
/* TODO: page_zip_write(whole record)? */
row_upd_rec_sys_fields_in_recovery(rec, page_zip,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap),
pos, trx_id, roll_ptr);
@ -2101,12 +2216,6 @@ btr_cur_parse_del_mark_set_clust_rec(
mem_heap_free(heap);
}
}
/* We do not need to reserve btr_search_latch, as the page
is only being recovered, and there cannot be a hash index to
it. */
rec_set_deleted_flag(rec, page_is_comp(page), val);
}
return(ptr);
@ -2134,6 +2243,7 @@ btr_cur_del_mark_set_clust_rec(
dulint roll_ptr;
ulint err;
rec_t* rec;
page_zip_des_t* page_zip;
trx_t* trx;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
@ -2155,15 +2265,28 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
page_zip = buf_block_get_page_zip(buf_block_align(rec));
if (UNIV_LIKELY_NULL(page_zip)) {
ulint size = 5;
if (!(flags & BTR_KEEP_SYS_FLAG)) {
size += 21;/* row_upd_rec_sys_fields() */
}
if (UNIV_UNLIKELY(!page_zip_alloc(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE), size))) {
err = DB_OVERFLOW;
goto func_exit;
}
}
err = lock_clust_rec_modify_check_and_lock(flags,
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
goto func_exit;
}
err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
@ -2171,10 +2294,7 @@ btr_cur_del_mark_set_clust_rec(
&roll_ptr);
if (err != DB_SUCCESS) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(err);
goto func_exit;
}
block = buf_block_align(rec);
@ -2183,12 +2303,13 @@ btr_cur_del_mark_set_clust_rec(
rw_lock_x_lock(&btr_search_latch);
}
rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
btr_rec_set_deleted_flag(rec, page_zip, val);
trx = thr_get_trx(thr);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
row_upd_rec_sys_fields(rec, page_zip,
index, offsets, trx, roll_ptr);
}
if (block->is_hashed) {
@ -2197,10 +2318,12 @@ btr_cur_del_mark_set_clust_rec(
btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
roll_ptr, mtr);
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(DB_SUCCESS);
return(err);
}
/********************************************************************
@ -2246,7 +2369,8 @@ btr_cur_parse_del_mark_set_sec_rec(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page) /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip)/* in/out: compressed page, or NULL */
{
ulint val;
ulint offset;
@ -2272,7 +2396,10 @@ btr_cur_parse_del_mark_set_sec_rec(
is only being recovered, and there cannot be a hash index to
it. */
rec_set_deleted_flag(rec, page_is_comp(page), val);
if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_SEC_DELETE_MARK */
}
}
return(ptr);
@ -2293,6 +2420,7 @@ btr_cur_del_mark_set_sec_rec(
mtr_t* mtr) /* in: mtr */
{
buf_block_t* block;
page_zip_des_t* page_zip;
rec_t* rec;
ulint err;
@ -2316,13 +2444,15 @@ btr_cur_del_mark_set_sec_rec(
block = buf_block_align(rec);
ut_ad(!!page_is_comp(buf_block_get_frame(block))
== cursor->index->table->comp);
page_zip = buf_block_get_page_zip(block);
if (block->is_hashed) {
rw_lock_x_lock(&btr_search_latch);
}
rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
val);
if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
ut_error; /* TODO */
}
if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch);
@ -2344,40 +2474,16 @@ btr_cur_del_unmark_for_ibuf(
mtr_t* mtr) /* in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
btr_rec_set_deleted_flag(rec, NULL, FALSE);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
}
/*==================== B-TREE RECORD REMOVE =========================*/
/*****************************************************************
Compresses (merges or lifts) a page of the tree on the leaf level.
The mtr must hold an x-latch on the index tree and on the cursor
page; to avoid deadlocks, mtr must also own x-latches on the
brothers of the page, if those brothers exist.  NOTE: it is assumed
that the caller has reserved enough free extents so that the
compression will always succeed if done! */

void
btr_cur_compress(
/*=============*/
	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
				cursor does not stay valid */
	mtr_t*		mtr)	/* in: mtr */
{
	/* Compression is only defined on the leaf level. */
	ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(
					btr_cur_get_page(cursor)),
					MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains(mtr,
			dict_tree_get_lock(btr_cur_get_tree(cursor)),
			MTR_MEMO_X_LOCK));

	btr_compress(cursor, mtr);
}
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@ -2403,9 +2509,7 @@ btr_cur_compress_if_useful(
if (btr_cur_compress_recommendation(cursor, mtr)) {
btr_compress(cursor, mtr);
return(TRUE);
return(btr_compress(cursor, mtr));
}
return(FALSE);
@ -2454,17 +2558,41 @@ btr_cur_optimistic_delete(
if (no_compress_needed) {
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
lock_update_delete(rec);
btr_search_update_hash_on_delete(cursor);
max_ins_size = page_get_max_insert_size_after_reorganize(page,
1);
page_zip = buf_block_get_page_zip(
buf_block_align(btr_cur_get_page(cursor)));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets, mtr);
cursor->index, offsets,
page_zip_used, mtr);
ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
mtr);
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
}
}
if (UNIV_LIKELY_NULL(heap)) {
@ -2503,6 +2631,8 @@ btr_cur_pessimistic_delete(
mtr_t* mtr) /* in: mtr */
{
page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
dict_tree_t* tree;
rec_t* rec;
dtuple_t* node_ptr;
@ -2546,7 +2676,7 @@ btr_cur_pessimistic_delete(
/* Free externally stored fields if the record is neither
a node pointer nor in two-byte format.
This avoids an unnecessary loop. */
This condition avoids an unnecessary loop. */
if (page_is_comp(page)
? !rec_get_node_ptr_flag(rec)
: !rec_get_1byte_offs_flag(rec)) {
@ -2569,6 +2699,14 @@ btr_cur_pessimistic_delete(
goto return_after_reservations;
}
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
lock_update_delete(rec);
level = btr_page_get_level(page, mtr);
@ -2584,8 +2722,13 @@ btr_cur_pessimistic_delete(
non-leaf level, we must mark the new leftmost node
pointer as the predefined minimum record */
btr_set_min_rec_mark(next_rec, page_is_comp(page),
mtr);
if (UNIV_LIKELY_NULL(page_zip_used)
&& UNIV_UNLIKELY(!page_zip_available(
page_zip_used, 5 + 32))) {
page_zip_used = NULL;
}
btr_set_min_rec_mark(next_rec, page_zip_used, mtr);
} else {
/* Otherwise, if we delete the leftmost node pointer
on a page, we have to change the father node pointer
@ -2607,10 +2750,16 @@ btr_cur_pessimistic_delete(
btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index,
offsets, mtr);
offsets, page_zip_used, mtr);
ut_ad(btr_check_node_ptr(tree, page, mtr));
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
}
*err = DB_SUCCESS;
return_after_reservations:
@ -3038,7 +3187,7 @@ btr_cur_set_ownership_of_extern_field(
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint i, /* in: field number */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr, or NULL if not logged */
{
byte* data;
ulint local_len;
@ -3057,9 +3206,13 @@ btr_cur_set_ownership_of_extern_field(
} else {
byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
}
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
if (UNIV_LIKELY(mtr != NULL)) {
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
MLOG_1BYTE, mtr);
} else {
mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
}
}
/***********************************************************************
@ -3074,9 +3227,8 @@ btr_cur_mark_extern_inherited_fields(
rec_t* rec, /* in: record in a clustered index */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr, or NULL if not logged */
{
ibool is_updated;
ulint n;
ulint j;
ulint i;
@ -3089,22 +3241,22 @@ btr_cur_mark_extern_inherited_fields(
if (rec_offs_nth_extern(offsets, i)) {
/* Check it is not in updated fields */
is_updated = FALSE;
if (update) {
for (j = 0; j < upd_get_n_fields(update);
j++) {
if (upd_get_nth_field(update, j)
->field_no == i) {
is_updated = TRUE;
goto updated;
}
}
}
if (!is_updated) {
btr_cur_set_ownership_of_extern_field(rec,
offsets, i, FALSE, mtr);
}
btr_cur_set_ownership_of_extern_field(rec,
offsets, i, FALSE, mtr);
updated:
;
}
}
}
@ -3176,7 +3328,7 @@ void
btr_cur_unmark_extern_fields(
/*=========================*/
rec_t* rec, /* in: record in a clustered index */
mtr_t* mtr, /* in: mtr */
mtr_t* mtr, /* in: mtr, or NULL if not logged */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ulint n;
@ -3188,8 +3340,8 @@ btr_cur_unmark_extern_fields(
for (i = 0; i < n; i++) {
if (rec_offs_nth_extern(offsets, i)) {
btr_cur_set_ownership_of_extern_field(rec, offsets, i,
TRUE, mtr);
btr_cur_set_ownership_of_extern_field(rec,
offsets, i, TRUE, mtr);
}
}
}
@ -3468,7 +3620,7 @@ btr_store_big_rec_extern_fields(
rec_set_nth_field_extern_bit(rec, index,
big_rec_vec->fields[i].field_no,
TRUE, &mtr);
&mtr);
}
prev_page_no = page_no;

View file

@ -37,6 +37,7 @@ Created 11/5/1995 Heikki Tuuri
#include "log0log.h"
#include "trx0undo.h"
#include "srv0srv.h"
#include "page0zip.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
@ -482,6 +483,8 @@ buf_block_init(
block->n_pointers = 0;
page_zip_des_init(&block->page_zip);
rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock)));

View file

@ -264,9 +264,11 @@ buf_flush_buffered_writes(void)
"InnoDB: before posting to the doublewrite buffer.\n");
}
if (block->check_index_page_at_flush
&& !page_simple_validate(block->frame)) {
if (!block->check_index_page_at_flush) {
} else if (page_is_comp(block->frame)
&& UNIV_UNLIKELY(!page_simple_validate_new(
block->frame))) {
corrupted_page:
buf_page_print(block->frame);
ut_print_timestamp(stderr);
@ -278,6 +280,10 @@ buf_flush_buffered_writes(void)
(ulong) block->offset, (ulong) block->space);
ut_error;
} else if (UNIV_UNLIKELY(!page_simple_validate_old(
block->frame))) {
goto corrupted_page;
}
}

View file

@ -2761,6 +2761,7 @@ ibuf_insert(
ut_ad(dtuple_check_typed(entry));
ut_a(!(index->type & DICT_CLUSTERED));
ut_a(!index->table->zip);
if (rec_get_converted_size(index, entry)
>= page_get_free_space_of_empty(index->table->comp) / 2) {
@ -2846,9 +2847,10 @@ ibuf_insert_to_index_page(
btr_cur_del_unmark_for_ibuf(rec, mtr);
} else {
rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
rec = page_cur_tuple_insert(&page_cur, NULL,
entry, index, mtr);
if (rec == NULL) {
if (UNIV_UNLIKELY(rec == NULL)) {
/* If the record did not fit, reorganize */
btr_page_reorganize(page, index, mtr);
@ -2858,7 +2860,8 @@ ibuf_insert_to_index_page(
/* This time the record must fit */
if (UNIV_UNLIKELY(!page_cur_tuple_insert(
&page_cur, entry, index, mtr))) {
&page_cur, NULL,
entry, index, mtr))) {
ut_print_timestamp(stderr);

View file

@ -265,9 +265,10 @@ Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr); /* in: mtr */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
@ -295,11 +296,12 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist. NOTE: it is assumed that the caller has reserved
enough free extents so that the compression will always succeed if done! */
void
the brothers, if they exist. */
ibool
btr_compress(
/*=========*/
/* out: TRUE on success */
btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become

View file

@ -286,19 +286,6 @@ btr_cur_del_unmark_for_ibuf(
rec_t* rec, /* in: record to delete unmark */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done! */
void
btr_cur_compress(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
@ -364,7 +351,8 @@ btr_cur_parse_update_in_place(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered
@ -376,8 +364,9 @@ btr_cur_parse_del_mark_set_clust_rec(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: index corresponding to page */
page_t* page); /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary
index record. */
@ -388,7 +377,8 @@ btr_cur_parse_del_mark_set_sec_rec(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page); /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip);/* in/out: compressed page, or NULL */
/***********************************************************************
Estimates the number of rows in a given index range. */

View file

@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
#include "hash0hash.h"
#include "ut0byte.h"
#include "os0proc.h"
#include "page0types.h"
/* Flags for flush types */
#define BUF_FLUSH_LRU 1
@ -612,6 +613,14 @@ buf_block_get_page_no(
/*==================*/
/* out: page number */
buf_block_t* block); /* in: pointer to the control block */
/*************************************************************************
Gets the compressed page descriptor of a block if applicable. */
UNIV_INLINE
page_zip_des_t*
buf_block_get_page_zip(
/*===================*/
/* out: compressed page descriptor, or NULL */
buf_block_t* block); /* in: pointer to the control block */
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE
@ -850,6 +859,7 @@ struct buf_block_struct{
ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE in hash
indexing */
page_zip_des_t page_zip; /* compressed page info */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread

View file

@ -191,6 +191,24 @@ buf_block_get_page_no(
return(block->offset);
}
/*************************************************************************
Gets the compressed page descriptor of a block if applicable. */
UNIV_INLINE
page_zip_des_t*
buf_block_get_page_zip(
/*===================*/
			/* out: compressed page descriptor, or NULL */
	buf_block_t*	block)	/* in: pointer to the control block */
{
	ut_ad(block);

	/* A block holds a compressed page iff its page_zip
	descriptor points to compressed page data; most blocks
	are expected to be uncompressed. */
	if (UNIV_LIKELY(block->page_zip.data == NULL)) {

		return(NULL);
	}

	return(&block->page_zip);
}
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE
@ -614,8 +632,6 @@ buf_page_release(
RW_NO_LATCH */
mtr_t* mtr) /* in: mtr */
{
ulint buf_fix_count;
ut_ad(block);
mutex_enter_fast(&(buf_pool->mutex));
@ -631,8 +647,7 @@ buf_page_release(
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
buf_fix_count = block->buf_fix_count;
block->buf_fix_count = buf_fix_count - 1;
block->buf_fix_count--;
mutex_exit(&(buf_pool->mutex));

View file

@ -329,6 +329,7 @@ struct dict_table_struct{
user calls DISCARD TABLESPACE on this table,
and reset to FALSE in IMPORT TABLESPACE */
ibool comp; /* flag: TRUE=compact page format */
ibool zip; /* flag: TRUE=compressed page format */
hash_node_t name_hash; /* hash chain node */
hash_node_t id_hash; /* hash chain node */
ulint n_def; /* number of columns defined so far */

View file

@ -88,6 +88,7 @@ ibuf_should_try(
decide */
{
if (!(index->type & DICT_CLUSTERED)
&& !index->table->zip
&& (ignore_sec_unique || !(index->type & DICT_UNIQUE))
&& ibuf->meter > IBUF_THRESHOLD) {

View file

@ -129,8 +129,11 @@ flag value must give the length also! */
/* copy compact record list end
to a new created index page */
#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
#define MLOG_COMP_DECOMPRESS ((byte)47) /* decompress a page
to undo a compressed page
overflow */
#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in
#define MLOG_BIGGEST_TYPE ((byte)47) /* biggest value (used in
asserts) */
/*******************************************************************

View file

@ -130,6 +130,8 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
@ -144,6 +146,8 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
@ -160,6 +164,8 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
@ -173,7 +179,6 @@ void
page_copy_rec_list_end_to_created_page(
/*===================================*/
page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: first record to copy */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
@ -184,9 +189,11 @@ next record after the deleted one. */
void
page_cur_delete_rec(
/*================*/
page_cur_t* cursor, /* in: a page cursor */
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr); /* in: mini-transaction handle */
/********************************************************************
Searches the right position for a page cursor. */
@ -245,7 +252,9 @@ page_cur_parse_insert_rec(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/**************************************************************
Parses a log record of copying a record list end to a new created page. */
@ -257,7 +266,8 @@ page_parse_copy_rec_list_to_created_page(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses log record of a record delete on a page. */
@ -269,7 +279,9 @@ page_cur_parse_delete_rec(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */

View file

@ -181,11 +181,14 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr));
return(page_cur_insert_rec_low(cursor, page_zip, tuple,
index, NULL, NULL, mtr));
}
/***************************************************************
@ -199,12 +202,14 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(page_cur_insert_rec_low(cursor, NULL, index, rec,
offsets, mtr));
return(page_cur_insert_rec_low(cursor, page_zip, NULL,
index, rec, offsets, mtr));
}

View file

@ -145,7 +145,7 @@ Sets the max trx id field value. */
void
page_set_max_trx_id(
/*================*/
page_t* page, /* in: page */
page_t* page, /* in/out: page */
dulint trx_id);/* in: transaction id */
/*****************************************************************
Sets the max trx id field value if trx_id is bigger than the previous
@ -154,8 +154,8 @@ UNIV_INLINE
void
page_update_max_trx_id(
/*===================*/
page_t* page, /* in: page */
dulint trx_id); /* in: transaction id */
page_t* page, /* in/out: page */
dulint trx_id);/* in: transaction id */
/*****************************************************************
Reads the given header field. */
UNIV_INLINE
@ -170,9 +170,10 @@ UNIV_INLINE
void
page_header_set_field(
/*==================*/
page_t* page, /* in: page */
ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
ulint val); /* in: value */
page_t* page, /* in/out: page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
ulint val); /* in: value */
/*****************************************************************
Returns the pointer stored in the given header field. */
UNIV_INLINE
@ -188,9 +189,10 @@ UNIV_INLINE
void
page_header_set_ptr(
/*================*/
page_t* page, /* in: page */
ulint field, /* in: PAGE_FREE, ... */
byte* ptr); /* in: pointer or NULL*/
page_t* page, /* in/out: page */
page_zip_des_t* page_zip,/* in: compressed page, or NULL */
ulint field, /* in/out: PAGE_FREE, ... */
const byte* ptr); /* in: pointer or NULL*/
/*****************************************************************
Resets the last insert info field in the page header. Writes to mlog
about this operation. */
@ -283,8 +285,9 @@ UNIV_INLINE
void
page_dir_set_n_heap(
/*================*/
page_t* page, /* in: index page */
ulint n_heap);/* in: number of records */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_heap);/* in: number of records */
/*****************************************************************
Gets the number of dir slots in directory. */
UNIV_INLINE
@ -299,9 +302,9 @@ UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
/* out: number of slots */
page_t* page, /* in: index page */
ulint n_slots);/* in: number of slots */
page_t* page, /* in/out: page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_slots);/* in: number of slots */
/*****************************************************************
Gets pointer to nth directory slot. */
UNIV_INLINE
@ -349,9 +352,10 @@ UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t* slot, /* in: directory slot */
ulint n); /* in: number of records owned
by the slot */
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint n); /* in: number of records owned by the slot */
/****************************************************************
Calculates the space reserved for directory slots of a given
number of records. The exact value is a fraction number
@ -402,10 +406,12 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record, must not be
page supremum */
rec_t* next); /* in: pointer to next record, must not
be page infimum */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip);/* in/out: compressed page with at least
6 bytes available, or NULL */
/****************************************************************
Gets the pointer to the previous record. */
UNIV_INLINE
@ -513,6 +519,16 @@ page_get_free_space_of_empty(
/* out: free space */
ulint comp) /* in: nonzero=compact page format */
__attribute__((const));
/**************************************************************
Returns the base extra size of a physical record. This is the
size of the fixed header, independent of the record size. */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
/*=========================*/
/* out: REC_N_NEW_EXTRA_BYTES
or REC_N_OLD_EXTRA_BYTES */
const rec_t* rec); /* in: physical record */
/****************************************************************
Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records. */
@ -530,7 +546,8 @@ page_mem_alloc(
/*===========*/
/* out: pointer to start of allocated
buffer, or NULL if allocation fails */
page_t* page, /* in: index page */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint need, /* in: number of bytes needed */
dict_index_t* index, /* in: record descriptor */
ulint* heap_no);/* out: this contains the heap number
@ -542,7 +559,9 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
page_t* page, /* in: index page */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**************************************************************
@ -554,17 +573,17 @@ page_create(
/* out: pointer to the page */
buf_frame_t* frame, /* in: a buffer frame where the page is
created */
page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
mtr_t* mtr, /* in: mini-transaction handle */
ulint comp); /* in: nonzero=compact page format */
/*****************************************************************
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page. */
touch the lock table and max trx id on page or compress the page. */
void
page_copy_rec_list_end_no_locks(
/*============================*/
page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
@ -573,27 +592,31 @@ Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page. */
void
ibool
page_copy_rec_list_end(
/*===================*/
page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
/* out: TRUE on success */
page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr) /* in: mtr */
__attribute__((warn_unused_result, nonnull(1, 3, 4, 5)));
/*****************************************************************
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page. */
void
ibool
page_copy_rec_list_start(
/*=====================*/
page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
/* out: TRUE on success */
page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr) /* in: mtr */
__attribute__((warn_unused_result, nonnull(1, 3, 4, 5)));
/*****************************************************************
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
@ -601,26 +624,16 @@ The infimum and supremum records are not deleted. */
void
page_delete_rec_list_end(
/*=====================*/
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page */
rec_t* rec, /* in: pointer to record on page */
dict_index_t* index, /* in: record descriptor */
ulint n_recs, /* in: number of records to delete,
or ULINT_UNDEFINED if not known */
ulint size, /* in: the sum of the sizes of the
records in the end of the chain to
delete, or ULINT_UNDEFINED if not known */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
void
page_delete_rec_list_start(
/*=======================*/
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr */
__attribute__((nonnull(1, 2, 6)));
/*****************************************************************
Moves record list end to another page. Moved records include
split_rec. */
@ -629,30 +642,25 @@ void
page_move_rec_list_end(
/*===================*/
page_t* new_page, /* in: index page where to move */
page_t* page, /* in: index page */
page_zip_des_t* new_page_zip, /* in/out: compressed page of
new_page, or NULL */
rec_t* split_rec, /* in: first record to move */
page_zip_des_t* page_zip, /* in/out: compressed page of
split_rec, or NULL */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Moves record list start to another page. Moved records do not include
split_rec. */
void
page_move_rec_list_start(
/*=====================*/
page_t* new_page, /* in: index page where to move */
page_t* page, /* in: index page */
rec_t* split_rec, /* in: first record not to move */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr) /* in: mtr */
__attribute__((nonnull(1, 3, 5, 6)));
/********************************************************************
Splits a directory slot which owns too many records. */
void
page_dir_split_slot(
/*================*/
page_t* page, /* in: the index page in question */
ulint slot_no); /* in: the directory slot */
page_t* page, /* in: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/*****************************************************************
Tries to balance the given directory slot with too few records
with the upper neighbor, so that there are at least the minimum number
@ -662,8 +670,11 @@ two slots. */
void
page_dir_balance_slot(
/*==================*/
page_t* page, /* in: index page */
ulint slot_no); /* in: the directory slot */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 15 bytes available, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/**************************************************************
Parses a log record of a record list end or start deletion. */
@ -766,10 +777,20 @@ know the index. This is also resilient so that this should never crash
even if the page is total garbage. */
ibool
page_simple_validate(
/*=================*/
page_simple_validate_old(
/*=====================*/
/* out: TRUE if ok */
page_t* page); /* in: index page */
page_t* page); /* in: old-style index page */
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. */
ibool
page_simple_validate_new(
/*=====================*/
/* out: TRUE if ok */
page_t* page); /* in: new-style index page */
/*******************************************************************
This function checks the consistency of an index page. */

View file

@ -35,7 +35,7 @@ UNIV_INLINE
void
page_update_max_trx_id(
/*===================*/
page_t* page, /* in: page */
page_t* page, /* in/out: page */
dulint trx_id) /* in: transaction id */
{
ut_ad(page);
@ -67,9 +67,10 @@ UNIV_INLINE
void
page_header_set_field(
/*==================*/
page_t* page, /* in: page */
ulint field, /* in: PAGE_LEVEL, ... */
ulint val) /* in: value */
page_t* page, /* in/out: page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
ulint val) /* in: value */
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
@ -77,6 +78,9 @@ page_header_set_field(
ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_header(page_zip, page + PAGE_HEADER + field, 2);
}
}
/*****************************************************************
@ -114,9 +118,10 @@ UNIV_INLINE
void
page_header_set_ptr(
/*================*/
page_t* page, /* in: page */
ulint field, /* in: PAGE_FREE, ... */
byte* ptr) /* in: pointer or NULL*/
page_t* page, /* in: page */
page_zip_des_t* page_zip,/* in: compressed page, or NULL */
ulint field, /* in: PAGE_FREE, ... */
const byte* ptr) /* in: pointer or NULL*/
{
ulint offs;
@ -133,7 +138,7 @@ page_header_set_ptr(
ut_ad((field != PAGE_HEAP_TOP) || offs);
page_header_set_field(page, field, offs);
page_header_set_field(page, page_zip, field, offs);
}
/*****************************************************************
@ -413,11 +418,11 @@ UNIV_INLINE
void
page_dir_set_n_slots(
/*=================*/
/* out: number of slots */
page_t* page, /* in: index page */
ulint n_slots)/* in: number of slots */
page_t* page, /* in/out: page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_slots)/* in: number of slots */
{
page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots);
page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
}
/*****************************************************************
@ -438,12 +443,13 @@ UNIV_INLINE
void
page_dir_set_n_heap(
/*================*/
page_t* page, /* in: index page */
ulint n_heap) /* in: number of records */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_heap) /* in: number of records */
{
ut_ad(n_heap < 0x8000);
page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 &
page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap | (0x8000 &
page_header_get_field(page, PAGE_N_HEAP)));
}
@ -520,7 +526,11 @@ page_dir_slot_get_n_owned(
page_dir_slot_t* slot) /* in: page directory slot */
{
rec_t* rec = page_dir_slot_get_rec(slot);
return(rec_get_n_owned(rec, page_rec_is_comp(rec)));
if (page_rec_is_comp(slot)) {
return(rec_get_n_owned_new(rec));
} else {
return(rec_get_n_owned_old(rec));
}
}
/*******************************************************************
@ -529,12 +539,18 @@ UNIV_INLINE
void
page_dir_slot_set_n_owned(
/*======================*/
page_dir_slot_t* slot, /* in: directory slot */
ulint n) /* in: number of records owned
by the slot */
page_dir_slot_t*slot, /* in/out: directory slot */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint n) /* in: number of records owned by the slot */
{
rec_t* rec = page_dir_slot_get_rec(slot);
rec_set_n_owned(rec, page_rec_is_comp(rec), n);
if (page_rec_is_comp(slot)) {
rec_set_n_owned_new(rec, page_zip, n);
} else {
ut_ad(!page_zip);
rec_set_n_owned_old(rec, n);
}
}
/****************************************************************
@ -597,26 +613,29 @@ UNIV_INLINE
void
page_rec_set_next(
/*==============*/
rec_t* rec, /* in: pointer to record, must not be page supremum */
rec_t* next) /* in: pointer to next record, must not be page
infimum */
rec_t* rec, /* in: pointer to record,
must not be page supremum */
rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip) /* in/out: compressed page with
at least 6 bytes available, or NULL */
{
page_t* page;
ulint offs;
ut_ad(page_rec_check(rec));
ut_ad(!page_rec_is_supremum(rec));
page = ut_align_down(rec, UNIV_PAGE_SIZE);
if (next) {
ut_ad(!page_rec_is_infimum(next));
ut_ad(page == ut_align_down(next, UNIV_PAGE_SIZE));
offs = (ulint) (next - page);
ut_ad(!next || !page_rec_is_infimum(next));
ut_ad(!next || ut_align_down(rec, UNIV_PAGE_SIZE)
== ut_align_down(next, UNIV_PAGE_SIZE));
offs = ut_align_offset(next, UNIV_PAGE_SIZE);
if (page_rec_is_comp(rec)) {
rec_set_next_offs_new(rec, page_zip, offs);
} else {
offs = 0;
rec_set_next_offs_old(rec, offs);
ut_ad(!page_zip);
}
rec_set_next_offs(rec, page_is_comp(page), offs);
}
/****************************************************************
@ -671,11 +690,11 @@ page_rec_find_owner_rec(
ut_ad(page_rec_check(rec));
if (page_rec_is_comp(rec)) {
while (rec_get_n_owned(rec, TRUE) == 0) {
while (rec_get_n_owned_new(rec) == 0) {
rec = page_rec_get_next(rec);
}
} else {
while (rec_get_n_owned(rec, FALSE) == 0) {
while (rec_get_n_owned_old(rec) == 0) {
rec = page_rec_get_next(rec);
}
}
@ -683,6 +702,23 @@ page_rec_find_owner_rec(
return(rec);
}
/**************************************************************
Returns the base extra size of a physical record.  This is the
size of the fixed record header, independent of the record's
variable-length contents. */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
/*=========================*/
			/* out: REC_N_NEW_EXTRA_BYTES
			or REC_N_OLD_EXTRA_BYTES */
	const rec_t*	rec)	/* in: physical record */
{
#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
#endif
	/* An old-style record carries exactly one more fixed header
	byte than a new-style (compact) record; the #if above makes
	this branch equivalent to the arithmetic form. */
	if (page_rec_is_comp(rec)) {

		return(REC_N_NEW_EXTRA_BYTES);
	}

	return(REC_N_OLD_EXTRA_BYTES);
}
/****************************************************************
Returns the sum of the sizes of the records in the record list, excluding
the infimum and supremum records. */
@ -805,7 +841,9 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
page_t* page, /* in: index page */
page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
@ -816,8 +854,8 @@ page_mem_free(
ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free);
page_header_set_ptr(page, PAGE_FREE, rec);
page_rec_set_next(rec, free, page_zip);
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
#if 0 /* It's better not to destroy the user's data. */
@ -827,11 +865,18 @@ page_mem_free(
cannot be cleared, because page_mem_alloc() needs them in order
to determine the size of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets));
/* If you enable this code, make sure that the callers of
page_mem_free() account for the increased usage of space. */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, page, rec, rec - page,
rec_offs_data_size(offsets));
}
#endif
garbage = page_header_get_field(page, PAGE_GARBAGE);
page_header_set_field(page, PAGE_GARBAGE,
page_header_set_field(page, page_zip, PAGE_GARBAGE,
garbage + rec_offs_size(offsets));
}

View file

@ -18,5 +18,71 @@ typedef byte page_t;
typedef struct page_search_struct page_search_t;
typedef struct page_cur_struct page_cur_t;
typedef byte page_zip_t;
typedef struct page_zip_des_struct page_zip_des_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
page0*.h includes rem0rec.h and may include rem0rec.ic. */
/* Compressed page descriptor.  Pairs the compressed image of a B-tree
page with a modification log that records changes applied to the
uncompressed page since the data was last (re)compressed. */
struct page_zip_des_struct
{
	page_zip_t*	data;	/* compressed page data */
	ulint		size;	/* total size of compressed page, in
				bytes; a power of two below
				UNIV_PAGE_SIZE (see
				page_zip_simple_validate()) */
	ulint		m_start;	/* start offset of the modification
					log within data[]; equal to m_end
					right after (re)compression */
	ulint		m_end;	/* end offset of the modification log
				within data[] */
};
/**************************************************************************
Write data to the compressed page. The data must already be written to
the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
#endif

183
include/page0zip.h Normal file
View file

@ -0,0 +1,183 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#ifndef page0zip_h
#define page0zip_h
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "mtr0types.h"
#include "page0types.h"
/**************************************************************************
Initialize a compressed page descriptor. */
UNIV_INLINE
void
page_zip_des_init(
/*==============*/
page_zip_des_t* page_zip); /* in/out: compressed page
descriptor */
/**************************************************************************
Compress a page. */
ibool
page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* out: compressed page */
const page_t* page); /* in: uncompressed page */
/**************************************************************************
Decompress a page. */
ibool
page_zip_decompress(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_zip_des_t* page_zip,/* in: data, size; out: m_start, m_end */
page_t* page, /* out: uncompressed page, may be trashed */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
__attribute__((warn_unused_result, nonnull(1, 2)));
#ifdef UNIV_DEBUG
/**************************************************************************
Validate a compressed page descriptor. */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
/* out: TRUE if ok */
const page_zip_des_t* page_zip); /* in: compressed page
descriptor */
/**************************************************************************
Check that the compressed and decompressed pages match. */
ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page); /* in: uncompressed page */
#endif /* UNIV_DEBUG */
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
/* out: length of the integer, in bytes */
ulint num) /* in: the integer */
__attribute__((const));
/**************************************************************************
Determine the size of a modification log entry. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
/* out: length of the log entry, in bytes */
ulint pos, /* in: offset of the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((const));
/**************************************************************************
Ensure that enough space is available in the modification log.
If not, try to compress the page. */
UNIV_INLINE
ibool
page_zip_alloc(
/*===========*/
/* out: TRUE if enough space is available */
page_zip_des_t* page_zip,/* in/out: compressed page;
will only be modified if compression is needed
and successful */
const page_t* page, /* in: uncompressed page */
ulint size) /* in: size of modification log entries */
__attribute__((nonnull));
/**************************************************************************
Determine if enough space is available in the modification log. */
UNIV_INLINE
ibool
page_zip_available(
/*===============*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of
modification log entries */
__attribute__((warn_unused_result, nonnull, pure));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size)
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
/**************************************************************************
Write data to the compressed portion of a page. The data must already
have been written to the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif
#ifndef UNIV_NONINL
# include "page0zip.ic"
#endif
#endif /* page0zip_h */

224
include/page0zip.ic Normal file
View file

@ -0,0 +1,224 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "page0zip.h"
#include "page0page.h"
/* The format of compressed pages is as follows.
The header and trailer of the uncompressed pages, including the page
directory in the trailer, are copied as is to the header and trailer
of the compressed page. Immediately preceding the page trailer,
we store a 32-bit checksum of the compressed data.
The data between PAGE_DATA and the last page directory entry
will be written in compressed format, starting at offset PAGE_DATA.
The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.
MODIFICATION LOG ENTRY FORMAT
- length (1..2 bytes), not zero
- offset - PAGE_DATA (1..2 bytes)
- data bytes
The length and the offset are stored in a variable-length format:
	- 0xxxxxxx : 0..127
	- 10xxxxxx xxxxxxxx: 0..16383
	- 11xxxxxx xxxxxxxx: reserved
The end of the modification log is marked by length=0. */
/**************************************************************************
Initialize a compressed page descriptor.  All fields are zeroed: no
compressed data is attached (data == NULL, size == 0) and the
modification log is empty (m_start == m_end == 0). */
UNIV_INLINE
void
page_zip_des_init(
/*==============*/
	page_zip_des_t*	page_zip)	/* in/out: compressed page
					descriptor */
{
	memset(page_zip, 0, sizeof *page_zip);
}
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
			/* out: length of the integer, in bytes */
	ulint	num)	/* in: the integer */
{
	/* The variable-length encoding supports values up to 16383;
	the two-byte prefix 11 is reserved. */
	if (num >= 16384) {

		ut_error;
		return(0);
	}

	/* 0xxxxxxx: 0..127 encodes in one byte;
	10xxxxxx xxxxxxxx: 0..16383 encodes in two. */
	return(num < 128 ? 1 : 2);
}
/**************************************************************************
Determine the size of a modification log entry. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
			/* out: length of the log entry, in bytes */
	ulint	pos,	/* in: offset of the uncompressed page */
	ulint	length)	/* in: length of the data */
{
	ulint	entry_size;

	ut_ad(pos >= PAGE_DATA);
	ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_DATA /* - trailer_len */);

	/* An entry consists of the encoded length, the encoded offset
	(relative to PAGE_DATA), and the data bytes themselves. */
	entry_size = page_zip_ulint_size(pos - PAGE_DATA);
	entry_size += page_zip_ulint_size(length);
	entry_size += length;

	return(entry_size);
}
#ifdef UNIV_DEBUG
/**************************************************************************
Validate a compressed page descriptor.  Checks only the descriptor's
own invariants; it does not compare the compressed data against the
uncompressed page (see page_zip_validate() for that). */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
				/* out: TRUE if ok */
	const page_zip_des_t*	page_zip)/* in: compressed page descriptor */
{
	ut_ad(page_zip);
	ut_ad(page_zip->data);
	/* size must be a power of two... */
	ut_ad(!(page_zip->size & (page_zip->size - 1)));
	/* ...smaller than an uncompressed page, yet large enough to
	hold the page header plus an empty page body */
	ut_ad(page_zip->size < UNIV_PAGE_SIZE);
	ut_ad(page_zip->size > PAGE_DATA + PAGE_EMPTY_DIR_START);
	/* the modification log must lie within the compressed image */
	ut_ad(page_zip->m_start <= page_zip->m_end);
	ut_ad(page_zip->m_end < page_zip->size);
	return(TRUE);
}
#endif /* UNIV_DEBUG */
/**************************************************************************
Ensure that enough space is available in the modification log.
If not, try to compress the page.  Recompressing empties the log
(m_start == m_end), freeing up room for new entries. */
UNIV_INLINE
ibool
page_zip_alloc(
/*===========*/
				/* out: TRUE if enough space is available */
	page_zip_des_t*	page_zip,/* in/out: compressed page;
				will only be modified if compression is needed
				and successful */
	const page_t*	page,	/* in: uncompressed page */
	ulint		size)	/* in: size of modification log entries */
{
	/* The page directory and trailer are stored uncompressed at
	the end of the compressed image; the modification log may only
	grow up to that trailer. */
	ulint	trailer_len = PAGE_DIR + PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);

	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->m_end + trailer_len < page_zip->size);
	ut_ad(size >= 3);	/* modification log entries are >= 1+1+1 bytes */
	ut_ad(size < page_zip->size);

	/* Fast path: the log already has room for the new entries. */
	if (size < page_zip->size - page_zip->m_end - trailer_len) {

		return(TRUE);
	}

	if (page_zip->m_start == page_zip->m_end) {
		/* The page has been freshly compressed, so
		recompressing it will not help. */

		return(FALSE);
	}

	/* Recompress the page, which resets the modification log. */
	return(page_zip_compress(page_zip, page));
}
/**************************************************************************
Determine if enough space is available in the modification log. */
UNIV_INLINE
ibool
page_zip_available(
/*===============*/
				/* out: TRUE if enough space
				is available */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	ulint			size)	/* in: requested size of
					modification log entries */
{
	/* The page directory and trailer are kept uncompressed at the
	end of the compressed image; the log may not grow into them. */
	ulint	n_slots		= page_dir_get_n_slots(
					(page_t*) page_zip->data);
	ulint	trailer_len	= PAGE_DIR + PAGE_DIR_SLOT_SIZE * n_slots;
	ulint	log_capacity;

	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->m_end + trailer_len < page_zip->size);
	ut_ad(size >= 3);	/* modification log entries are >= 1+1+1 bytes */
	ut_ad(size < page_zip->size);

	log_capacity = page_zip->size - page_zip->m_end - trailer_len;

	return(UNIV_LIKELY(size < log_capacity));
}
/**************************************************************************
Write data to the uncompressed header portion of a page.  The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint		length)	/* in: length of the data */
{
	ulint	pos;

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));

	pos = ut_align_offset(str, UNIV_PAGE_SIZE);

	ut_ad(pos < PAGE_DATA);

	/* The page header is stored as is at the start of the
	compressed image, so mirror the bytes into page_zip->data.
	Note: the destination must be based on the compressed frame;
	"page_zip + pos" would perform pointer arithmetic on the
	page_zip_des_t descriptor itself and corrupt memory past it. */
	memcpy(page_zip->data + pos, str, length);

	ut_ad(page_zip_validate(page_zip, str - pos));
}
/**************************************************************************
Write data to the uncompressed trailer portion of a page.  The data must
already have been written to the uncompressed page.
NOTE(review): this function is visibly unfinished (see the TODO
markers below); confirm the trailer offset mapping before use. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint	length)	/* in: length of the data */
{
	ulint	pos;

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));

	pos = ut_align_offset(str, UNIV_PAGE_SIZE);

	/* NOTE(review): this assertion checks the *header* range
	(pos < PAGE_DATA), not the trailer; presumably a placeholder
	copied from page_zip_write_header() -- confirm. */
	ut_ad(pos < PAGE_DATA);/* TODO */

	/* NOTE(review): "page_zip + pos" advances a page_zip_des_t
	pointer, not the compressed frame; the destination should
	presumably be derived from page_zip->data with a translated
	trailer offset once the TODO is resolved. */
	memcpy(page_zip + pos/* TODO */, str, length);

	ut_ad(page_zip_validate(page_zip, str - pos));
}
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif

View file

@ -13,11 +13,10 @@ Created 5/30/1994 Heikki Tuuri
#include "data0data.h"
#include "rem0types.h"
#include "mtr0types.h"
#include "page0types.h"
/* Maximum values for various fields (for non-blob tuples) */
#define REC_MAX_N_FIELDS (1024 - 1)
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
/* Flag denoting the predefined minimum record: this bit is ORed in the 4
info bits of a record */
@ -41,6 +40,17 @@ offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100
#define REC_OFFS_SMALL_SIZE 10
/**********************************************************
The following function is used to get the pointer of the next chained record
on the same page. */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
/*=============*/
/* out: pointer to the next chained record, or
NULL if none */
rec_t* rec, /* in: physical record */
ulint comp); /* in: nonzero=compact page format */
/**********************************************************
The following function is used to get the offset of the
next chained record on the same page. */
@ -54,15 +64,25 @@ rec_get_next_offs(
ulint comp); /* in: nonzero=compact page format */
/**********************************************************
The following function is used to set the next record offset field
of the record. */
of an old-style record. */
UNIV_INLINE
void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_next_offs_old(
/*==================*/
rec_t* rec, /* in: old-style physical record */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to set the next record offset field
of a new-style record. */
UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields
in an old-style record. */
UNIV_INLINE
@ -82,26 +102,44 @@ rec_get_n_fields(
rec_t* rec, /* in: physical record */
dict_index_t* index); /* in: record descriptor */
/**********************************************************
The following function is used to get the number of records
owned by the previous directory record. */
The following function is used to get the number of records owned by the
previous directory record. */
UNIV_INLINE
ulint
rec_get_n_owned(
/*============*/
rec_get_n_owned_old(
/*================*/
/* out: number of owned records */
rec_t* rec, /* in: physical record */
ulint comp); /* in: nonzero=compact page format */
rec_t* rec); /* in: old-style physical record */
/**********************************************************
The following function is used to set the number of owned
records. */
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_n_owned_old(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in: old-style physical record */
ulint n_owned); /* in: the number of owned */
/**********************************************************
The following function is used to get the number of records owned by the
previous directory record. */
UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
/* out: number of owned records */
rec_t* rec); /* in: new-style physical record */
/**********************************************************
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
5 bytes available, or NULL */
ulint n_owned);/* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of
a record. */
UNIV_INLINE
@ -115,12 +153,21 @@ rec_get_info_bits(
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_info_bits_old(
/*==================*/
rec_t* rec, /* in: old-style physical record */
ulint bits); /* in: info bits */
/**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/**********************************************************
The following function retrieves the status bits of a new-style record. */
UNIV_INLINE
ulint
@ -135,8 +182,10 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in: physical record */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/**********************************************************
The following function is used to retrieve the info and status
@ -155,9 +204,10 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint bits); /* in: info bits */
rec_t* rec, /* in/out: compact physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/**********************************************************
The following function tells if record is delete marked. */
@ -172,40 +222,67 @@ rec_get_deleted_flag(
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_deleted_flag_old(
/*=====================*/
rec_t* rec, /* in: old-style physical record */
ulint flag); /* in: nonzero if delete marked */
/**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint flag); /* in: nonzero if delete marked */
/**********************************************************
The following function tells if a new-style record is a node pointer. */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*=================*/
/*==================*/
/* out: TRUE if node pointer */
rec_t* rec); /* in: physical record */
/**********************************************************
The following function is used to get the order number
of the record in the heap of the index page. */
of an old-style record in the heap of the index page. */
UNIV_INLINE
ulint
rec_get_heap_no(
/*=============*/
rec_get_heap_no_old(
/*================*/
/* out: heap order number */
rec_t* rec, /* in: physical record */
ulint comp); /* in: nonzero=compact page format */
rec_t* rec); /* in: physical record */
/**********************************************************
The following function is used to set the heap number
field in the record. */
field in an old-style record. */
UNIV_INLINE
void
rec_set_heap_no(
/*=============*/
rec_set_heap_no_old(
/*================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to get the order number
of a new-style record in the heap of the index page. */
UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
/* out: heap order number */
rec_t* rec); /* in: physical record */
/**********************************************************
The following function is used to set the heap number
field in a new-style record. */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format. */
UNIV_INLINE
@ -340,7 +417,7 @@ rec_offs_any_extern(
/* out: TRUE if a field is stored externally */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
/***************************************************************
Sets the value of the ith field extern storage bit. */
Sets the ith field extern storage bit. */
UNIV_INLINE
void
rec_set_nth_field_extern_bit(
@ -348,7 +425,6 @@ rec_set_nth_field_extern_bit(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */
ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case
we do not write to log about the change */
@ -489,8 +565,8 @@ rec_fold(
in an incomplete last field */
dulint tree_id); /* in: index tree id */
/*************************************************************
Builds a physical record out of a data tuple and stores it beginning from
address destination. */
Builds a physical record out of a data tuple and
stores it into the given buffer. */
rec_t*
rec_convert_dtuple_to_rec(

View file

@ -148,19 +148,18 @@ rec_set_nth_field_sql_null(
ulint n); /* in: index of the field */
/***************************************************************
Sets the value of the ith field extern storage bit of an old-style record. */
Sets the ith field extern storage bit of an old-style record. */
void
rec_set_nth_field_extern_bit_old(
/*=============================*/
rec_t* rec, /* in: old-style record */
ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page where
rec is, or NULL; in the NULL case we do not
write to log about the change */
/***************************************************************
Sets the value of the ith field extern storage bit of a new-style record. */
Sets the ith field extern storage bit of a new-style record. */
void
rec_set_nth_field_extern_bit_new(
@ -168,7 +167,6 @@ rec_set_nth_field_extern_bit_new(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */
ulint ith, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case
we do not write to log about the change */
@ -255,6 +253,55 @@ rec_set_bit_field_2(
| (val << shift));
}
/**********************************************************
The following function is used to get the pointer of the next chained record
on the same page. */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
/*=============*/
			/* out: pointer to the next chained record, or
			NULL if none */
	rec_t*	rec,	/* in: physical record */
	ulint	comp)	/* in: nonzero=compact page format */
{
	ulint	next;

	ut_ad(REC_NEXT_MASK == 0xFFFFUL);
	ut_ad(REC_NEXT_SHIFT == 0);

	next = mach_read_from_2(rec - REC_NEXT);

	if (UNIV_UNLIKELY(!next)) {
		/* A zero value in the "next" field terminates the chain. */

		return(NULL);
	}

	if (!UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
		/* Old-style format: the field holds an absolute
		offset within the page. */
		ut_ad(next < UNIV_PAGE_SIZE);

		return(ut_align_down(rec, UNIV_PAGE_SIZE) + next);
	}

	/* New-style format: the field holds an offset relative
	to the current record. */
#if UNIV_PAGE_SIZE <= 32768
	/* Note that for 64 KiB pages, next can 'wrap around'
	and the debug assertion is not valid */

	/* In the following assertion, next is interpreted
	as signed 16-bit integer in 2's complement arithmetics.
	If all platforms defined int16_t in the standard headers,
	the expression could be written simpler as
	(int16_t) next + ut_align_offset(...) < UNIV_PAGE_SIZE
	*/
	ut_ad((next >= 32768
		? next - 65536
		: next)
		+ ut_align_offset(rec, UNIV_PAGE_SIZE)
		< UNIV_PAGE_SIZE);
#endif
	return(rec + next);
}
/**********************************************************
The following function is used to get the offset of the next chained record
on the same page. */
@ -274,7 +321,7 @@ rec_get_next_offs(
field_value = mach_read_from_2(rec - REC_NEXT);
if (comp) {
if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
#if UNIV_PAGE_SIZE <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
@ -291,7 +338,7 @@ rec_get_next_offs(
+ ut_align_offset(rec, UNIV_PAGE_SIZE)
< UNIV_PAGE_SIZE);
#endif
if (field_value == 0) {
if (UNIV_UNLIKELY(field_value == 0)) {
return(0);
}
@ -305,39 +352,59 @@ rec_get_next_offs(
}
/**********************************************************
The following function is used to set the next record offset field of the
record. */
The following function is used to set the next record offset field
of an old-style record. */
UNIV_INLINE
void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint next) /* in: offset of the next record, or 0 if none */
rec_set_next_offs_old(
/*==================*/
rec_t* rec, /* in: old-style physical record */
ulint next) /* in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
ut_ad(REC_NEXT_MASK == 0xFFFFUL);
ut_ad(REC_NEXT_SHIFT == 0);
#if REC_NEXT_MASK != 0xFFFFUL
# error "REC_NEXT_MASK != 0xFFFFUL"
#endif
#if REC_NEXT_SHIFT
# error "REC_NEXT_SHIFT != 0"
#endif
if (comp) {
ulint field_value;
mach_write_to_2(rec - REC_NEXT, next);
}
if (next) {
/* The following two statements calculate
next - offset_of_rec mod 64Ki, where mod is the modulo
as a non-negative number */
field_value = (ulint)((lint)next
- (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
field_value &= REC_NEXT_MASK;
} else {
field_value = 0;
}
/**********************************************************
The following function is used to set the next record offset field
of a new-style record. */
UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next) /* in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
mach_write_to_2(rec - REC_NEXT, field_value);
ulint field_value;
if (UNIV_UNLIKELY(!next)) {
field_value = 0;
} else {
mach_write_to_2(rec - REC_NEXT, next);
/* The following two statements calculate
next - offset_of_rec mod 64Ki, where mod is the modulo
as a non-negative number */
field_value = (ulint)((lint)next
- (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
field_value &= REC_NEXT_MASK;
}
mach_write_to_2(rec - REC_NEXT, field_value);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEXT, 2);
}
}
@ -436,42 +503,62 @@ The following function is used to get the number of records owned by the
previous directory record. */
UNIV_INLINE
ulint
rec_get_n_owned(
/*============*/
rec_get_n_owned_old(
/*================*/
/* out: number of owned records */
rec_t* rec, /* in: physical record */
ulint comp) /* in: nonzero=compact page format */
rec_t* rec) /* in: old-style physical record */
{
ulint ret;
ut_ad(rec);
ret = rec_get_bit_field_1(rec,
comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
ut_ad(ret <= REC_MAX_N_OWNED);
return(ret);
return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
}
/**********************************************************
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_n_owned_old(
/*================*/
/* out: TRUE on success */
rec_t* rec, /* in: old-style physical record */
ulint n_owned) /* in: the number of owned */
{
ut_ad(rec);
ut_ad(n_owned <= REC_MAX_N_OWNED);
rec_set_bit_field_1(rec, n_owned,
comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
/**********************************************************
The following function is used to get the number of records owned by the
previous directory record. */
UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
			/* out: number of owned records */
	rec_t*	rec)	/* in: new-style physical record */
{
	ulint	n_owned;

	/* In the new-style format the n_owned count is stored in the
	byte at rec - REC_NEW_N_OWNED. */
	n_owned = rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
			REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);

	return(n_owned);
}
/**********************************************************
The following function is used to set the number of owned records. */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 5 bytes available, or NULL */
	ulint		n_owned)/* in: the number of owned */
{
	rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
			REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);

	if (!page_zip) {

		return;
	}

	/* Mirror the modified header byte in the compressed page. */
	page_zip_write(page_zip, rec - REC_NEW_N_OWNED, 1);
}
/**********************************************************
The following function is used to retrieve the info bits of a record. */
UNIV_INLINE
@ -482,35 +569,40 @@ rec_get_info_bits(
rec_t* rec, /* in: physical record */
ulint comp) /* in: nonzero=compact page format */
{
ulint ret;
ut_ad(rec);
ret = rec_get_bit_field_1(rec,
return(rec_get_bit_field_1(rec,
comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
return(ret);
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
}
/**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_info_bits_old(
/*==================*/
rec_t* rec, /* in: old-style physical record */
ulint bits) /* in: info bits */
{
ut_ad(rec);
ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
rec_set_bit_field_1(rec, bits,
comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
/**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 5 bytes available, or NULL */
	ulint		bits)	/* in: info bits */
{
	rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
			REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);

	if (!page_zip) {

		return;
	}

	/* Mirror the modified header byte in the compressed page. */
	page_zip_write(page_zip, rec - REC_NEW_INFO_BITS, 1);
}
/**********************************************************
The following function is used to set the status bits of a new-style record. */
@ -518,14 +610,16 @@ UNIV_INLINE
void
rec_set_status(
/*===========*/
rec_t* rec, /* in: physical record */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
{
ut_ad(rec);
ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_STATUS, 1);
}
}
/**********************************************************
@ -559,20 +653,17 @@ UNIV_INLINE
void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint bits) /* in: info bits */
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
{
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif
if (comp) {
rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
} else {
ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
}
rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
rec_set_status(rec, page_zip, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, page_zip, bits & ~REC_NEW_STATUS_MASK);
}
/**********************************************************
@ -600,15 +691,14 @@ rec_get_deleted_flag(
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
rec_set_deleted_flag_old(
/*=====================*/
rec_t* rec, /* in: old-style physical record */
ulint flag) /* in: nonzero if delete marked */
{
ulint val;
val = rec_get_info_bits(rec, comp);
val = rec_get_info_bits(rec, FALSE);
if (flag) {
val |= REC_INFO_DELETED_FLAG;
@ -616,7 +706,31 @@ rec_set_deleted_flag(
val &= ~REC_INFO_DELETED_FLAG;
}
rec_set_info_bits(rec, comp, val);
rec_set_info_bits_old(rec, val);
}
/**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag_new(
/*=====================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 5 bytes available, or NULL */
	ulint		flag)	/* in: nonzero if delete marked */
{
	ulint	bits;

	/* Clear the deleted flag in the current info bits, then set
	it again only if requested. */
	bits = rec_get_info_bits(rec, TRUE) & ~REC_INFO_DELETED_FLAG;

	if (flag) {
		bits |= REC_INFO_DELETED_FLAG;
	}

	rec_set_info_bits_new(rec, page_zip, bits);
}
/**********************************************************
@ -624,7 +738,7 @@ The following function tells if a new-style record is a node pointer. */
UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*=================*/
/*==================*/
/* out: TRUE if node pointer */
rec_t* rec) /* in: physical record */
{
@ -632,45 +746,66 @@ rec_get_node_ptr_flag(
}
/**********************************************************
The following function is used to get the order number of the record in the
heap of the index page. */
The following function is used to get the order number
of an old-style record in the heap of the index page. */
UNIV_INLINE
ulint
rec_get_heap_no(
/*=============*/
rec_get_heap_no_old(
/*================*/
/* out: heap order number */
rec_t* rec, /* in: physical record */
ulint comp) /* in: nonzero=compact page format */
rec_t* rec) /* in: physical record */
{
ulint ret;
ut_ad(rec);
ret = rec_get_bit_field_2(rec,
comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
ut_ad(ret <= REC_MAX_HEAP_NO);
return(ret);
}
return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
}
/**********************************************************
The following function is used to set the heap number field in the record. */
The following function is used to set the heap number
field in an old-style record. */
UNIV_INLINE
void
rec_set_heap_no(
/*=============*/
rec_set_heap_no_old(
/*================*/
rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint heap_no)/* in: the heap number */
{
ut_ad(heap_no <= REC_MAX_HEAP_NO);
rec_set_bit_field_2(rec, heap_no,
comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/**********************************************************
The following function is used to get the order number
of a new-style record in the heap of the index page. */
UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
			/* out: heap order number */
	rec_t*	rec)	/* in: physical record */
{
	ulint	heap_no;

	/* In the new-style format the heap number is stored in the
	two bytes at rec - REC_NEW_HEAP_NO. */
	heap_no = rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
			REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);

	return(heap_no);
}
/**********************************************************
The following function is used to set the heap number
field in a new-style record. */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
	rec_t*		rec,	/* in/out: physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 6 bytes available, or NULL */
	ulint		heap_no)/* in: the heap number */
{
	rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
			REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);

	if (!page_zip) {

		return;
	}

	/* Mirror the two modified header bytes in the compressed page. */
	page_zip_write(page_zip, rec - REC_NEW_HEAP_NO, 2);
}
/**********************************************************
The following function is used to test whether the data offsets in the record
are stored in one-byte or two-byte format. */
@ -1006,7 +1141,7 @@ rec_offs_any_extern(
}
/***************************************************************
Sets the value of the ith field extern storage bit. */
Sets the ith field extern storage bit. */
UNIV_INLINE
void
rec_set_nth_field_extern_bit(
@ -1014,15 +1149,14 @@ rec_set_nth_field_extern_bit(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */
ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case
we do not write to log about the change */
{
if (UNIV_LIKELY(index->table->comp)) {
rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
rec_set_nth_field_extern_bit_new(rec, index, i, mtr);
} else {
rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
rec_set_nth_field_extern_bit_old(rec, i, mtr);
}
}

View file

@ -45,7 +45,9 @@ UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id);/* in: value of the field */
@ -55,7 +57,9 @@ UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */

View file

@ -29,9 +29,10 @@ is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */
@ -94,7 +95,9 @@ UNIV_INLINE
void
row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */
@ -107,10 +110,10 @@ row_set_rec_trx_id(
offset = index->trx_id_offset;
if (offset) {
trx_write_trx_id(rec + offset, trx_id);
trx_write_trx_id(rec + offset, page_zip, trx_id);
} else {
row_set_rec_sys_field(DATA_TRX_ID,
rec, index, offsets, trx_id);
rec, page_zip, index, offsets, trx_id);
}
}
@ -120,7 +123,9 @@ UNIV_INLINE
void
row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */
@ -133,10 +138,11 @@ row_set_rec_roll_ptr(
offset = index->trx_id_offset;
if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN,
page_zip, roll_ptr);
} else {
row_set_rec_sys_field(DATA_ROLL_PTR,
rec, index, offsets, roll_ptr);
rec, page_zip, index, offsets, roll_ptr);
}
}

View file

@ -78,7 +78,9 @@ UNIV_INLINE
void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
@ -276,7 +278,8 @@ recovery. */
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint pos, /* in: TRX_ID position in rec */
dulint trx_id, /* in: transaction id */

View file

@ -11,6 +11,7 @@ Created 12/27/1996 Heikki Tuuri
#include "trx0undo.h"
#include "row0row.h"
#include "btr0sea.h"
#include "page0zip.h"
/*************************************************************************
Creates an update vector object. */
@ -104,7 +105,9 @@ UNIV_INLINE
void
row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
@ -116,7 +119,8 @@ row_upd_rec_sys_fields(
ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(!page_zip || page_zip_available(page_zip, 21));
row_set_rec_trx_id(rec, index, offsets, trx->id);
row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
row_set_rec_trx_id(rec, page_zip, index, offsets, trx->id);
row_set_rec_roll_ptr(rec, page_zip, index, offsets, roll_ptr);
}

View file

@ -23,6 +23,7 @@ Created 3/26/1996 Heikki Tuuri
#include "fut0lst.h"
#include "fsp0fsp.h"
#include "read0types.h"
#include "page0types.h"
/* In a MySQL replication slave, in crash recovery we store the master log
file name and position here. We have successfully got the updates to InnoDB
@ -210,8 +211,10 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
dulint id); /* in: id */
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id); /* in: id */
/*********************************************************************
Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of

View file

@ -9,6 +9,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h"
#include "trx0trx.h"
#include "data0type.h"
#include "page0zip.h"
/* The typedef for rseg slot in the file copy */
typedef byte trx_sysf_rseg_t;
@ -213,12 +214,18 @@ UNIV_INLINE
void
trx_write_trx_id(
/*=============*/
byte* ptr, /* in: pointer to memory where written */
dulint id) /* in: id */
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id) /* in: id */
{
ut_ad(DATA_TRX_ID_LEN == 6);
mach_write_to_6(ptr, id);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_TRX_ID_LEN));
page_zip_write(page_zip, ptr, DATA_TRX_ID_LEN);
}
}
/*********************************************************************

View file

@ -55,6 +55,8 @@ void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr); /* in: roll ptr */
/*********************************************************************
Reads a roll ptr from an index page. In case that the roll ptr size

View file

@ -7,6 +7,7 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "data0type.h"
#include "page0zip.h"
/***************************************************************************
Builds a roll pointer dulint. */
@ -87,12 +88,18 @@ UNIV_INLINE
void
trx_write_roll_ptr(
/*===============*/
byte* ptr, /* in: pointer to memory where written */
dulint roll_ptr) /* in: roll ptr */
byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr)/* in: roll ptr */
{
ut_ad(DATA_ROLL_PTR_LEN == 7);
mach_write_to_7(ptr, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_ROLL_PTR_LEN));
page_zip_write(page_zip, ptr, DATA_ROLL_PTR_LEN);
}
}
/*********************************************************************

View file

@ -1289,25 +1289,17 @@ lock_t*
lock_rec_get_next(
/*==============*/
/* out: next lock, NULL if none exists */
rec_t* rec, /* in: record on a page */
ulint heap_no,/* in: heap number of the record */
lock_t* lock) /* in: lock */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(lock_get_type(lock) == LOCK_REC);
if (page_rec_is_comp(rec)) {
do {
lock = lock_rec_get_next_on_page(lock);
} while (lock && !lock_rec_get_nth_bit(lock,
rec_get_heap_no(rec, TRUE)));
} else {
do {
lock = lock_rec_get_next_on_page(lock);
} while (lock && !lock_rec_get_nth_bit(lock,
rec_get_heap_no(rec, FALSE)));
}
do {
ut_ad(lock_get_type(lock) == LOCK_REC);
lock = lock_rec_get_next_on_page(lock);
} while (lock && !lock_rec_get_nth_bit(lock, heap_no));
return(lock);
}
@ -1319,7 +1311,8 @@ lock_t*
lock_rec_get_first(
/*===============*/
/* out: first lock, NULL if none exists */
rec_t* rec) /* in: record on a page */
rec_t* rec, /* in: record on a page */
ulint heap_no)/* in: heap number of the record */
{
lock_t* lock;
@ -1329,8 +1322,6 @@ lock_rec_get_first(
lock = lock_rec_get_first_on_page(rec);
if (UNIV_LIKELY_NULL(lock)) {
ulint heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
lock = lock_rec_get_next_on_page(lock);
}
@ -1495,6 +1486,7 @@ lock_rec_has_expl(
for a supremum record we regard this always a gap
type request */
rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: transaction */
{
lock_t* lock;
@ -1506,7 +1498,7 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
lock = lock_rec_get_first(rec);
lock = lock_rec_get_first(rec, heap_no);
while (lock) {
if (lock->trx == trx
@ -1524,7 +1516,7 @@ lock_rec_has_expl(
return(lock);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
return(NULL);
@ -1543,6 +1535,7 @@ lock_rec_other_has_expl_req(
ulint wait, /* in: LOCK_WAIT if also waiting locks are
taken into account, or 0 if not */
rec_t* rec, /* in: record to look at */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: transaction, or NULL if requests by all
transactions are taken into account */
{
@ -1555,7 +1548,7 @@ lock_rec_other_has_expl_req(
ut_ad(gap == 0 || gap == LOCK_GAP);
ut_ad(wait == 0 || wait == LOCK_WAIT);
lock = lock_rec_get_first(rec);
lock = lock_rec_get_first(rec, heap_no);
while (lock) {
if (lock->trx != trx
@ -1567,7 +1560,7 @@ lock_rec_other_has_expl_req(
return(lock);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
return(NULL);
@ -1585,6 +1578,7 @@ lock_rec_other_has_conflicting(
possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP,
LOCK_INSERT_INTENTION */
rec_t* rec, /* in: record to look at */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: our transaction */
{
lock_t* lock;
@ -1592,16 +1586,30 @@ lock_rec_other_has_conflicting(
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec);
lock = lock_rec_get_first(rec, heap_no);
while (lock) {
if (lock_rec_has_to_wait(trx, mode, lock,
page_rec_is_supremum(rec))) {
if (UNIV_LIKELY_NULL(lock)) {
if (page_rec_is_supremum(rec)) {
return(lock);
do {
if (lock_rec_has_to_wait(trx, mode, lock,
TRUE)) {
return(lock);
}
lock = lock_rec_get_next(heap_no, lock);
} while (lock);
} else {
do {
if (lock_rec_has_to_wait(trx, mode, lock,
FALSE)) {
return(lock);
}
lock = lock_rec_get_next(heap_no, lock);
} while (lock);
}
lock = lock_rec_get_next(rec, lock);
}
return(NULL);
@ -1617,19 +1625,14 @@ lock_rec_find_similar_on_page(
/*==========================*/
/* out: lock or NULL */
ulint type_mode, /* in: lock type_mode field */
rec_t* rec, /* in: record */
ulint heap_no, /* in: heap number of the record */
lock_t* lock, /* in: lock_rec_get_first_on_page() */
trx_t* trx) /* in: transaction */
{
lock_t* lock;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec);
while (lock != NULL) {
if (lock->trx == trx
&& lock->type_mode == type_mode
@ -1709,13 +1712,13 @@ lock_rec_create(
ulint type_mode,/* in: lock mode and wait flag, type is
ignored and replaced by LOCK_REC */
rec_t* rec, /* in: record on page */
ulint heap_no,/* in: heap number of the record */
dict_index_t* index, /* in: index of record */
trx_t* trx) /* in: transaction */
{
page_t* page;
lock_t* lock;
ulint page_no;
ulint heap_no;
ulint space;
ulint n_bits;
ulint n_bytes;
@ -1727,9 +1730,8 @@ lock_rec_create(
page = buf_frame_align(rec);
space = buf_frame_get_space_id(page);
page_no = buf_frame_get_page_no(page);
heap_no = rec_get_heap_no(rec, page_is_comp(page));
ut_ad(!!page_is_comp(page) == index->table->comp);
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
/* If rec is the supremum record, then we reset the gap and
LOCK_REC_NOT_GAP bits, as all locks on the supremum are
@ -1806,6 +1808,7 @@ lock_rec_enqueue_waiting(
{
lock_t* lock;
trx_t* trx;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
@ -1815,7 +1818,7 @@ lock_rec_enqueue_waiting(
we do not enqueue a lock request if the query thread should be
stopped anyway */
if (que_thr_stop(thr)) {
if (UNIV_UNLIKELY(que_thr_stop(thr))) {
ut_error;
@ -1824,7 +1827,7 @@ lock_rec_enqueue_waiting(
trx = thr_get_trx(thr);
if (trx->dict_operation) {
if (UNIV_UNLIKELY(trx->dict_operation)) {
ut_print_timestamp(stderr);
fputs(
" InnoDB: Error: a record lock wait happens in a dictionary operation!\n"
@ -1834,18 +1837,24 @@ lock_rec_enqueue_waiting(
"InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n",
stderr);
}
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
/* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
lock = lock_rec_create(type_mode | LOCK_WAIT, rec,
heap_no, index, trx);
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
if (lock_deadlock_occurs(lock, trx)) {
if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
lock_reset_lock_and_trx_wait(lock);
lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec,
page_rec_is_comp(rec)));
lock_rec_reset_nth_bit(lock, heap_no);
return(DB_DEADLOCK);
}
@ -1891,25 +1900,24 @@ lock_rec_add_to_queue(
ulint type_mode,/* in: lock mode, wait, gap etc. flags;
type is ignored and replaced by LOCK_REC */
rec_t* rec, /* in: record on page */
ulint heap_no,/* in: heap number of the record */
dict_index_t* index, /* in: index of record */
trx_t* trx) /* in: transaction */
{
lock_t* lock;
lock_t* similar_lock = NULL;
ulint heap_no;
ibool somebody_waits = FALSE;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_S)
|| !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
rec, trx));
rec, heap_no, trx));
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_X)
|| !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
rec, trx));
rec, heap_no, trx));
type_mode = type_mode | LOCK_REC;
@ -1929,32 +1937,37 @@ lock_rec_add_to_queue(
/* Look for a waiting lock request on the same record or on a gap */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec);
while (lock != NULL) {
if (lock_get_wait(lock)
&& (lock_rec_get_nth_bit(lock, heap_no))) {
somebody_waits = TRUE;
goto somebody_waits;
}
lock = lock_rec_get_next_on_page(lock);
}
/* Look for a similar record lock on the same page: if one is found
and there are no waiting lock requests, we can just set the bit */
if (!(type_mode & LOCK_WAIT)) {
similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);
/* Look for a similar record lock on the same page:
if one is found and there are no waiting lock requests,
we can just set the bit */
if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
lock = lock_rec_find_similar_on_page(type_mode, heap_no,
lock_rec_get_first_on_page(rec), trx);
lock_rec_set_nth_bit(similar_lock, heap_no);
if (lock) {
return(similar_lock);
lock_rec_set_nth_bit(lock, heap_no);
return(lock);
}
}
return(lock_rec_create(type_mode, rec, index, trx));
somebody_waits:
return(lock_rec_create(type_mode, rec, heap_no, index, trx));
}
/*************************************************************************
@ -1975,11 +1988,11 @@ lock_rec_lock_fast(
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of record */
dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */
{
lock_t* lock;
ulint heap_no;
trx_t* trx;
#ifdef UNIV_SYNC_DEBUG
@ -1995,15 +2008,13 @@ lock_rec_lock_fast(
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec);
trx = thr_get_trx(thr);
if (lock == NULL) {
if (!impl) {
lock_rec_create(mode, rec, index, trx);
lock_rec_create(mode, rec, heap_no, index, trx);
if (srv_locks_unsafe_for_binlog) {
trx_register_new_rec_lock(trx, index);
@ -2057,6 +2068,7 @@ lock_rec_lock_slow(
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of record */
dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */
{
@ -2078,12 +2090,12 @@ lock_rec_lock_slow(
trx = thr_get_trx(thr);
if (lock_rec_has_expl(mode, rec, trx)) {
if (lock_rec_has_expl(mode, rec, heap_no, trx)) {
/* The trx already has a strong enough lock on rec: do
nothing */
err = DB_SUCCESS;
} else if (lock_rec_other_has_conflicting(mode, rec, trx)) {
} else if (lock_rec_other_has_conflicting(mode, rec, heap_no, trx)) {
/* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong
@ -2098,8 +2110,8 @@ lock_rec_lock_slow(
if (!impl) {
/* Set the requested lock on the record */
lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
trx);
lock_rec_add_to_queue(LOCK_REC | mode, rec, heap_no,
index, trx);
if (srv_locks_unsafe_for_binlog) {
trx_register_new_rec_lock(trx, index);
}
@ -2133,6 +2145,7 @@ lock_rec_lock(
que_thr_t* thr) /* in: query thread */
{
ulint err;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
@ -2147,14 +2160,20 @@ lock_rec_lock(
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (lock_rec_lock_fast(impl, mode, rec, heap_no, index, thr)) {
/* We try a simplified and faster subroutine for the most
common cases */
err = DB_SUCCESS;
} else {
err = lock_rec_lock_slow(impl, mode, rec, index, thr);
err = lock_rec_lock_slow(impl, mode, rec, heap_no, index, thr);
}
return(err);
@ -2405,9 +2424,13 @@ lock_rec_reset_and_release_wait(
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first(rec);
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec, heap_no);
while (lock != NULL) {
if (lock_get_wait(lock)) {
@ -2416,7 +2439,7 @@ lock_rec_reset_and_release_wait(
lock_rec_reset_nth_bit(lock, heap_no);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
}
@ -2434,11 +2457,21 @@ lock_rec_inherit_to_gap(
the locks on this record */
{
lock_t* lock;
ulint heir_heap_no;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec);
if (page_rec_is_comp(rec)) {
heir_heap_no = rec_get_heap_no_new(heir);
heap_no = rec_get_heap_no_new(rec);
} else {
heir_heap_no = rec_get_heap_no_old(heir);
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec, heap_no);
/* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
@ -2452,10 +2485,11 @@ lock_rec_inherit_to_gap(
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP,
heir, lock->index, lock->trx);
heir, heir_heap_no,
lock->index, lock->trx);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
}
@ -2472,11 +2506,21 @@ lock_rec_inherit_to_gap_if_gap_lock(
the locks on this record */
{
lock_t* lock;
ulint heir_heap_no;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec);
if (page_rec_is_comp(rec)) {
heir_heap_no = rec_get_heap_no_new(heir);
heap_no = rec_get_heap_no_new(rec);
} else {
heir_heap_no = rec_get_heap_no_old(heir);
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec, heap_no);
while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock)
@ -2485,10 +2529,11 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP,
heir, lock->index, lock->trx);
heir, heir_heap_no,
lock->index, lock->trx);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
}
@ -2505,37 +2550,44 @@ lock_rec_move(
ulint comp) /* in: nonzero=compact page format */
{
lock_t* lock;
ulint heap_no;
ulint receiver_heap_no;
ulint donator_heap_no;
ulint type_mode;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(donator, comp);
lock = lock_rec_get_first(donator);
if (UNIV_LIKELY(comp)) {
receiver_heap_no = rec_get_heap_no_new(donator);
donator_heap_no = rec_get_heap_no_new(donator);
} else {
receiver_heap_no = rec_get_heap_no_old(donator);
donator_heap_no = rec_get_heap_no_old(donator);
}
ut_ad(lock_rec_get_first(receiver) == NULL);
lock = lock_rec_get_first(donator, donator_heap_no);
ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
while (lock != NULL) {
type_mode = lock->type_mode;
lock_rec_reset_nth_bit(lock, heap_no);
lock_rec_reset_nth_bit(lock, donator_heap_no);
if (lock_get_wait(lock)) {
if (UNIV_UNLIKELY(lock_get_wait(lock))) {
lock_reset_lock_and_trx_wait(lock);
}
/* Note that we FIRST reset the bit, and then set the lock:
the function works also if donator == receiver */
lock_rec_add_to_queue(type_mode, receiver, lock->index,
lock->trx);
lock = lock_rec_get_next(donator, lock);
lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
lock->index, lock->trx);
lock = lock_rec_get_next(donator_heap_no, lock);
}
ut_ad(lock_rec_get_first(donator) == NULL);
ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
}
/*****************************************************************
@ -2555,9 +2607,9 @@ lock_move_reorganize_page(
page_cur_t cur1;
page_cur_t cur2;
ulint old_heap_no;
ulint new_heap_no;
UT_LIST_BASE_NODE_T(lock_t) old_locks;
mem_heap_t* heap = NULL;
rec_t* sup;
ulint comp;
lock_mutex_enter_kernel();
@ -2595,8 +2647,6 @@ lock_move_reorganize_page(
lock = lock_rec_get_next_on_page(lock);
}
sup = page_get_supremum_rec(page);
lock = UT_LIST_GET_FIRST(old_locks);
comp = page_is_comp(page);
@ -2617,8 +2667,17 @@ lock_move_reorganize_page(
page_cur_get_rec(&cur2),
rec_get_data_size_old(
page_cur_get_rec(&cur2))));
old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2),
comp);
if (UNIV_LIKELY(comp)) {
old_heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
new_heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
old_heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
new_heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, old_heap_no)) {
@ -2627,9 +2686,10 @@ lock_move_reorganize_page(
lock_rec_add_to_queue(lock->type_mode,
page_cur_get_rec(&cur1),
new_heap_no,
lock->index, lock->trx);
/* if ((page_cur_get_rec(&cur1) == sup)
/* if ((page_cur_is_after_last(&cur1))
&& lock_get_wait(lock)) {
fprintf(stderr,
"---\n--\n!!!Lock reorg: supr type %lu\n",
@ -2637,7 +2697,7 @@ lock_move_reorganize_page(
} */
}
if (page_cur_get_rec(&cur1) == sup) {
if (page_cur_is_after_last(&cur1)) {
break;
}
@ -2675,9 +2735,8 @@ lock_move_rec_list_end(
page_cur_t cur1;
page_cur_t cur2;
ulint heap_no;
rec_t* sup;
ulint type_mode;
ulint comp;
ut_ad(page_is_comp(page) == page_is_comp(new_page));
ut_ad(page == buf_frame_align(rec));
lock_mutex_enter_kernel();
@ -2688,12 +2747,8 @@ lock_move_rec_list_end(
table to the end of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */
sup = page_get_supremum_rec(page);
lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page);
while (lock != NULL) {
page_cur_position(rec, &cur1);
@ -2708,13 +2763,19 @@ lock_move_rec_list_end(
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != sup) {
ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1),
while (!page_cur_is_after_last(&cur1)) {
ut_ad(page_is_comp(page)
|| 0 == ut_memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
page_cur_get_rec(&cur2))));
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
comp);
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode;
@ -2725,8 +2786,17 @@ lock_move_rec_list_end(
lock_reset_lock_and_trx_wait(lock);
}
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
}
lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2),
heap_no,
lock->index, lock->trx);
}
@ -2764,15 +2834,13 @@ lock_move_rec_list_start(
page_cur_t cur2;
ulint heap_no;
ulint type_mode;
ulint comp;
ut_a(new_page);
lock_mutex_enter_kernel();
lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page);
ut_ad(comp == page_is_comp(new_page));
ut_ad(page_is_comp(page) == page_is_comp(new_page));
ut_ad(page == buf_frame_align(rec));
while (lock != NULL) {
@ -2787,12 +2855,18 @@ lock_move_rec_list_start(
reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != rec) {
ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1),
ut_ad(page_is_comp(page)
|| 0 == ut_memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
page_cur_get_rec(&cur2))));
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
comp);
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode;
@ -2803,8 +2877,17 @@ lock_move_rec_list_start(
lock_reset_lock_and_trx_wait(lock);
}
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
}
lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2),
heap_no,
lock->index, lock->trx);
}
@ -2832,16 +2915,15 @@ lock_update_split_right(
page_t* right_page, /* in: right page */
page_t* left_page) /* in: left page */
{
ulint comp;
lock_mutex_enter_kernel();
comp = page_is_comp(left_page);
ut_ad(comp == page_is_comp(right_page));
ut_ad(page_is_comp(left_page) == page_is_comp(right_page));
/* Move the locks on the supremum of the left page to the supremum
of the right page */
lock_rec_move(page_get_supremum_rec(right_page),
page_get_supremum_rec(left_page), comp);
page_get_supremum_rec(left_page),
page_is_comp(left_page));
/* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */
@ -3196,7 +3278,7 @@ retry:
goto retry;
}
if (ret == LOCK_VICTIM_IS_START) {
if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
if (lock_get_type(lock) & LOCK_TABLE) {
table = lock->un_member.tab_lock.table;
index = NULL;
@ -4403,6 +4485,7 @@ lock_rec_queue_validate(
{
trx_t* impl_trx;
lock_t* lock;
ulint heap_no;
ut_a(rec);
ut_ad(rec_offs_validate(rec, index, offsets));
@ -4410,9 +4493,15 @@ lock_rec_queue_validate(
lock_mutex_enter_kernel();
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (!page_rec_is_user_rec(rec)) {
lock = lock_rec_get_first(rec);
lock = lock_rec_get_first(rec, heap_no);
while (lock) {
ut_a(lock->trx->conc_state == TRX_ACTIVE
@ -4430,7 +4519,7 @@ lock_rec_queue_validate(
ut_a(lock->index == index);
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
lock_mutex_exit_kernel();
@ -4438,19 +4527,18 @@ lock_rec_queue_validate(
return(TRUE);
}
if (index && (index->type & DICT_CLUSTERED)) {
if (!index);
else if (index->type & DICT_CLUSTERED) {
impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) {
LOCK_WAIT, rec, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx));
heap_no, impl_trx));
}
}
if (index && !(index->type & DICT_CLUSTERED)) {
} else {
/* The kernel mutex may get released temporarily in the
next function call: we have to release lock table mutex
@ -4460,14 +4548,14 @@ lock_rec_queue_validate(
rec, index, offsets);
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) {
LOCK_WAIT, rec, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
rec, impl_trx));
rec, heap_no, impl_trx));
}
}
lock = lock_rec_get_first(rec);
lock = lock_rec_get_first(rec, heap_no);
while (lock) {
ut_a(lock->trx->conc_state == TRX_ACTIVE
@ -4488,15 +4576,15 @@ lock_rec_queue_validate(
} else {
mode = LOCK_S;
}
ut_a(!lock_rec_other_has_expl_req(mode,
0, 0, rec, lock->trx));
ut_a(!lock_rec_other_has_expl_req(mode, 0, 0,
rec, heap_no, lock->trx));
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
ut_a(lock_rec_has_to_wait_in_queue(lock));
}
lock = lock_rec_get_next(rec, lock);
lock = lock_rec_get_next(heap_no, lock);
}
lock_mutex_exit_kernel();
@ -4704,6 +4792,7 @@ lock_rec_insert_check_and_lock(
trx_t* trx;
lock_t* lock;
ulint err;
ulint next_rec_heap_no;
if (flags & BTR_NO_LOCKING_FLAG) {
@ -4715,15 +4804,19 @@ lock_rec_insert_check_and_lock(
trx = thr_get_trx(thr);
next_rec = page_rec_get_next(rec);
*inherit = FALSE;
lock_mutex_enter_kernel();
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
lock = lock_rec_get_first(next_rec);
if (page_rec_is_comp(next_rec)) {
next_rec_heap_no = rec_get_heap_no_new(next_rec);
} else {
next_rec_heap_no = rec_get_heap_no_old(next_rec);
}
if (lock == NULL) {
lock = lock_rec_get_first(next_rec, next_rec_heap_no);
if (UNIV_LIKELY(lock == NULL)) {
/* We optimize CPU time usage in the simplest case */
lock_mutex_exit_kernel();
@ -4735,6 +4828,8 @@ lock_rec_insert_check_and_lock(
thr_get_trx(thr)->id);
}
*inherit = FALSE;
return(DB_SUCCESS);
}
@ -4751,7 +4846,8 @@ lock_rec_insert_check_and_lock(
on the successor, which produced an unnecessary deadlock. */
if (lock_rec_other_has_conflicting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION, next_rec, trx)) {
| LOCK_INSERT_INTENTION,
next_rec, next_rec_heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
@ -4821,12 +4917,20 @@ lock_rec_convert_impl_to_expl(
/* If the transaction has no explicit x-lock set on the
record, set one for it */
ulint heap_no;
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx)) {
heap_no, impl_trx)) {
lock_rec_add_to_queue(LOCK_REC | LOCK_X
| LOCK_REC_NOT_GAP, rec, index,
impl_trx);
| LOCK_REC_NOT_GAP, rec, heap_no,
index, impl_trx);
}
}
}

View file

@ -22,6 +22,7 @@ Created 9/20/1997 Heikki Tuuri
#include "mtr0log.h"
#include "page0page.h"
#include "page0cur.h"
#include "page0zip.h"
#include "btr0btr.h"
#include "btr0cur.h"
#include "ibuf0ibuf.h"
@ -753,9 +754,10 @@ recv_parse_or_apply_log_rec_body(
byte type, /* in: type */
byte* ptr, /* in: pointer to a buffer */
byte* end_ptr,/* in: pointer to the buffer end */
page_t* page, /* in: buffer page or NULL; if not NULL, then the log
record is applied to the page, and the log record
page_t* page, /* in/out: buffer page or NULL; if not NULL, then the
log record is applied to the page, and the log record
should be complete then */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
page is non-NULL */
{
@ -771,7 +773,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
index, page, mtr);
index, page, page_zip, mtr);
}
break;
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
@ -780,7 +782,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_del_mark_set_clust_rec(ptr,
end_ptr, index, page);
end_ptr, page, page_zip, index);
}
break;
case MLOG_COMP_REC_SEC_DELETE_MARK:
@ -793,7 +795,8 @@ recv_parse_or_apply_log_rec_body(
}
/* Fall through */
case MLOG_REC_SEC_DELETE_MARK:
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
page, page_zip);
break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
@ -801,7 +804,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
page, index);
page, page_zip, index);
}
break;
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
@ -821,7 +824,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_parse_copy_rec_list_to_created_page(ptr,
end_ptr, index, page, mtr);
end_ptr, index, page, page_zip, mtr);
}
break;
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
@ -864,7 +867,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_delete_rec(ptr, end_ptr,
index, page, mtr);
index, page, page_zip, mtr);
}
break;
case MLOG_IBUF_BITMAP_INIT:
@ -882,6 +885,16 @@ recv_parse_or_apply_log_rec_body(
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
ULINT_UNDEFINED);
break;
case MLOG_COMP_DECOMPRESS:
if (page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_decompress(
page_zip, page, NULL))) {
ut_error;
}
}
break;
default:
ptr = NULL;
recv_sys->found_corrupt_log = TRUE;
@ -1089,6 +1102,7 @@ recv_recover_page(
ulint page_no) /* in: page number */
{
buf_block_t* block = NULL;
page_zip_des_t* page_zip = NULL;
recv_addr_t* recv_addr;
recv_t* recv;
byte* buf;
@ -1133,6 +1147,7 @@ recv_recover_page(
if (!recover_backup) {
block = buf_block_align(page);
page_zip = buf_block_get_page_zip(block);
if (just_read_in) {
/* Move the ownership of the x-latch on the page to this OS
@ -1220,7 +1235,8 @@ recv_recover_page(
#endif /* UNIV_DEBUG */
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len, page, &mtr);
buf + recv->len,
page, page_zip, &mtr);
mach_write_to_8(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM,
ut_dulint_add(recv->start_lsn,
@ -1613,8 +1629,8 @@ recv_update_replicate(
buf_page_dbg_add_level(replica, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
ptr = recv_parse_or_apply_log_rec_body(type, body, end_ptr, replica,
&mtr);
ptr = recv_parse_or_apply_log_rec_body(type, body, end_ptr,
replica, NULL, &mtr);
ut_a(ptr == end_ptr);
/* Notify the buffer manager that the page has been updated */
@ -1845,7 +1861,7 @@ recv_parse_log_rec(
}
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL);
NULL, NULL, NULL);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0);

View file

@ -19,6 +19,6 @@ include ../include/Makefile.i
noinst_LIBRARIES = libpage.a
libpage_a_SOURCES = page0page.c page0cur.c
libpage_a_SOURCES = page0page.c page0cur.c page0zip.c
EXTRA_PROGRAMS =

View file

@ -11,6 +11,7 @@ Created 10/4/1994 Heikki Tuuri
#include "page0cur.ic"
#endif
#include "page0zip.h"
#include "rem0cmp.h"
#include "mtr0log.h"
#include "log0recv.h"
@ -483,7 +484,7 @@ page_cur_open_on_rnd_user_rec(
ulint rnd;
rec_t* rec;
if (page_get_n_recs(page) == 0) {
if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
page_cur_position(page_get_infimum_rec(page), cursor);
return;
@ -522,19 +523,14 @@ page_cur_insert_rec_write_log(
ulint cur_rec_size;
ulint extra_size;
ulint cur_extra_size;
ulint min_rec_size;
byte* ins_ptr;
byte* cur_ptr;
ulint extra_info_yes;
const byte* ins_ptr;
byte* log_ptr;
byte* log_end;
const byte* log_end;
ulint i;
ulint comp;
ut_a(rec_size < UNIV_PAGE_SIZE);
ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec));
ut_ad(!page_rec_is_comp(insert_rec) == !index->table->comp);
comp = page_rec_is_comp(insert_rec);
{
mem_heap_t* heap = NULL;
@ -567,45 +563,55 @@ page_cur_insert_rec_write_log(
i = 0;
if (cur_extra_size == extra_size) {
min_rec_size = ut_min(cur_rec_size, rec_size);
ulint min_rec_size = ut_min(cur_rec_size, rec_size);
cur_ptr = cursor_rec - cur_extra_size;
const byte* cur_ptr = cursor_rec - cur_extra_size;
/* Find out the first byte in insert_rec which differs from
cursor_rec; skip the bytes in the record info */
for (;;) {
if (i >= min_rec_size) {
break;
} else if (*ins_ptr == *cur_ptr) {
do {
if (*ins_ptr == *cur_ptr) {
i++;
ins_ptr++;
cur_ptr++;
} else if ((i < extra_size)
&& (i >= extra_size - (comp
? REC_N_NEW_EXTRA_BYTES
: REC_N_OLD_EXTRA_BYTES))) {
&& (i >= extra_size -
page_rec_get_base_extra_size(
insert_rec))) {
i = extra_size;
ins_ptr = insert_rec;
cur_ptr = cursor_rec;
} else {
break;
}
}
} while (i < min_rec_size);
}
if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
log_ptr = mlog_open_and_write_index(mtr, insert_rec, index,
comp
? MLOG_COMP_REC_INSERT : MLOG_REC_INSERT,
2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
if (page_rec_is_comp(insert_rec)) {
log_ptr = mlog_open_and_write_index(mtr, insert_rec,
index, MLOG_COMP_REC_INSERT,
2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
if (UNIV_UNLIKELY(!log_ptr)) {
/* Logging in mtr is switched off
during crash recovery: in that case
mlog_open returns NULL */
return;
}
} else {
log_ptr = mlog_open(mtr, 11
+ 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
if (UNIV_UNLIKELY(!log_ptr)) {
/* Logging in mtr is switched off
during crash recovery: in that case
mlog_open returns NULL */
return;
}
if (!log_ptr) {
/* Logging in mtr is switched off during crash
recovery: in that case mlog_open returns NULL */
return;
log_ptr = mlog_write_initial_log_record_fast(
insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
}
log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
@ -623,24 +629,33 @@ page_cur_insert_rec_write_log(
log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
}
if ((rec_get_info_and_status_bits(insert_rec, comp) !=
rec_get_info_and_status_bits(cursor_rec, comp))
|| (extra_size != cur_extra_size)
|| (rec_size != cur_rec_size)) {
if (page_rec_is_comp(insert_rec)) {
if (UNIV_UNLIKELY
(rec_get_info_and_status_bits(insert_rec, TRUE) !=
rec_get_info_and_status_bits(cursor_rec, TRUE))) {
extra_info_yes = 1;
goto need_extra_info;
}
} else {
extra_info_yes = 0;
if (UNIV_UNLIKELY
(rec_get_info_and_status_bits(insert_rec, FALSE) !=
rec_get_info_and_status_bits(cursor_rec, FALSE))) {
goto need_extra_info;
}
}
/* Write the record end segment length and the extra info storage
flag */
log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)
+ extra_info_yes);
if (extra_info_yes) {
if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
need_extra_info:
/* Write the record end segment length
and the extra info storage flag */
log_ptr += mach_write_compressed(log_ptr,
2 * (rec_size - i) + 1);
/* Write the info bits */
mach_write_to_1(log_ptr,
rec_get_info_and_status_bits(insert_rec, comp));
rec_get_info_and_status_bits(insert_rec,
page_rec_is_comp(insert_rec)));
log_ptr++;
/* Write the record origin offset */
@ -651,8 +666,12 @@ page_cur_insert_rec_write_log(
ut_a(i < UNIV_PAGE_SIZE);
ut_a(extra_size < UNIV_PAGE_SIZE);
} else {
/* Write the record end segment length
and the extra info storage flag */
log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
}
/* Write to the log the inserted index record end segment which
differs from the cursor record */
@ -679,10 +698,11 @@ page_cur_parse_insert_rec(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint extra_info_yes;
ulint offset = 0; /* remove warning */
ulint origin_offset;
ulint end_seg_len;
@ -725,16 +745,13 @@ page_cur_parse_insert_rec(
return(NULL);
}
extra_info_yes = end_seg_len & 0x1UL;
end_seg_len >>= 1;
if (end_seg_len >= UNIV_PAGE_SIZE) {
if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
recv_sys->found_corrupt_log = TRUE;
return(NULL);
}
if (extra_info_yes) {
if (end_seg_len & 0x1UL) {
/* Read the info bits */
if (end_ptr < ptr + 1) {
@ -764,17 +781,18 @@ page_cur_parse_insert_rec(
ut_a(mismatch_index < UNIV_PAGE_SIZE);
}
if (end_ptr < ptr + end_seg_len) {
if (end_ptr < ptr + (end_seg_len >> 1)) {
return(NULL);
}
if (page == NULL) {
return(ptr + end_seg_len);
return(ptr + (end_seg_len >> 1));
}
ut_ad(!!page_is_comp(page) == index->table->comp);
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_is_comp(page));
/* Read from the log the inserted index record end segment which
differs from the cursor record */
@ -788,12 +806,14 @@ page_cur_parse_insert_rec(
offsets = rec_get_offsets(cursor_rec, index, offsets,
ULINT_UNDEFINED, &heap);
if (extra_info_yes == 0) {
if (!(end_seg_len & 0x1UL)) {
info_and_status_bits = rec_get_info_and_status_bits(
cursor_rec, page_is_comp(page));
origin_offset = rec_offs_extra_size(offsets);
mismatch_index = rec_offs_size(offsets) - end_seg_len;
mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
}
end_seg_len >>= 1;
if (mismatch_index + end_seg_len < sizeof buf1) {
buf = buf1;
@ -803,7 +823,7 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */
if (mismatch_index >= UNIV_PAGE_SIZE) {
if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
fprintf(stderr,
"Is short %lu, info_and_status_bits %lu, offset %lu, "
"o_offset %lu\n"
@ -826,14 +846,24 @@ page_cur_parse_insert_rec(
ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page),
if (page_is_comp(page)) {
rec_set_info_and_status_bits(buf + origin_offset, NULL,
info_and_status_bits);
} else {
rec_set_info_bits_old(buf + origin_offset,
info_and_status_bits);
}
page_cur_position(cursor_rec, &cursor);
offsets = rec_get_offsets(buf + origin_offset, index, offsets,
ULINT_UNDEFINED, &heap);
page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr);
if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor, page_zip,
buf + origin_offset, index, offsets, mtr))) {
/* The redo log record should only have been written
after the write was successful. */
ut_error;
}
if (buf != buf1) {
@ -859,6 +889,8 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */
@ -873,14 +905,7 @@ page_cur_insert_rec_low(
ulint heap_no; /* heap number of the inserted record */
rec_t* current_rec; /* current record after which the
new record is inserted */
rec_t* next_rec; /* next record after current before
the insertion */
ulint owner_slot; /* the slot which owns the
inserted record */
rec_t* owner_rec;
ulint n_owned;
mem_heap_t* heap = NULL;
ulint comp;
ut_ad(cursor && mtr);
ut_ad(tuple || rec);
@ -888,10 +913,9 @@ page_cur_insert_rec_low(
ut_ad(rec || dtuple_check_typed(tuple));
page = page_cur_get_page(cursor);
comp = page_is_comp(page);
ut_ad(index->table->comp == !!comp);
ut_ad(index->table->comp == (ibool) !!page_is_comp(page));
ut_ad(cursor->rec != page_get_supremum_rec(page));
ut_ad(!page_rec_is_supremum(cursor->rec));
/* 1. Get the size of the physical record in the page */
if (tuple != NULL) {
@ -905,10 +929,20 @@ page_cur_insert_rec_low(
rec_size = rec_offs_size(offsets);
}
/* 2. Try to find suitable space from page memory management */
insert_buf = page_mem_alloc(page, rec_size, index, &heap_no);
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_alloc(
page_zip, page, 25 + rec_size))) {
if (insert_buf == NULL) {
goto err_exit;
}
}
/* 2. Try to find suitable space from page memory management */
insert_buf = page_mem_alloc(page, page_zip, rec_size,
index, &heap_no);
if (UNIV_UNLIKELY(insert_buf == NULL)) {
err_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@ -933,66 +967,95 @@ page_cur_insert_rec_low(
/* 4. Insert the record in the linked list of records */
current_rec = cursor->rec;
ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM);
ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
{
/* next record after current before the insertion */
rec_t* next_rec = page_rec_get_next(current_rec);
#ifdef UNIV_DEBUG
if (page_is_comp(page)) {
ut_ad(rec_get_status(current_rec)
<= REC_STATUS_INFIMUM);
ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
page_rec_set_next(insert_rec, next_rec, NULL);
page_rec_set_next(current_rec, insert_rec, page_zip);
}
next_rec = page_rec_get_next(current_rec);
ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM);
page_rec_set_next(insert_rec, next_rec);
page_rec_set_next(current_rec, insert_rec);
page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
page_header_set_field(page, page_zip, PAGE_N_RECS,
1 + page_get_n_recs(page));
/* 5. Set the n_owned field in the inserted record to zero,
and set the heap_no field */
rec_set_n_owned(insert_rec, comp, 0);
rec_set_heap_no(insert_rec, comp, heap_no);
if (page_is_comp(page)) {
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, heap_no);
} else {
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, heap_no);
}
/* 6. Update the last insertion info in page header */
last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
ut_ad(!last_insert || !comp
ut_ad(!last_insert || !page_is_comp(page)
|| rec_get_node_ptr_flag(last_insert)
== rec_get_node_ptr_flag(insert_rec));
if (last_insert == NULL) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
page_header_set_field(page, PAGE_N_DIRECTION, 0);
if (UNIV_UNLIKELY(last_insert == NULL)) {
page_header_set_field(page, page_zip, PAGE_DIRECTION,
PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
} else if ((last_insert == current_rec)
&& (page_header_get_field(page, PAGE_DIRECTION) != PAGE_LEFT)) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT);
page_header_set_field(page, PAGE_N_DIRECTION,
page_header_set_field(page, page_zip, PAGE_DIRECTION,
PAGE_RIGHT);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(page, PAGE_N_DIRECTION) + 1);
} else if ((page_rec_get_next(insert_rec) == last_insert)
&& (page_header_get_field(page, PAGE_DIRECTION) != PAGE_RIGHT)) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT);
page_header_set_field(page, PAGE_N_DIRECTION,
page_header_set_field(page, page_zip, PAGE_DIRECTION,
PAGE_LEFT);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(page, PAGE_N_DIRECTION) + 1);
} else {
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
page_header_set_field(page, PAGE_N_DIRECTION, 0);
page_header_set_field(page, page_zip, PAGE_DIRECTION,
PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
}
page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec);
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
/* 7. It remains to update the owner record. */
owner_rec = page_rec_find_owner_rec(insert_rec);
n_owned = rec_get_n_owned(owner_rec, comp);
rec_set_n_owned(owner_rec, comp, n_owned + 1);
{
rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
ulint n_owned;
if (page_is_comp(page)) {
n_owned = rec_get_n_owned_new(owner_rec);
rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
} else {
n_owned = rec_get_n_owned_old(owner_rec);
rec_set_n_owned_old(owner_rec, n_owned + 1);
}
/* 8. Now we have incremented the n_owned field of the owner
record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
we have to split the corresponding directory slot in two. */
/* 8. Now we have incremented the n_owned field of the owner
record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
we have to split the corresponding directory slot in two. */
if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) {
owner_slot = page_dir_find_owner_slot(owner_rec);
page_dir_split_slot(page, owner_slot);
if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
page_dir_split_slot(page, page_zip,
page_dir_find_owner_slot(owner_rec));
}
}
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip,
insert_rec - rec_offs_extra_size(offsets),
rec_size);
}
/* 9. Write log record of the insert */
@ -1041,7 +1104,8 @@ page_parse_copy_rec_list_to_created_page(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
byte* rec_end;
@ -1069,14 +1133,15 @@ page_parse_copy_rec_list_to_created_page(
while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
index, page, mtr);
index, page, page_zip, mtr);
}
ut_a(ptr == rec_end);
page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
page_header_set_field(page, PAGE_N_DIRECTION, 0);
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
page_header_set_field(page, page_zip, PAGE_DIRECTION,
PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
return(rec_end);
}
@ -1089,7 +1154,6 @@ void
page_copy_rec_list_end_to_created_page(
/*===================================*/
page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: first record to copy */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */
@ -1105,22 +1169,21 @@ page_copy_rec_list_end_to_created_page(
ulint log_mode;
byte* log_ptr;
ulint log_data_len;
ulint comp = page_is_comp(page);
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_ad(page_dir_get_n_heap(new_page) == 2);
ut_ad(page != new_page);
ut_ad(comp == page_is_comp(new_page));
ut_ad(ut_align_down(rec, UNIV_PAGE_SIZE) != new_page);
ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
if (rec == page_get_infimum_rec(page)) {
if (page_rec_is_infimum(rec)) {
rec = page_rec_get_next(rec);
}
if (rec == page_get_supremum_rec(page)) {
if (page_rec_is_supremum(rec)) {
return;
}
@ -1128,8 +1191,8 @@ page_copy_rec_list_end_to_created_page(
#ifdef UNIV_DEBUG
/* To pass the debug tests we have to set these dummy values
in the debug version */
page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2);
page_header_set_ptr(new_page, PAGE_HEAP_TOP,
page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
new_page + UNIV_PAGE_SIZE - 1);
#endif
@ -1143,7 +1206,7 @@ page_copy_rec_list_end_to_created_page(
log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
prev_rec = page_get_infimum_rec(new_page);
if (comp) {
if (page_is_comp(new_page)) {
heap_top = new_page + PAGE_NEW_SUPREMUM_END;
} else {
heap_top = new_page + PAGE_OLD_SUPREMUM_END;
@ -1152,43 +1215,52 @@ page_copy_rec_list_end_to_created_page(
slot_index = 0;
n_recs = 0;
/* should be do ... until, comment by Jani */
while (rec != page_get_supremum_rec(page)) {
do {
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
insert_rec = rec_copy(heap_top, rec, offsets);
rec_set_next_offs(prev_rec, comp, insert_rec - new_page);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(prev_rec, NULL,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_set_n_owned(insert_rec, comp, 0);
rec_set_heap_no(insert_rec, comp, 2 + n_recs);
rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no_new(insert_rec, NULL, 2 + n_recs);
} else {
rec_set_next_offs_old(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_size = rec_offs_size(offsets);
heap_top = heap_top + rec_size;
ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, 2 + n_recs);
}
count++;
n_recs++;
if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
if (UNIV_UNLIKELY(count ==
(PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
slot_index++;
slot = page_dir_get_nth_slot(new_page, slot_index);
page_dir_slot_set_rec(slot, insert_rec);
page_dir_slot_set_n_owned(slot, count);
page_dir_slot_set_n_owned(slot, NULL, count);
count = 0;
}
rec_size = rec_offs_size(offsets);
ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
heap_top += rec_size;
page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
index, mtr);
prev_rec = insert_rec;
rec = page_rec_get_next(rec);
}
} while (!page_rec_is_supremum(rec));
if ((slot_index > 0) && (count + 1
+ (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
@ -1202,7 +1274,7 @@ page_copy_rec_list_end_to_created_page(
count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
page_dir_slot_set_n_owned(slot, 0);
page_dir_slot_set_n_owned(slot, NULL, 0);
slot_index--;
}
@ -1216,23 +1288,27 @@ page_copy_rec_list_end_to_created_page(
ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
mach_write_to_4(log_ptr, log_data_len);
rec_set_next_offs(insert_rec, comp,
comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM);
if (page_is_comp(new_page)) {
rec_set_next_offs_new(insert_rec, NULL, PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
}
slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
page_dir_slot_set_n_owned(slot, count + 1);
page_dir_slot_set_n_owned(slot, NULL, count + 1);
page_dir_set_n_slots(new_page, 2 + slot_index);
page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top);
page_dir_set_n_heap(new_page, 2 + n_recs);
page_header_set_field(new_page, PAGE_N_RECS, n_recs);
page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
page_dir_set_n_heap(new_page, NULL, 2 + n_recs);
page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL);
page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
page_header_set_field(new_page, PAGE_N_DIRECTION, 0);
page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
page_header_set_field(new_page, NULL, PAGE_DIRECTION,
PAGE_NO_DIRECTION);
page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
/* Restore the log mode */
@ -1251,7 +1327,7 @@ page_cur_delete_rec_write_log(
{
byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == index->table->comp);
ut_ad((ibool) !!page_rec_is_comp(rec) == index->table->comp);
log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec)
@ -1280,7 +1356,9 @@ page_cur_parse_delete_rec(
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr) /* in: mtr or NULL */
{
ulint offset;
@ -1304,10 +1382,11 @@ page_cur_parse_delete_rec(
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
page_cur_position(rec, &cursor);
ut_ad(!page_zip || page_is_comp(page));
page_cur_delete_rec(&cursor, index,
rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), mtr);
ULINT_UNDEFINED, &heap), page_zip, mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@ -1323,9 +1402,11 @@ record after the deleted one. */
void
page_cur_delete_rec(
/*================*/
page_cur_t* cursor, /* in: a page cursor */
page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr) /* in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
@ -1343,12 +1424,12 @@ page_cur_delete_rec(
page = page_cur_get_page(cursor);
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == index->table->comp);
ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_zip_available(page_zip, 32));
/* The record must not be the supremum or infimum record. */
ut_ad(current_rec != page_get_supremum_rec(page));
ut_ad(current_rec != page_get_infimum_rec(page));
ut_ad(page_rec_is_user_rec(current_rec));
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
@ -1360,7 +1441,7 @@ page_cur_delete_rec(
/* 1. Reset the last insert info in the page header and increment
the modify clock for the frame */
page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
frame modify clock */
@ -1388,8 +1469,8 @@ page_cur_delete_rec(
/* 3. Remove the record from the linked list of records */
page_rec_set_next(prev_rec, next_rec);
page_header_set_field(page, PAGE_N_RECS,
page_rec_set_next(prev_rec, next_rec, page_zip);
page_header_set_field(page, page_zip, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - 1));
/* 4. If the deleted record is pointed to by a dir slot, update the
@ -1406,16 +1487,16 @@ page_cur_delete_rec(
/* 5. Update the number of owned records of the slot */
page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1);
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
page_mem_free(page, current_rec, offsets);
page_mem_free(page, page_zip, current_rec, offsets);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
slots. */
if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
page_dir_balance_slot(page, cur_slot_no);
if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
page_dir_balance_slot(page, page_zip, cur_slot_no);
}
}

File diff suppressed because it is too large Load diff

331
page/page0zip.c Normal file
View file

@ -0,0 +1,331 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#define THIS_MODULE
#include "page0zip.h"
#ifdef UNIV_NONINL
# include "page0zip.ic"
#endif
#undef THIS_MODULE
#include "page0page.h"
#include "mtr0log.h"
#include "zlib.h"
/**************************************************************************
Compress a page.  On success, page_zip->data contains the page header,
the deflate stream of the bytes between PAGE_DATA and PAGE_HEAP_TOP,
a zero-terminated modification log (initially empty), and the page
trailer (the sparse page directory). */

ibool
page_zip_compress(
/*==============*/
				/* out: TRUE on success, FALSE on failure;
				page_zip will be left intact on failure. */
	page_zip_des_t*	page_zip,/* out: compressed page */
	const page_t*	page)	/* in: uncompressed page */
{
	z_stream	c_stream;
	int		err;
	byte*		buf;
	ulint		trailer_len;
	ulint		in_len;
	ulint		out_len;

	ut_ad(page_zip_simple_validate(page_zip));
#ifdef UNIV_DEBUG
	if (page_is_comp((page_t*) page)) {
		ut_ad(page_simple_validate_new((page_t*) page));
	} else {
		ut_ad(page_simple_validate_old((page_t*) page));
	}
#endif /* UNIV_DEBUG */

	/* Scratch buffer for the deflate output; the stream can never
	be allowed to grow beyond the payload area of page_zip. */
	buf = mem_alloc(page_zip->size - PAGE_DATA);

	/* Determine the length of the page trailer
	(the sparse page directory at the end of the page). */
	trailer_len = page + UNIV_PAGE_SIZE
		- page_dir_get_nth_slot((page_t*) page,
			page_dir_get_n_slots((page_t*) page) - 1);
	ut_ad(trailer_len < UNIV_PAGE_SIZE - PAGE_DATA);

	/* The bytes between the page header and PAGE_HEAP_TOP
	are compressed. */
	in_len = page_header_get_field((page_t*) page, PAGE_HEAP_TOP)
		- PAGE_DATA;
	/* Reserve one byte for the terminating zero byte of the
	modification log that follows the compressed stream.  (The
	original expression size - (PAGE_DATA - 1) - trailer_len
	allowed the stream to overwrite the terminator, and the
	memset() length below would then underflow.) */
	out_len = page_zip->size - PAGE_DATA - trailer_len - 1;

	/* Compress the data payload. */
	c_stream.zalloc = (alloc_func) 0;
	c_stream.zfree = (free_func) 0;
	c_stream.opaque = (voidpf) 0;

	err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
	ut_a(err == Z_OK);

	c_stream.next_out = buf;
	c_stream.next_in = (void*) (page + PAGE_DATA);
	c_stream.avail_out = out_len;
	c_stream.avail_in = in_len;

	err = deflate(&c_stream, Z_FINISH);

	if (err != Z_STREAM_END) {
		/* The compressed output did not fit;
		leave page_zip intact. */
		deflateEnd(&c_stream);
		mem_free(buf);
		return(FALSE);
	}

	err = deflateEnd(&c_stream);
	ut_a(err == Z_OK);

	/* deflate() consumes avail_in and avail_out as it proceeds;
	on Z_STREAM_END all input has been compressed. */
	ut_ad(c_stream.avail_in == 0);
	ut_a(c_stream.total_in == (uLong) in_len);
	ut_a(c_stream.total_out <= (uLong) out_len);

	/* The modification log is initially empty; m_start and m_end
	both point to its terminating zero byte. */
	page_zip->m_end = page_zip->m_start = PAGE_DATA + c_stream.total_out;
	/* Copy the page header */
	memcpy(page_zip->data, page, PAGE_DATA);
	/* Copy the compressed data */
	memcpy(page_zip->data + PAGE_DATA, buf, c_stream.total_out);
	/* Zero out the area reserved for the modification log */
	memset(page_zip->data + PAGE_DATA + c_stream.total_out, 0,
		page_zip->size - PAGE_DATA - trailer_len
		- c_stream.total_out);
	/* Copy the page trailer */
	memcpy(page_zip->data + page_zip->size - trailer_len,
		page + UNIV_PAGE_SIZE - trailer_len, trailer_len);
	mem_free(buf);
	ut_ad(page_zip_validate(page_zip, page));
	return(TRUE);
}
/**************************************************************************
Read an integer from the modification log of the compressed page. */
static
ulint
page_zip_ulint_read(
/*================*/
			/* out: length of the integer, in bytes;
			zero on failure */
	const byte*	src,	/* in: where to read */
	ulint*		dest)	/* out: the decoded integer */
{
	ulint	first = (unsigned char) src[0];

	if (!(first & 0x80)) {
		/* 0xxxxxxx: the value 0..127 is stored in one byte */
		*dest = first;

		return(1);
	}

	if (!(first & 0x40)) {
		/* 10xxxxxx xxxxxxxx: the value 0..16383 is stored
		in two bytes, most significant byte first */
		*dest = ((first << 8) & ~0x8000) | (unsigned char) src[1];

		return(2);
	}

	/* 11xxxxxx: reserved encoding */
	*dest = ULINT_MAX;

	return(0);
}
/**************************************************************************
Write an integer to the modification log of the compressed page. */
static
ulint
page_zip_ulint_write(
/*=================*/
			/* out: length of the integer, in bytes;
			zero on failure */
	byte*	dest,	/* in: where to write */
	ulint	num)	/* in: integer to write */
{
	if (num < 128) {
		/* 0xxxxxxx: 0..127, encoded in a single byte */
		dest[0] = (byte) num;

		return(1);
	}

	if (num < 16384) {
		/* 10xxxxxx xxxxxxxx: 0..16383, encoded in two bytes,
		most significant byte first */
		dest[0] = (byte) ((num >> 8) | 0x80);
		dest[1] = (byte) num;

		return(2);
	}

	/* 11xxxxxx is a reserved encoding;
	values this large must never be written */
	ut_error;

	return(0);
}
/**************************************************************************
Decompress a page.  Inflates the stream stored in page_zip->data,
restores the page header and trailer, and replays the modification
log on top of the inflated data. */

ibool
page_zip_decompress(
/*================*/
				/* out: TRUE on success, FALSE on failure */
	page_zip_des_t*	page_zip,/* in: data, size; out: m_start, m_end */
	page_t*		page,	/* out: uncompressed page, may be trashed */
	mtr_t*		mtr)	/* in: mini-transaction handle,
				or NULL if no logging is needed */
{
	z_stream	d_stream;
	int		err;
	ulint		trailer_len;
	ulint		in_len;
	ulint		out_len;

	ut_ad(page_zip_simple_validate(page_zip));

	/* The trailer (the sparse page directory) is stored
	uncompressed at the end of page_zip->data. */
	trailer_len = PAGE_DIR
		+ PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);
	ut_ad(trailer_len < page_zip->size - PAGE_DATA);
	ut_ad(page_header_get_field((page_t*) page_zip->data, PAGE_HEAP_TOP)
			<= UNIV_PAGE_SIZE - trailer_len);

	/* The compressed stream may extend at most to the start of the
	trailer; the inflated output must fill the page exactly up to
	PAGE_HEAP_TOP. */
	in_len = page_zip->size - PAGE_DATA - trailer_len;
	out_len = page_header_get_field((page_t*) page_zip->data,
					PAGE_HEAP_TOP) - PAGE_DATA;

	d_stream.zalloc = (alloc_func) 0;
	d_stream.zfree = (free_func) 0;
	d_stream.opaque = (voidpf) 0;

	err = inflateInit(&d_stream);
	ut_a(err == Z_OK);

	d_stream.next_in = page_zip->data + PAGE_DATA;
	d_stream.next_out = page + PAGE_DATA;
	d_stream.avail_in = in_len;
	d_stream.avail_out = out_len;

	err = inflate(&d_stream, Z_FINISH);

	if (err != Z_STREAM_END) {
		inflateEnd(&d_stream);
		return(FALSE);
	}

	err = inflateEnd(&d_stream);
	ut_a(err == Z_OK);

	/* inflate() consumes avail_in and avail_out as it proceeds;
	the stream must have produced the whole area below
	PAGE_HEAP_TOP.  (The original assertions here compared
	avail_in/avail_out to their pre-call values, and one
	assertion was a tautology.) */
	ut_a(d_stream.total_in <= (uLong) in_len);
	ut_a(d_stream.total_out == (uLong) out_len);

	/* The modification log starts right after the
	compressed stream. */
	page_zip->m_end = page_zip->m_start = PAGE_DATA + d_stream.total_in;
	/* Copy the page header */
	memcpy(page, page_zip->data, PAGE_DATA);
	/* Copy the page trailer from the compressed page to the
	uncompressed page.  (The original copied in the opposite
	direction, clobbering page_zip->data with the trashed
	trailer of the output frame.) */
	memcpy(page + UNIV_PAGE_SIZE - trailer_len,
		page_zip->data + page_zip->size - trailer_len, trailer_len);

	/* Apply the modification log: a sequence of
	(length, offset, data) entries terminated by a zero byte. */
	while (page_zip->data[page_zip->m_end]) {
		ulint	ulint_len;
		ulint	length, offset;

		ulint_len = page_zip_ulint_read(
			page_zip->data + page_zip->m_end, &length);
		page_zip->m_end += ulint_len;

		if (!ulint_len
		    || page_zip->m_end + length
		       >= page_zip->size - trailer_len) {

			return(FALSE);
		}

		ut_a(length > 0 && length < UNIV_PAGE_SIZE - PAGE_DATA);

		ulint_len = page_zip_ulint_read(
			page_zip->data + page_zip->m_end, &offset);
		page_zip->m_end += ulint_len;

		if (!ulint_len
		    || page_zip->m_end + length
		       >= page_zip->size - trailer_len) {

			return(FALSE);
		}

		/* Offsets in the log are relative to PAGE_DATA. */
		offset += PAGE_DATA;
		ut_a(offset + length < UNIV_PAGE_SIZE - trailer_len);

		memcpy(page + offset, page_zip->data + page_zip->m_end,
			length);
		page_zip->m_end += length;
	}

	/* Only ROW_FORMAT=COMPACT pages can be compressed. */
	ut_a(page_is_comp(page));
	ut_ad(page_simple_validate_new(page));

	if (UNIV_LIKELY_NULL(mtr)) {
		/* Write a redo log record of the decompression. */
		byte*	log_ptr = mlog_open(mtr, 11);
		if (log_ptr) {
			log_ptr = mlog_write_initial_log_record_fast(
					page, MLOG_COMP_DECOMPRESS,
					log_ptr, mtr);
			mlog_close(mtr, log_ptr);
		}
	}

	return(TRUE);
}
#ifdef UNIV_DEBUG
/**************************************************************************
Check that the compressed and decompressed pages match. */
ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip, /* in: compressed page */
const page_t* page) /* in: uncompressed page */
{
page_zip_des_t temp_page_zip = *page_zip;
page_t temp_page[UNIV_PAGE_SIZE];
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)page))
== page_zip);
return(page_zip_decompress(&temp_page_zip, temp_page, NULL)
&& !memcmp(page, temp_page, UNIV_PAGE_SIZE));
}
#endif /* UNIV_DEBUG */
/**************************************************************************
Write data to the compressed portion of a page.  The data must already
have been written to the uncompressed page.  The bytes are appended as
an entry to the modification log of page_zip; they are not merged into
the compressed stream itself.  The caller must have checked beforehand
that enough log space is available (see page_zip_available()). */

void
page_zip_write(
/*===========*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint		length)	/* in: length of the data */
{
	/* Offset of the data within the uncompressed page frame */
	ulint	pos = ut_align_offset(str, UNIV_PAGE_SIZE);
#ifdef UNIV_DEBUG
	/* Length of the page trailer (the sparse page directory),
	needed only for the consistency checks below */
	ulint	trailer_len = PAGE_DIR
		+ PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);
#endif /* UNIV_DEBUG */

	/* str must point into the uncompressed frame of this
	very compressed page */
	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_dir_get_n_slots(ut_align_down((byte*) str, UNIV_PAGE_SIZE))
		== page_dir_get_n_slots((page_t*) page_zip->data));
	/* The modification log must currently be terminated
	by a zero byte at m_end */
	ut_ad(!page_zip->data[page_zip->m_end]);
	ut_ad(PAGE_DATA + trailer_len < page_zip->size);
	/* The data must lie between the page header and the trailer */
	ut_ad(pos >= PAGE_DATA);
	ut_ad(pos + length <= UNIV_PAGE_SIZE - trailer_len);

	/* Offsets in the modification log are relative to PAGE_DATA */
	pos -= PAGE_DATA;

	/* NOTE(review): page_zip_entry_size() presumably yields the
	full log entry size (encoded length + encoded offset + data);
	confirm against its definition in page0zip.ic. */
	ut_ad(page_zip_available(page_zip, page_zip_entry_size(pos, length)));

	/* Append to the modification log.  An entry is
	(length, offset, data), with both integers in the
	variable-length encoding of page_zip_ulint_write(). */
	page_zip->m_end += page_zip_ulint_write(
		page_zip->data + page_zip->m_end, length);
	page_zip->m_end += page_zip_ulint_write(
		page_zip->data + page_zip->m_end, pos);
	memcpy(&page_zip->data[page_zip->m_end], str, length);
	page_zip->m_end += length;
	/* The log must remain zero-terminated and must not
	run into the page trailer */
	ut_ad(!page_zip->data[page_zip->m_end]);
	ut_ad(page_zip->m_end < page_zip->size - trailer_len);
	ut_ad(page_zip_validate(page_zip,
		ut_align_down((byte*) str, UNIV_PAGE_SIZE)));
}
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log. */

ibool
page_zip_available_noninline(
/*=========================*/
				/* out: TRUE if enough space
				is available */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	ulint			size)	/* in: requested size, in bytes */
{
	/* Out-of-line wrapper around the inline function, for
	callers that cannot use the inline version. */
	ibool	available;

	available = page_zip_available(page_zip, size);

	return(available);
}
#endif /* UNIV_DEBUG */

View file

@ -537,14 +537,13 @@ rec_set_nth_field_null_bit(
}
/***************************************************************
Sets the value of the ith field extern storage bit of an old-style record. */
Sets the ith field extern storage bit of an old-style record. */
void
rec_set_nth_field_extern_bit_old(
/*=============================*/
rec_t* rec, /* in: old-style record */
ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page where
rec is, or NULL; in the NULL case we do not
write to log about the change */
@ -556,11 +555,7 @@ rec_set_nth_field_extern_bit_old(
info = rec_2_get_field_end_info(rec, i);
if (val) {
info = info | REC_2BYTE_EXTERN_MASK;
} else {
info = info & ~REC_2BYTE_EXTERN_MASK;
}
info |= REC_2BYTE_EXTERN_MASK;
if (mtr) {
mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1),
@ -571,7 +566,7 @@ rec_set_nth_field_extern_bit_old(
}
/***************************************************************
Sets the value of the ith field extern storage bit of a new-style record. */
Sets the ith field extern storage bit of a new-style record. */
void
rec_set_nth_field_extern_bit_new(
@ -579,7 +574,6 @@ rec_set_nth_field_extern_bit_new(
rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */
ulint ith, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case
we do not write to log about the change */
@ -632,11 +626,11 @@ rec_set_nth_field_extern_bit_new(
ulint len = lens[1];
if (len & 0x80) { /* 1exxxxxx: 2-byte length */
if (i == ith) {
if (!val == !(len & 0x40)) {
if (len & 0x40) {
return; /* no change */
}
/* toggle the extern bit */
len ^= 0x40;
len |= 0x40;
if (mtr) {
mlog_write_ulint(lens + 1, len,
MLOG_1BYTE, mtr);
@ -677,12 +671,11 @@ rec_set_field_extern_bits(
if (UNIV_LIKELY(index->table->comp)) {
for (i = 0; i < n_fields; i++) {
rec_set_nth_field_extern_bit_new(rec, index, vec[i],
TRUE, mtr);
mtr);
}
} else {
for (i = 0; i < n_fields; i++) {
rec_set_nth_field_extern_bit_old(rec, vec[i],
TRUE, mtr);
rec_set_nth_field_extern_bit_old(rec, vec[i], mtr);
}
}
}
@ -745,7 +738,7 @@ rec_convert_dtuple_to_rec_old(
rec_set_n_fields_old(rec, n_fields);
/* Set the info bits of the record */
rec_set_info_bits(rec, FALSE,
rec_set_info_bits_old(rec,
dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
/* Store the data and the offsets */
@ -835,8 +828,6 @@ rec_convert_dtuple_to_rec_new(
ulint fixed_len;
ulint null_mask = 1;
const ulint n_fields = dtuple_get_n_fields(dtuple);
const ulint status = dtuple_get_info_bits(dtuple)
& REC_NEW_STATUS_MASK;
ut_ad(index->table->comp);
ut_ad(n_fields > 0);
@ -847,7 +838,8 @@ rec_convert_dtuple_to_rec_new(
UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
UNIV_PREFETCH_RW(rec);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
switch (UNIV_EXPECT(dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK,
REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
@ -862,7 +854,7 @@ rec_convert_dtuple_to_rec_new(
n_node_ptr_field = ULINT_UNDEFINED;
goto init;
default:
ut_a(0);
ut_error;
return(0);
}
@ -912,10 +904,8 @@ init:
memset (lens + 1, 0, nulls - lens);
/* Set the info bits of the record */
rec_set_status(rec, status);
rec_set_info_bits(rec, TRUE,
dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
rec_set_info_and_status_bits(rec, NULL,
dtuple_get_info_bits(dtuple));
/* Store the data and the offsets */
@ -928,6 +918,7 @@ init:
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(len == 4);
memcpy(end, dfield_get_data(field), len);
end += 4;
break;
}
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;

View file

@ -2409,7 +2409,7 @@ row_ins_step(
goto same_trx;
}
trx_write_trx_id(node->trx_id_buf, trx->id);
trx_write_trx_id(node->trx_id_buf, NULL, trx->id);
err = lock_table(0, node->table, LOCK_IX, thr);

View file

@ -67,9 +67,10 @@ is slower than the specialized inline functions. */
void
row_set_rec_sys_field(
/*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val) /* in: value to set */
@ -87,11 +88,11 @@ row_set_rec_sys_field(
if (type == DATA_TRX_ID) {
trx_write_trx_id(field, val);
trx_write_trx_id(field, page_zip/* 10 bytes */, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, val);
trx_write_roll_ptr(field, page_zip/* 11 bytes */, val);
}
}

View file

@ -2059,7 +2059,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield = dtuple_get_nth_field(tuple, 0);
field = dict_index_get_nth_field(index, 0);
if (dfield_get_type(dfield)->mtype == DATA_SYS) {
if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
/* A special case: we are looking for a position in the
generated clustered index which InnoDB automatically added
to a table with no primary key: the first and the only
@ -2077,8 +2077,9 @@ row_sel_convert_mysql_key_to_innobase(
while (key_ptr < key_end) {
ut_a(dict_col_get_type(field->col)->mtype
== dfield_get_type(dfield)->mtype);
type = dfield_get_type(dfield)->mtype;
ut_a(dict_col_get_type(field->col)->mtype == type);
data_offset = 0;
is_null = FALSE;
@ -2096,8 +2097,6 @@ row_sel_convert_mysql_key_to_innobase(
}
}
type = dfield_get_type(dfield)->mtype;
/* Calculate data length and data field total length */
if (type == DATA_BLOB) {
@ -2143,9 +2142,9 @@ row_sel_convert_mysql_key_to_innobase(
data_field_len = data_offset + data_len;
}
if (dtype_get_mysql_type(dfield_get_type(dfield))
== DATA_MYSQL_TRUE_VARCHAR
&& dfield_get_type(dfield)->mtype != DATA_INT) {
if (UNIV_UNLIKELY(dtype_get_mysql_type(dfield_get_type(dfield))
== DATA_MYSQL_TRUE_VARCHAR)
&& UNIV_LIKELY(type != DATA_INT)) {
/* In a MySQL key value format, a true VARCHAR is
always preceded by 2 bytes of a length field.
dfield_get_type(dfield)->len returns the maximum
@ -2161,7 +2160,7 @@ row_sel_convert_mysql_key_to_innobase(
/* Storing may use at most data_len bytes of buf */
if (!is_null) {
if (UNIV_LIKELY(!is_null)) {
row_mysql_store_col_in_innobase_format(
dfield,
buf,
@ -2174,7 +2173,7 @@ row_sel_convert_mysql_key_to_innobase(
key_ptr += data_field_len;
if (key_ptr > key_end) {
if (UNIV_UNLIKELY(key_ptr > key_end)) {
/* The last field in key was not a complete key field
but a prefix of it.

View file

@ -301,7 +301,8 @@ recovery. */
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
rec_t* rec, /* in: record */
rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint pos, /* in: TRX_ID position in rec */
dulint trx_id, /* in: transaction id */
@ -312,11 +313,11 @@ row_upd_rec_sys_fields_in_recovery(
field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, trx_id);
trx_write_trx_id(field, page_zip, trx_id);
field = rec_get_nth_field(rec, offsets, pos + 1, &len);
ut_ad(len == DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(field, roll_ptr);
trx_write_roll_ptr(field, page_zip, roll_ptr);
}
/*************************************************************************
@ -345,10 +346,10 @@ row_upd_index_entry_sys_field(
field = dfield_get_data(dfield);
if (type == DATA_TRX_ID) {
trx_write_trx_id(field, val);
trx_write_trx_id(field, NULL, val);
} else {
ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, val);
trx_write_roll_ptr(field, NULL, val);
}
}
@ -445,7 +446,11 @@ row_upd_rec_in_place(
ut_ad(rec_offs_validate(rec, NULL, offsets));
rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits);
if (rec_offs_comp(offsets)) {
rec_set_info_bits_new(rec, NULL, update->info_bits);
} else {
rec_set_info_bits_old(rec, update->info_bits);
}
n_fields = upd_get_n_fields(update);
@ -480,7 +485,7 @@ row_upd_write_sys_vals_to_log(
log_ptr += mach_write_compressed(log_ptr,
dict_index_get_sys_col_pos(index, DATA_TRX_ID));
trx_write_roll_ptr(log_ptr, roll_ptr);
trx_write_roll_ptr(log_ptr, NULL, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
@ -2040,5 +2045,11 @@ row_upd_in_place_in_select(
err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
node->update, node->cmpl_info,
thr, mtr);
/* TODO: the above can fail if page_zip != NULL.
However, this function row_upd_in_place_in_select() is only invoked
when executing UPDATE statements of the built-in InnoDB SQL parser.
The built-in SQL is only used for InnoDB system tables, which
always are in the old, uncompressed format (ROW_FORMAT=REDUNDANT,
comp == FALSE, page_zip == NULL). */
ut_ad(err == DB_SUCCESS);
}

View file

@ -807,7 +807,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields);
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
trx_write_trx_id(buf, trx_id);
trx_write_trx_id(buf, NULL, trx_id);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
@ -816,7 +816,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields + 1);
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(buf, roll_ptr);
trx_write_roll_ptr(buf, NULL, roll_ptr);
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),