branches/zip:

Initial import of the zip-like compression of B-tree pages.
This commit is contained in:
marko 2005-10-27 11:48:10 +00:00
parent d345f80435
commit 88e6d6863d
41 changed files with 3212 additions and 1230 deletions

View file

@ -14,6 +14,7 @@ Created 6/2/1994 Heikki Tuuri
#include "fsp0fsp.h" #include "fsp0fsp.h"
#include "page0page.h" #include "page0page.h"
#include "page0zip.h"
#include "btr0cur.h" #include "btr0cur.h"
#include "btr0sea.h" #include "btr0sea.h"
#include "btr0pcur.h" #include "btr0pcur.h"
@ -105,8 +106,9 @@ static
void void
btr_page_empty( btr_page_empty(
/*===========*/ /*===========*/
page_t* page, /* in: page to be emptied */ page_t* page, /* in: page to be emptied */
mtr_t* mtr); /* in: mtr */ page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr); /* in: mtr */
/***************************************************************** /*****************************************************************
Returns TRUE if the insert fits on the appropriate half-page Returns TRUE if the insert fits on the appropriate half-page
with the chosen split_rec. */ with the chosen split_rec. */
@ -258,7 +260,7 @@ btr_page_create(
{ {
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
page_create(page, mtr, page_create(page, NULL, mtr,
UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp);
buf_block_align(page)->check_index_page_at_flush = TRUE; buf_block_align(page)->check_index_page_at_flush = TRUE;
@ -662,6 +664,7 @@ btr_create(
buf_frame_t* ibuf_hdr_frame; buf_frame_t* ibuf_hdr_frame;
buf_frame_t* frame; buf_frame_t* frame;
page_t* page; page_t* page;
page_zip_des_t* page_zip;
/* Create the two new segments (one, in the case of an ibuf tree) for /* Create the two new segments (one, in the case of an ibuf tree) for
the index tree; the segment headers are put on the allocated root page the index tree; the segment headers are put on the allocated root page
@ -723,7 +726,7 @@ btr_create(
} }
/* Create a new index page on the allocated segment page */ /* Create a new index page on the allocated segment page */
page = page_create(frame, mtr, comp); page = page_create(frame, NULL, mtr, comp);
buf_block_align(page)->check_index_page_at_flush = TRUE; buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Set the index id of the page */ /* Set the index id of the page */
@ -748,6 +751,14 @@ btr_create(
ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(page_zip_compress(page_zip, page))) {
/* An empty page should always be compressible */
ut_error;
}
}
return(page_no); return(page_no);
} }
@ -833,7 +844,8 @@ btr_page_reorganize_low(
there cannot exist locks on the there cannot exist locks on the
page, and a hash index should not be page, and a hash index should not be
dropped: it cannot exist */ dropped: it cannot exist */
page_t* page, /* in: page to be reorganized */ page_t* page, /* in/out: page to be reorganized */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
@ -846,7 +858,7 @@ btr_page_reorganize_low(
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
ut_ad(!!page_is_comp(page) == index->table->comp); ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
data_size1 = page_get_data_size(page); data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
@ -863,25 +875,34 @@ btr_page_reorganize_low(
/* Copy the old page to temporary space */ /* Copy the old page to temporary space */
buf_frame_copy(new_page, page); buf_frame_copy(new_page, page);
if (!recovery) { if (UNIV_LIKELY(!recovery)) {
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
} }
/* Recreate the page: note that global data on page (possible /* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */ segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr, page_is_comp(page)); page_create(page, NULL, mtr, page_is_comp(page));
buf_block_align(page)->check_index_page_at_flush = TRUE; buf_block_align(page)->check_index_page_at_flush = TRUE;
/* Copy the records from the temporary space to the recreated page; /* Copy the records from the temporary space to the recreated page;
do not copy the lock bits yet */ do not copy the lock bits yet */
page_copy_rec_list_end_no_locks(page, new_page, page_copy_rec_list_end_no_locks(page,
page_get_infimum_rec(new_page), index, mtr); page_get_infimum_rec(new_page), index, mtr);
/* Copy max trx id to recreated page */ /* Copy max trx id to recreated page */
page_set_max_trx_id(page, page_get_max_trx_id(new_page)); page_set_max_trx_id(page, page_get_max_trx_id(new_page));
if (!recovery) { if (UNIV_LIKELY_NULL(page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page))) {
/* Reorganizing a page should reduce entropy,
making the compressed page occupy less space. */
ut_error;
}
}
if (UNIV_LIKELY(!recovery)) {
/* Update the record lock bitmaps */ /* Update the record lock bitmaps */
lock_move_reorganize_page(page, new_page); lock_move_reorganize_page(page, new_page);
} }
@ -889,7 +910,8 @@ btr_page_reorganize_low(
data_size2 = page_get_data_size(page); data_size2 = page_get_data_size(page);
max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { if (UNIV_UNLIKELY(data_size1 != data_size2)
|| UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
buf_page_print(page); buf_page_print(page);
buf_page_print(new_page); buf_page_print(new_page);
fprintf(stderr, fprintf(stderr,
@ -917,7 +939,9 @@ btr_page_reorganize(
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
btr_page_reorganize_low(FALSE, page, index, mtr); btr_page_reorganize_low(FALSE, page,
buf_block_get_page_zip(buf_block_align(page)),
index, mtr);
} }
/*************************************************************** /***************************************************************
@ -938,8 +962,10 @@ btr_parse_page_reorganize(
/* The record is empty, except for the record initial part */ /* The record is empty, except for the record initial part */
if (page) { if (UNIV_LIKELY(page != NULL)) {
btr_page_reorganize_low(TRUE, page, index, mtr); page_zip_des_t* page_zip = buf_block_get_page_zip(
buf_block_align(page));
btr_page_reorganize_low(TRUE, page, page_zip, index, mtr);
} }
return(ptr); return(ptr);
@ -951,17 +977,20 @@ static
void void
btr_page_empty( btr_page_empty(
/*===========*/ /*===========*/
page_t* page, /* in: page to be emptied */ page_t* page, /* in: page to be emptied */
mtr_t* mtr) /* in: mtr */ page_zip_des_t* page_zip,/* out: compressed page, or NULL */
mtr_t* mtr) /* in: mtr */
{ {
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
ut_ad(!page_zip || page_zip_validate(page_zip, page));
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
/* Recreate the page: note that global data on page (possible /* Recreate the page: note that global data on page (possible
segment headers, next page-field, etc.) is preserved intact */ segment headers, next page-field, etc.) is preserved intact */
page_create(page, mtr, page_is_comp(page)); page_create(page, page_zip, mtr, page_is_comp(page));
buf_block_align(page)->check_index_page_at_flush = TRUE; buf_block_align(page)->check_index_page_at_flush = TRUE;
} }
@ -993,6 +1022,7 @@ btr_root_raise_and_insert(
ulint level; ulint level;
rec_t* node_ptr_rec; rec_t* node_ptr_rec;
page_cur_t* page_cursor; page_cur_t* page_cursor;
page_zip_des_t* page_zip;
root = btr_cur_get_page(cursor); root = btr_cur_get_page(cursor);
tree = btr_cur_get_tree(cursor); tree = btr_cur_get_tree(cursor);
@ -1025,8 +1055,12 @@ btr_root_raise_and_insert(
/* Move the records from root to the new page */ /* Move the records from root to the new page */
page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), page_zip = buf_block_get_page_zip(buf_block_align(new_page));
cursor->index, mtr);
page_move_rec_list_end(new_page, page_zip,
page_get_infimum_rec(root), NULL,
cursor->index, mtr);
/* If this is a pessimistic insert which is actually done to /* If this is a pessimistic insert which is actually done to
perform a pessimistic update then we have stored the lock perform a pessimistic update then we have stored the lock
information of the record to be inserted on the infimum of the information of the record to be inserted on the infimum of the
@ -1046,7 +1080,7 @@ btr_root_raise_and_insert(
node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap, node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap,
level); level);
/* Reorganize the root to get free space */ /* Reorganize the root to get free space */
btr_page_reorganize(root, cursor->index, mtr); btr_page_reorganize_low(FALSE, root, NULL, cursor->index, mtr);
page_cursor = btr_cur_get_page_cur(cursor); page_cursor = btr_cur_get_page_cur(cursor);
@ -1054,16 +1088,25 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root, page_cursor); page_cur_set_before_first(root, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, node_ptr_rec = page_cur_tuple_insert(page_cursor, NULL,
cursor->index, mtr); node_ptr, cursor->index, mtr);
ut_ad(node_ptr_rec); ut_ad(node_ptr_rec);
page_zip = buf_block_get_page_zip(buf_block_align(root));
/* The node pointer must be marked as the predefined minimum record, /* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost as there is no lower alphabetical limit to records in the leftmost
node of a level: */ node of a level: */
btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr); btr_set_min_rec_mark(node_ptr_rec, NULL, mtr);
if (!UNIV_UNLIKELY(page_zip_compress(page_zip, root))) {
/* The root page should only contain the
node pointer to new_page at this point.
Thus, the data should fit. */
ut_error;
}
/* Free the memory heap */ /* Free the memory heap */
mem_heap_free(heap); mem_heap_free(heap);
@ -1564,15 +1607,13 @@ btr_page_split_and_insert(
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
dict_tree_t* tree; dict_tree_t* tree;
page_t* page;
ulint page_no;
byte direction; byte direction;
ulint hint_page_no;
page_t* new_page;
rec_t* split_rec; rec_t* split_rec;
page_t* left_page; page_t* left_page;
page_t* right_page; page_t* right_page;
page_t* insert_page; page_t* insert_page;
page_zip_des_t* left_page_zip;
page_zip_des_t* right_page_zip;
page_cur_t* page_cursor; page_cur_t* page_cursor;
rec_t* first_rec; rec_t* first_rec;
byte* buf = 0; /* remove warning */ byte* buf = 0; /* remove warning */
@ -1597,13 +1638,13 @@ func_start:
ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX)); ut_ad(rw_lock_own(dict_tree_get_lock(tree), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
page = btr_cur_get_page(cursor); left_page = btr_cur_get_page(cursor);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), ut_ad(mtr_memo_contains(mtr, buf_block_align(left_page),
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
ut_ad(page_get_n_recs(page) >= 2); ut_ad(page_get_n_recs(left_page) >= 2);
page_no = buf_frame_get_page_no(page); left_page_zip = buf_block_get_page_zip(buf_block_align(left_page));
/* 1. Decide the split record; split_rec == NULL means that the /* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper tuple to be inserted should be the first record on the upper
@ -1611,26 +1652,24 @@ func_start:
if (n_iterations > 0) { if (n_iterations > 0) {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = btr_page_get_sure_split_rec(cursor, tuple); split_rec = btr_page_get_sure_split_rec(cursor, tuple);
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1;
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
direction = FSP_DOWN; direction = FSP_DOWN;
hint_page_no = page_no - 1;
} else { } else {
direction = FSP_UP; direction = FSP_UP;
hint_page_no = page_no + 1; split_rec = page_get_middle_rec(left_page);
split_rec = page_get_middle_rec(page);
} }
/* 2. Allocate a new page to the tree */ /* 2. Allocate a new page to the tree */
new_page = btr_page_alloc(tree, hint_page_no, direction, right_page = btr_page_alloc(tree,
btr_page_get_level(page, mtr), mtr); buf_frame_get_page_no(left_page) + 1,
btr_page_create(new_page, tree, mtr); direction,
btr_page_get_level(left_page, mtr), mtr);
btr_page_create(right_page, tree, mtr);
/* 3. Calculate the first record on the upper half-page, and the /* 3. Calculate the first record on the upper half-page, and the
first record (move_limit) on original page which ends up on the first record (move_limit) on original page which ends up on the
@ -1649,7 +1688,8 @@ func_start:
/* 4. First do the modifications in the tree structure */ /* 4. First do the modifications in the tree structure */
btr_attach_half_pages(tree, page, first_rec, new_page, direction, mtr); btr_attach_half_pages(tree, left_page, first_rec, right_page,
direction, mtr);
if (split_rec == NULL) { if (split_rec == NULL) {
mem_free(buf); mem_free(buf);
@ -1667,34 +1707,31 @@ func_start:
insert_will_fit = btr_page_insert_fits(cursor, insert_will_fit = btr_page_insert_fits(cursor,
split_rec, offsets, tuple, heap); split_rec, offsets, tuple, heap);
} else { } else {
mem_free(buf);
insert_will_fit = btr_page_insert_fits(cursor, insert_will_fit = btr_page_insert_fits(cursor,
NULL, NULL, tuple, heap); NULL, NULL, tuple, heap);
} }
if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { if (insert_will_fit && (btr_page_get_level(left_page, mtr) == 0)) {
mtr_memo_release(mtr, dict_tree_get_lock(tree), mtr_memo_release(mtr, dict_tree_get_lock(tree),
MTR_MEMO_X_LOCK); MTR_MEMO_X_LOCK);
} }
/* 5. Then move the records to the new page */ /* 5. Then move the records to the new page */
if (direction == FSP_DOWN) { right_page_zip = buf_block_get_page_zip(buf_block_align(right_page));
/* fputs("Split left\n", stderr); */
page_move_rec_list_start(new_page, page, move_limit, page_move_rec_list_end(right_page, right_page_zip,
cursor->index, mtr); move_limit, left_page_zip,
left_page = new_page; cursor->index, mtr);
right_page = page;
if (UNIV_UNLIKELY(direction == FSP_DOWN)) {
fputs("Split left\n", stderr); /* TODO: coverage test */
lock_update_split_left(right_page, left_page); lock_update_split_left(right_page, left_page);
} else { } else {
/* fputs("Split right\n", stderr); */ /* fputs("Split right\n", stderr); */
page_move_rec_list_end(new_page, page, move_limit,
cursor->index, mtr);
left_page = page;
right_page = new_page;
lock_update_split_right(right_page, left_page); lock_update_split_right(right_page, left_page);
} }
@ -1722,9 +1759,12 @@ func_start:
page_cur_search(insert_page, cursor->index, tuple, page_cur_search(insert_page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor); PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); rec = page_cur_tuple_insert(page_cursor, left_page_zip,
tuple, cursor->index, mtr);
if (rec != NULL) { ut_ad(!left_page_zip || page_zip_validate(left_page_zip, left_page));
if (UNIV_LIKELY(rec != NULL)) {
/* Insert fit on the page: update the free bits for the /* Insert fit on the page: update the free bits for the
left and right pages in the same mtr */ left and right pages in the same mtr */
@ -1744,14 +1784,16 @@ func_start:
page_cur_search(insert_page, cursor->index, tuple, page_cur_search(insert_page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor); PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); rec = page_cur_tuple_insert(page_cursor, left_page_zip,
tuple, cursor->index, mtr);
if (UNIV_UNLIKELY(rec == NULL)) {
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the /* The insert did not fit on the page: loop back to the
start of the function for a new split */ start of the function for a new split */
/* We play safe and reset the free bits for new_page */ /* We play safe and reset the free bits for right_page */
ibuf_reset_free_bits(cursor->index, new_page); ibuf_reset_free_bits(cursor->index, right_page);
/* fprintf(stderr, "Split second round %lu\n", /* fprintf(stderr, "Split second round %lu\n",
buf_frame_get_page_no(page)); */ buf_frame_get_page_no(page)); */
@ -1830,11 +1872,10 @@ void
btr_set_min_rec_mark_log( btr_set_min_rec_mark_log(
/*=====================*/ /*=====================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint comp, /* nonzero=compact record format */ byte type, /* in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
mlog_write_initial_log_record(rec, mlog_write_initial_log_record(rec, type, mtr);
comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
/* Write rec offset as a 2-byte ulint */ /* Write rec offset as a 2-byte ulint */
mlog_catenate_ulint(mtr, ut_align_offset(rec, UNIV_PAGE_SIZE), mlog_catenate_ulint(mtr, ut_align_offset(rec, UNIV_PAGE_SIZE),
@ -1863,11 +1904,14 @@ btr_parse_set_min_rec_mark(
} }
if (page) { if (page) {
page_zip_des_t* page_zip = buf_block_get_page_zip(
buf_block_align(page));
ut_a(!page_is_comp(page) == !comp); ut_a(!page_is_comp(page) == !comp);
rec = page + mach_read_from_2(ptr); rec = page + mach_read_from_2(ptr);
btr_set_min_rec_mark(rec, comp, mtr); btr_set_min_rec_mark(rec, page_zip, mtr);
} }
return(ptr + 2); return(ptr + 2);
@ -1879,17 +1923,29 @@ Sets a record as the predefined minimum record. */
void void
btr_set_min_rec_mark( btr_set_min_rec_mark(
/*=================*/ /*=================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint comp, /* in: nonzero=compact page format */ page_zip_des_t* page_zip,/* in/out: compressed page with
mtr_t* mtr) /* in: mtr */ at least 5 bytes available, or NULL */
mtr_t* mtr) /* in: mtr */
{ {
ulint info_bits; ulint info_bits;
info_bits = rec_get_info_bits(rec, comp); if (UNIV_LIKELY(page_rec_is_comp(rec))) {
info_bits = rec_get_info_bits(rec, TRUE);
rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); rec_set_info_bits_new(rec, page_zip,
info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, comp, mtr); btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
} else {
ut_ad(!page_zip);
info_bits = rec_get_info_bits(rec, FALSE);
rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
}
} }
/***************************************************************** /*****************************************************************
@ -1928,9 +1984,10 @@ btr_node_ptr_delete(
If page is the only one on its level, this function moves its records to the If page is the only one on its level, this function moves its records to the
father page, thus reducing the tree height. */ father page, thus reducing the tree height. */
static static
void ibool
btr_lift_page_up( btr_lift_page_up(
/*=============*/ /*=============*/
/* out: TRUE on success */
dict_tree_t* tree, /* in: index tree */ dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page which is the only on its level; page_t* page, /* in: page which is the only on its level;
must not be empty: use must not be empty: use
@ -1941,6 +1998,7 @@ btr_lift_page_up(
page_t* father_page; page_t* father_page;
ulint page_level; ulint page_level;
dict_index_t* index; dict_index_t* index;
page_zip_des_t* father_page_zip;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@ -1948,6 +2006,7 @@ btr_lift_page_up(
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
father_page = buf_frame_align( father_page = buf_frame_align(
btr_page_get_father_node_ptr(tree, page, mtr)); btr_page_get_father_node_ptr(tree, page, mtr));
father_page_zip = buf_block_get_page_zip(buf_block_align(father_page));
page_level = btr_page_get_level(page, mtr); page_level = btr_page_get_level(page, mtr);
index = UT_LIST_GET_FIRST(tree->tree_indexes); index = UT_LIST_GET_FIRST(tree->tree_indexes);
@ -1955,15 +2014,31 @@ btr_lift_page_up(
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
/* Make the father empty */ /* Make the father empty */
btr_page_empty(father_page, mtr); btr_page_empty(father_page, NULL, mtr);
/* Move records to the father */ /* Move records to the father */
page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), if (!page_copy_rec_list_end(father_page, NULL,
index, mtr); page_get_infimum_rec(page), index, mtr)) {
lock_update_copy_and_discard(father_page, page); ut_error;
}
btr_page_set_level(father_page, page_level, mtr); btr_page_set_level(father_page, page_level, mtr);
if (UNIV_LIKELY_NULL(father_page_zip)) {
if (UNIV_UNLIKELY(!page_zip_compress(
father_page_zip, father_page))) {
/* Restore the old page from temporary space */
if (UNIV_UNLIKELY(!page_zip_decompress(
father_page_zip, father_page, mtr))) {
ut_error; /* probably memory corruption */
}
return(FALSE);
}
}
lock_update_copy_and_discard(father_page, page);
/* Free the file page */ /* Free the file page */
btr_page_free(tree, page, mtr); btr_page_free(tree, page, mtr);
@ -1971,6 +2046,8 @@ btr_lift_page_up(
ibuf_reset_free_bits(index, father_page); ibuf_reset_free_bits(index, father_page);
ut_ad(page_validate(father_page, index)); ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(tree, father_page, mtr)); ut_ad(btr_check_node_ptr(tree, father_page, mtr));
return(TRUE);
} }
/***************************************************************** /*****************************************************************
@ -1981,12 +2058,12 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to the page. If cursor is on the leaf level, mtr must also hold x-latches to the
brothers, if they exist. NOTE: it is assumed that the caller has reserved brothers, if they exist. */
enough free extents so that the compression will always succeed if done! */
void ibool
btr_compress( btr_compress(
/*=========*/ /*=========*/
/* out: TRUE on success */
btr_cur_t* cursor, /* in: cursor on the page to merge or lift; btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete the page must not be empty: in record delete
use btr_discard_page if the page would become use btr_discard_page if the page would become
@ -2001,20 +2078,16 @@ btr_compress(
page_t* father_page; page_t* father_page;
ibool is_left; ibool is_left;
page_t* page; page_t* page;
rec_t* orig_pred;
rec_t* orig_succ;
rec_t* node_ptr; rec_t* node_ptr;
ulint data_size; ulint data_size;
ulint n_recs; ulint n_recs;
ulint max_ins_size; ulint max_ins_size;
ulint max_ins_size_reorg; ulint max_ins_size_reorg;
ulint level; ulint level;
ulint comp;
page = btr_cur_get_page(cursor); page = btr_cur_get_page(cursor);
tree = btr_cur_get_tree(cursor); tree = btr_cur_get_tree(cursor);
comp = page_is_comp(page); ut_a((ibool)!!page_is_comp(page) == cursor->index->table->comp);
ut_a((ibool)!!comp == cursor->index->table->comp);
ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree),
MTR_MEMO_X_LOCK)); MTR_MEMO_X_LOCK));
@ -2030,34 +2103,33 @@ btr_compress(
right_page_no); */ right_page_no); */
node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); node_ptr = btr_page_get_father_node_ptr(tree, page, mtr);
ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); ut_ad(!page_is_comp(page)
|| rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
father_page = buf_frame_align(node_ptr); father_page = buf_frame_align(node_ptr);
ut_a(comp == page_is_comp(father_page)); ut_a(page_is_comp(page) == page_is_comp(father_page));
/* Decide the page to which we try to merge and which will inherit /* Decide the page to which we try to merge and which will inherit
the locks */ the locks */
if (left_page_no != FIL_NULL) { is_left = left_page_no != FIL_NULL;
if (is_left) {
is_left = TRUE;
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
mtr); mtr);
} else if (right_page_no != FIL_NULL) { } else if (right_page_no != FIL_NULL) {
is_left = FALSE;
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
mtr); mtr);
} else { } else {
/* The page is the only one on the level, lift the records /* The page is the only one on the level, lift the records
to the father */ to the father */
btr_lift_page_up(tree, page, mtr); return(btr_lift_page_up(tree, page, mtr));
return;
} }
n_recs = page_get_n_recs(page); n_recs = page_get_n_recs(page);
data_size = page_get_data_size(page); data_size = page_get_data_size(page);
ut_a(page_is_comp(merge_page) == comp); ut_a(page_is_comp(merge_page) == page_is_comp(page));
max_ins_size_reorg = page_get_max_insert_size_after_reorganize( max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
merge_page, n_recs); merge_page, n_recs);
@ -2065,14 +2137,14 @@ btr_compress(
/* No space for merge */ /* No space for merge */
return; return(FALSE);
} }
ut_ad(page_validate(merge_page, cursor->index)); ut_ad(page_validate(merge_page, cursor->index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs); max_ins_size = page_get_max_insert_size(merge_page, n_recs);
if (data_size > max_ins_size) { if (UNIV_UNLIKELY(data_size > max_ins_size)) {
/* We have to reorganize merge_page */ /* We have to reorganize merge_page */
@ -2083,13 +2155,14 @@ btr_compress(
ut_ad(page_validate(merge_page, cursor->index)); ut_ad(page_validate(merge_page, cursor->index));
ut_ad(page_get_max_insert_size(merge_page, n_recs) ut_ad(page_get_max_insert_size(merge_page, n_recs)
== max_ins_size_reorg); == max_ins_size_reorg);
}
if (data_size > max_ins_size) { if (UNIV_UNLIKELY(data_size > max_ins_size)) {
/* Add fault tolerance, though this should never happen */ /* Add fault tolerance, though this should
never happen */
return; return(FALSE);
}
} }
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
@ -2118,17 +2191,27 @@ btr_compress(
/* Move records to the merge page */ /* Move records to the merge page */
if (is_left) { if (is_left) {
orig_pred = page_rec_get_prev( rec_t* orig_pred = page_rec_get_prev(
page_get_supremum_rec(merge_page)); page_get_supremum_rec(merge_page));
page_copy_rec_list_start(merge_page, page, if (UNIV_UNLIKELY(!page_copy_rec_list_start(
page_get_supremum_rec(page), cursor->index, mtr); merge_page, buf_block_get_page_zip(
buf_block_align(merge_page)),
page_get_supremum_rec(page),
cursor->index, mtr))) {
return(FALSE);
}
lock_update_merge_left(merge_page, orig_pred, page); lock_update_merge_left(merge_page, orig_pred, page);
} else { } else {
orig_succ = page_rec_get_next( rec_t* orig_succ = page_rec_get_next(
page_get_infimum_rec(merge_page)); page_get_infimum_rec(merge_page));
page_copy_rec_list_end(merge_page, page, if (UNIV_UNLIKELY(!page_copy_rec_list_end(
page_get_infimum_rec(page), cursor->index, mtr); merge_page, buf_block_get_page_zip(
buf_block_align(merge_page)),
page_get_infimum_rec(page),
cursor->index, mtr))) {
return(FALSE);
}
lock_update_merge_right(orig_succ, page); lock_update_merge_right(orig_succ, page);
} }
@ -2143,6 +2226,7 @@ btr_compress(
btr_page_free(tree, page, mtr); btr_page_free(tree, page, mtr);
ut_ad(btr_check_node_ptr(tree, merge_page, mtr)); ut_ad(btr_check_node_ptr(tree, merge_page, mtr));
return(TRUE);
} }
/***************************************************************** /*****************************************************************
@ -2155,7 +2239,6 @@ btr_discard_only_page_on_level(
page_t* page, /* in: page which is the only on its level */ page_t* page, /* in: page which is the only on its level */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
rec_t* node_ptr;
page_t* father_page; page_t* father_page;
ulint page_level; ulint page_level;
@ -2165,8 +2248,8 @@ btr_discard_only_page_on_level(
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); father_page = buf_frame_align(
father_page = buf_frame_align(node_ptr); btr_page_get_father_node_ptr(tree, page, mtr));
page_level = btr_page_get_level(page, mtr); page_level = btr_page_get_level(page, mtr);
@ -2177,10 +2260,13 @@ btr_discard_only_page_on_level(
/* Free the file page */ /* Free the file page */
btr_page_free(tree, page, mtr); btr_page_free(tree, page, mtr);
if (buf_frame_get_page_no(father_page) == dict_tree_get_page(tree)) { if (UNIV_UNLIKELY(buf_frame_get_page_no(father_page)
== dict_tree_get_page(tree))) {
/* The father is the root page */ /* The father is the root page */
btr_page_empty(father_page, mtr); btr_page_empty(father_page,
buf_block_get_page_zip(buf_block_align(father_page)),
mtr);
/* We play safe and reset the free bits for the father */ /* We play safe and reset the free bits for the father */
ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes),
@ -2209,7 +2295,6 @@ btr_discard_page(
ulint left_page_no; ulint left_page_no;
ulint right_page_no; ulint right_page_no;
page_t* merge_page; page_t* merge_page;
ibool is_left;
page_t* page; page_t* page;
rec_t* node_ptr; rec_t* node_ptr;
@ -2229,11 +2314,9 @@ btr_discard_page(
right_page_no = btr_page_get_next(page, mtr); right_page_no = btr_page_get_next(page, mtr);
if (left_page_no != FIL_NULL) { if (left_page_no != FIL_NULL) {
is_left = TRUE;
merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
mtr); mtr);
} else if (right_page_no != FIL_NULL) { } else if (right_page_no != FIL_NULL) {
is_left = FALSE;
merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
mtr); mtr);
} else { } else {
@ -2249,12 +2332,21 @@ btr_discard_page(
/* We have to mark the leftmost node pointer on the right /* We have to mark the leftmost node pointer on the right
side page as the predefined minimum record */ side page as the predefined minimum record */
page_zip_des_t* merge_page_zip;
merge_page_zip = buf_block_get_page_zip(
buf_block_align(merge_page));
if (UNIV_LIKELY_NULL(merge_page_zip)
&& UNIV_UNLIKELY(!page_zip_alloc(
merge_page_zip, merge_page, 5))) {
ut_error; /* TODO: handle this gracefully */
}
node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page)); node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
ut_ad(page_rec_is_user_rec(node_ptr)); ut_ad(page_rec_is_user_rec(node_ptr));
btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr); btr_set_min_rec_mark(node_ptr, merge_page_zip, mtr);
} }
btr_node_ptr_delete(tree, page, mtr); btr_node_ptr_delete(tree, page, mtr);
@ -2262,7 +2354,7 @@ btr_discard_page(
/* Remove the page from the level list */ /* Remove the page from the level list */
btr_level_list_remove(tree, page, mtr); btr_level_list_remove(tree, page, mtr);
if (is_left) { if (left_page_no != FIL_NULL) {
lock_update_discard(page_get_supremum_rec(merge_page), page); lock_update_discard(page_get_supremum_rec(merge_page), page);
} else { } else {
lock_update_discard(page_rec_get_next( lock_update_discard(page_rec_get_next(

View file

@ -24,6 +24,7 @@ Created 10/16/1994 Heikki Tuuri
#endif #endif
#include "page0page.h" #include "page0page.h"
#include "page0zip.h"
#include "rem0rec.h" #include "rem0rec.h"
#include "rem0cmp.h" #include "rem0cmp.h"
#include "btr0btr.h" #include "btr0btr.h"
@ -115,6 +116,35 @@ btr_rec_get_externally_stored_len(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**********************************************************
Sets or clears the delete-mark bit of a physical record. Records on
old-style pages are updated with rec_set_deleted_flag_old() and must not
have a compressed page attached. Records on compact-format pages are
updated with rec_set_deleted_flag_new(); when a compressed page is given,
5 bytes are first requested from it with page_zip_alloc(). If that
request is refused, the bit is still changed in the record itself (with
NULL passed in place of page_zip) and FALSE is returned to the caller. */
UNIV_INLINE
ibool
btr_rec_set_deleted_flag(
/*=====================*/
				/* out: TRUE on success;
				FALSE on page_zip overflow */
	rec_t*		rec,	/* in/out: physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page (or NULL) */
	ulint		flag)	/* in: nonzero if delete marked */
{
	if (!page_rec_is_comp(rec)) {
		/* Old-style record: no compressed page is expected. */
		ut_ad(!page_zip);
		rec_set_deleted_flag_old(rec, flag);

		return(TRUE);
	}

	/* Compact-format record from here on. */

	if (UNIV_LIKELY_NULL(page_zip)
	    && UNIV_UNLIKELY(!page_zip_alloc(page_zip,
			ut_align_down(rec, UNIV_PAGE_SIZE), 5))) {
		/* page_zip overflow: update only the record itself
		and report the failure. */
		rec_set_deleted_flag_new(rec, NULL, flag);

		return(FALSE);
	}

	rec_set_deleted_flag_new(rec, page_zip, flag);

	return(TRUE);
}
/*==================== B-TREE SEARCH =========================*/ /*==================== B-TREE SEARCH =========================*/
/************************************************************************ /************************************************************************
@ -405,19 +435,6 @@ btr_cur_search_to_nth_level(
/* Loop and search until we arrive at the desired level */ /* Loop and search until we arrive at the desired level */
for (;;) { for (;;) {
if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
rw_latch = latch_mode;
if (insert_planned && ibuf_should_try(index,
ignore_sec_unique)) {
/* Try insert to the insert buffer if the
page is not in the buffer pool */
buf_mode = BUF_GET_IF_IN_POOL;
}
}
retry_page_get: retry_page_get:
page = buf_page_get_gen(space, page_no, rw_latch, guess, page = buf_page_get_gen(space, page_no, rw_latch, guess,
buf_mode, buf_mode,
@ -460,7 +477,7 @@ retry_page_get:
ut_ad(0 == ut_dulint_cmp(tree->id, ut_ad(0 == ut_dulint_cmp(tree->id,
btr_page_get_index_id(page))); btr_page_get_index_id(page)));
if (height == ULINT_UNDEFINED) { if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
/* We are in the root node */ /* We are in the root node */
height = btr_page_get_level(page, mtr); height = btr_page_get_level(page, mtr);
@ -522,6 +539,21 @@ retry_page_get:
ut_ad(height > 0); ut_ad(height > 0);
height--; height--;
if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
rw_latch = latch_mode;
if (insert_planned && ibuf_should_try(index,
ignore_sec_unique)) {
/* Try insert to the insert buffer if the
page is not in the buffer pool */
buf_mode = BUF_GET_IF_IN_POOL;
}
}
guess = NULL; guess = NULL;
node_ptr = page_cur_get_rec(page_cursor); node_ptr = page_cur_get_rec(page_cursor);
@ -788,6 +820,7 @@ btr_cur_insert_if_possible(
else NULL */ else NULL */
btr_cur_t* cursor, /* in: cursor on page after which to insert; btr_cur_t* cursor, /* in: cursor on page after which to insert;
cursor stays valid */ cursor stays valid */
page_zip_des_t* page_zip,/* in: compressed page of cursor */
dtuple_t* tuple, /* in: tuple to insert; the size info need not dtuple_t* tuple, /* in: tuple to insert; the size info need not
have been stored to tuple */ have been stored to tuple */
ibool* reorg, /* out: TRUE if reorganization occurred */ ibool* reorg, /* out: TRUE if reorganization occurred */
@ -808,9 +841,10 @@ btr_cur_insert_if_possible(
page_cursor = btr_cur_get_page_cur(cursor); page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */ /* Now, try the insert */
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); rec = page_cur_tuple_insert(page_cursor, page_zip,
tuple, cursor->index, mtr);
if (!rec) { if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */ /* If record did not fit, reorganize */
btr_page_reorganize(page, cursor->index, mtr); btr_page_reorganize(page, cursor->index, mtr);
@ -820,8 +854,8 @@ btr_cur_insert_if_possible(
page_cur_search(page, cursor->index, tuple, page_cur_search(page, cursor->index, tuple,
PAGE_CUR_LE, page_cursor); PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, rec = page_cur_tuple_insert(page_cursor, page_zip,
cursor->index, mtr); tuple, cursor->index, mtr);
} }
return(rec); return(rec);
@ -935,6 +969,7 @@ btr_cur_optimistic_insert(
dict_index_t* index; dict_index_t* index;
page_cur_t* page_cursor; page_cur_t* page_cursor;
page_t* page; page_t* page;
page_zip_des_t* page_zip;
ulint max_size; ulint max_size;
rec_t* dummy_rec; rec_t* dummy_rec;
ulint level; ulint level;
@ -1033,9 +1068,10 @@ calculate_sizes_again:
reorg = FALSE; reorg = FALSE;
/* Now, try the insert */ /* Now, try the insert */
page_zip = buf_block_get_page_zip(buf_block_align(page));
*rec = page_cur_insert_rec_low(page_cursor, entry, index, *rec = page_cur_insert_rec_low(page_cursor, page_zip,
NULL, NULL, mtr); entry, index, NULL, NULL, mtr);
if (UNIV_UNLIKELY(!(*rec))) { if (UNIV_UNLIKELY(!(*rec))) {
/* If the record did not fit, reorganize */ /* If the record did not fit, reorganize */
btr_page_reorganize(page, index, mtr); btr_page_reorganize(page, index, mtr);
@ -1046,9 +1082,15 @@ calculate_sizes_again:
page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor); page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index, mtr); *rec = page_cur_tuple_insert(page_cursor, page_zip,
entry, index, mtr);
if (UNIV_UNLIKELY(!*rec)) { if (UNIV_UNLIKELY(!*rec)) {
if (UNIV_LIKELY_NULL(page_zip)) {
/* Likely a compressed page overflow */
return(DB_FAIL);
}
fputs("InnoDB: Error: cannot insert tuple ", stderr); fputs("InnoDB: Error: cannot insert tuple ", stderr);
dtuple_print(stderr, entry); dtuple_print(stderr, entry);
fputs(" into ", stderr); fputs(" into ", stderr);
@ -1343,7 +1385,8 @@ btr_cur_parse_update_in_place(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index) /* in: index corresponding to page */ dict_index_t* index) /* in: index corresponding to page */
{ {
ulint flags; ulint flags;
@ -1399,12 +1442,19 @@ btr_cur_parse_update_in_place(
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) { if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields_in_recovery(rec, offsets, row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
pos, trx_id, roll_ptr); pos, trx_id, roll_ptr);
} }
row_upd_rec_in_place(rec, offsets, update); row_upd_rec_in_place(rec, offsets, update);
if (UNIV_LIKELY_NULL(page_zip)) {
btr_cur_unmark_extern_fields(rec, NULL, offsets);
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
}
func_exit: func_exit:
mem_heap_free(heap); mem_heap_free(heap);
@ -1431,6 +1481,7 @@ btr_cur_update_in_place(
{ {
dict_index_t* index; dict_index_t* index;
buf_block_t* block; buf_block_t* block;
page_zip_des_t* page_zip;
ulint err; ulint err;
rec_t* rec; rec_t* rec;
dulint roll_ptr = ut_dulint_zero; dulint roll_ptr = ut_dulint_zero;
@ -1465,8 +1516,12 @@ btr_cur_update_in_place(
} }
block = buf_block_align(rec); block = buf_block_align(rec);
ut_ad(!!page_is_comp(buf_block_get_frame(block))
== index->table->comp); page_zip = buf_block_get_page_zip(block);
if (UNIV_UNLIKELY(!page_zip_alloc(page_zip, buf_block_get_frame(block),
4 + rec_offs_size(offsets)))) {
return(DB_OVERFLOW);
}
if (block->is_hashed) { if (block->is_hashed) {
/* The function row_upd_changes_ord_field_binary works only /* The function row_upd_changes_ord_field_binary works only
@ -1484,7 +1539,8 @@ btr_cur_update_in_place(
} }
if (!(flags & BTR_KEEP_SYS_FLAG)) { if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); row_upd_rec_sys_fields(rec, NULL,
index, offsets, trx, roll_ptr);
} }
/* FIXME: in a mixed tree, all records may not have enough ordering /* FIXME: in a mixed tree, all records may not have enough ordering
@ -1506,9 +1562,22 @@ btr_cur_update_in_place(
/* The new updated record owns its possible externally /* The new updated record owns its possible externally
stored fields */ stored fields */
if (UNIV_LIKELY_NULL(page_zip)) {
/* Do not log the btr_cur_unmark_extern_fields()
if the page is compressed. Do the operation in
crash recovery of MLOG_COMP_REC_UPDATE_IN_PLACE
in that case. */
mtr = NULL;
}
btr_cur_unmark_extern_fields(rec, mtr, offsets); btr_cur_unmark_extern_fields(rec, mtr, offsets);
} }
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - rec_offs_extra_size(offsets),
rec_offs_size(offsets));
}
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
@ -1543,7 +1612,10 @@ btr_cur_optimistic_update(
page_cur_t* page_cursor; page_cur_t* page_cursor;
ulint err; ulint err;
page_t* page; page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
rec_t* rec; rec_t* rec;
rec_t* orig_rec;
ulint max_size; ulint max_size;
ulint new_rec_size; ulint new_rec_size;
ulint old_rec_size; ulint old_rec_size;
@ -1556,7 +1628,7 @@ btr_cur_optimistic_update(
ulint* offsets; ulint* offsets;
page = btr_cur_get_page(cursor); page = btr_cur_get_page(cursor);
rec = btr_cur_get_rec(cursor); orig_rec = rec = btr_cur_get_rec(cursor);
index = cursor->index; index = cursor->index;
ut_ad(!!page_rec_is_comp(rec) == index->table->comp); ut_ad(!!page_rec_is_comp(rec) == index->table->comp);
@ -1663,7 +1735,18 @@ btr_cur_optimistic_update(
btr_search_update_hash_on_delete(cursor); btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(page_cursor, index, offsets, mtr); page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(page_cursor, index, offsets, page_zip_used, mtr);
page_cur_move_to_prev(page_cursor); page_cur_move_to_prev(page_cursor);
@ -1676,7 +1759,8 @@ btr_cur_optimistic_update(
trx->id); trx->id);
} }
rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr); rec = btr_cur_insert_if_possible(cursor, page_zip_used,
new_entry, &reorganized, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */ ut_a(rec); /* <- We calculated above the insert would fit */
@ -1689,6 +1773,22 @@ btr_cur_optimistic_update(
btr_cur_unmark_extern_fields(rec, mtr, offsets); btr_cur_unmark_extern_fields(rec, mtr, offsets);
} }
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
if (!page_zip_compress(page_zip, page)) {
if (UNIV_UNLIKELY(!page_zip_decompress(
page_zip, page, mtr))) {
ut_error;
}
/* TODO: is this correct? */
lock_rec_restore_from_page_infimum(orig_rec, page);
mem_heap_free(heap);
return(DB_OVERFLOW);
}
}
/* Restore the old explicit lock state on the record */ /* Restore the old explicit lock state on the record */
lock_rec_restore_from_page_infimum(rec, page); lock_rec_restore_from_page_infimum(rec, page);
@ -1768,6 +1868,7 @@ btr_cur_pessimistic_update(
big_rec_t* dummy_big_rec; big_rec_t* dummy_big_rec;
dict_index_t* index; dict_index_t* index;
page_t* page; page_t* page;
page_zip_des_t* page_zip;
dict_tree_t* tree; dict_tree_t* tree;
rec_t* rec; rec_t* rec;
page_cur_t* page_cursor; page_cur_t* page_cursor;
@ -1790,6 +1891,7 @@ btr_cur_pessimistic_update(
*big_rec = NULL; *big_rec = NULL;
page = btr_cur_get_page(cursor); page = btr_cur_get_page(cursor);
page_zip = buf_block_get_page_zip(buf_block_align(page));
rec = btr_cur_get_rec(cursor); rec = btr_cur_get_rec(cursor);
index = cursor->index; index = cursor->index;
tree = index->tree; tree = index->tree;
@ -1906,11 +2008,11 @@ btr_cur_pessimistic_update(
btr_search_update_hash_on_delete(cursor); btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(page_cursor, index, offsets, mtr); page_cur_delete_rec(page_cursor, index, offsets, page_zip, mtr);
page_cur_move_to_prev(page_cursor); page_cur_move_to_prev(page_cursor);
rec = btr_cur_insert_if_possible(cursor, new_entry, rec = btr_cur_insert_if_possible(cursor, page_zip, new_entry,
&dummy_reorganized, mtr); &dummy_reorganized, mtr);
ut_a(rec || optim_err != DB_UNDERFLOW); ut_a(rec || optim_err != DB_UNDERFLOW);
@ -2045,8 +2147,9 @@ btr_cur_parse_del_mark_set_clust_rec(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: index corresponding to page */ page_t* page, /* in/out: page or NULL */
page_t* page) /* in: page or NULL */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index) /* in: index corresponding to page */
{ {
ulint flags; ulint flags;
ulint val; ulint val;
@ -2088,12 +2191,24 @@ btr_cur_parse_del_mark_set_clust_rec(
if (page) { if (page) {
rec = page + offset; rec = page + offset;
/* We do not need to reserve btr_search_latch, as the page
is only being recovered, and there cannot be a hash index to
it. */
if (UNIV_UNLIKELY(!btr_rec_set_deleted_flag(rec,
page_zip, val))) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_CLUST_DELETE_MARK */
ut_error;
}
if (!(flags & BTR_KEEP_SYS_FLAG)) { if (!(flags & BTR_KEEP_SYS_FLAG)) {
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_;
row_upd_rec_sys_fields_in_recovery(rec, /* TODO: page_zip_write(whole record)? */
row_upd_rec_sys_fields_in_recovery(rec, page_zip,
rec_get_offsets(rec, index, offsets_, rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), ULINT_UNDEFINED, &heap),
pos, trx_id, roll_ptr); pos, trx_id, roll_ptr);
@ -2101,12 +2216,6 @@ btr_cur_parse_del_mark_set_clust_rec(
mem_heap_free(heap); mem_heap_free(heap);
} }
} }
/* We do not need to reserve btr_search_latch, as the page
is only being recovered, and there cannot be a hash index to
it. */
rec_set_deleted_flag(rec, page_is_comp(page), val);
} }
return(ptr); return(ptr);
@ -2134,6 +2243,7 @@ btr_cur_del_mark_set_clust_rec(
dulint roll_ptr; dulint roll_ptr;
ulint err; ulint err;
rec_t* rec; rec_t* rec;
page_zip_des_t* page_zip;
trx_t* trx; trx_t* trx;
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint offsets_[REC_OFFS_NORMAL_SIZE];
@ -2155,15 +2265,28 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(index->type & DICT_CLUSTERED); ut_ad(index->type & DICT_CLUSTERED);
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
page_zip = buf_block_get_page_zip(buf_block_align(rec));
if (UNIV_LIKELY_NULL(page_zip)) {
ulint size = 5;
if (!(flags & BTR_KEEP_SYS_FLAG)) {
size += 21;/* row_upd_rec_sys_fields() */
}
if (UNIV_UNLIKELY(!page_zip_alloc(page_zip,
ut_align_down(rec, UNIV_PAGE_SIZE), size))) {
err = DB_OVERFLOW;
goto func_exit;
}
}
err = lock_clust_rec_modify_check_and_lock(flags, err = lock_clust_rec_modify_check_and_lock(flags,
rec, index, offsets, thr); rec, index, offsets, thr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
if (UNIV_LIKELY_NULL(heap)) { goto func_exit;
mem_heap_free(heap);
}
return(err);
} }
err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
@ -2171,10 +2294,7 @@ btr_cur_del_mark_set_clust_rec(
&roll_ptr); &roll_ptr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
if (UNIV_LIKELY_NULL(heap)) { goto func_exit;
mem_heap_free(heap);
}
return(err);
} }
block = buf_block_align(rec); block = buf_block_align(rec);
@ -2183,12 +2303,13 @@ btr_cur_del_mark_set_clust_rec(
rw_lock_x_lock(&btr_search_latch); rw_lock_x_lock(&btr_search_latch);
} }
rec_set_deleted_flag(rec, rec_offs_comp(offsets), val); btr_rec_set_deleted_flag(rec, page_zip, val);
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
if (!(flags & BTR_KEEP_SYS_FLAG)) { if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); row_upd_rec_sys_fields(rec, page_zip,
index, offsets, trx, roll_ptr);
} }
if (block->is_hashed) { if (block->is_hashed) {
@ -2197,10 +2318,12 @@ btr_cur_del_mark_set_clust_rec(
btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
roll_ptr, mtr); roll_ptr, mtr);
func_exit:
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
return(DB_SUCCESS); return(err);
} }
/******************************************************************** /********************************************************************
@ -2246,7 +2369,8 @@ btr_cur_parse_del_mark_set_sec_rec(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
page_t* page) /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip)/* in/out: compressed page, or NULL */
{ {
ulint val; ulint val;
ulint offset; ulint offset;
@ -2272,7 +2396,10 @@ btr_cur_parse_del_mark_set_sec_rec(
is only being recovered, and there cannot be a hash index to is only being recovered, and there cannot be a hash index to
it. */ it. */
rec_set_deleted_flag(rec, page_is_comp(page), val); if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
/* page_zip overflow should have been detected
before writing MLOG_COMP_REC_SEC_DELETE_MARK */
}
} }
return(ptr); return(ptr);
@ -2293,6 +2420,7 @@ btr_cur_del_mark_set_sec_rec(
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
buf_block_t* block; buf_block_t* block;
page_zip_des_t* page_zip;
rec_t* rec; rec_t* rec;
ulint err; ulint err;
@ -2316,13 +2444,15 @@ btr_cur_del_mark_set_sec_rec(
block = buf_block_align(rec); block = buf_block_align(rec);
ut_ad(!!page_is_comp(buf_block_get_frame(block)) ut_ad(!!page_is_comp(buf_block_get_frame(block))
== cursor->index->table->comp); == cursor->index->table->comp);
page_zip = buf_block_get_page_zip(block);
if (block->is_hashed) { if (block->is_hashed) {
rw_lock_x_lock(&btr_search_latch); rw_lock_x_lock(&btr_search_latch);
} }
rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)), if (!btr_rec_set_deleted_flag(rec, page_zip, val)) {
val); ut_error; /* TODO */
}
if (block->is_hashed) { if (block->is_hashed) {
rw_lock_x_unlock(&btr_search_latch); rw_lock_x_unlock(&btr_search_latch);
@ -2344,40 +2474,16 @@ btr_cur_del_unmark_for_ibuf(
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
/* We do not need to reserve btr_search_latch, as the page has just /* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */ been read to the buffer pool and there cannot be a hash index to it. */
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE); btr_rec_set_deleted_flag(rec, NULL, FALSE);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr); btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
} }
/*==================== B-TREE RECORD REMOVE =========================*/ /*==================== B-TREE RECORD REMOVE =========================*/
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
This function is a thin wrapper: it checks the latching and level
preconditions with ut_ad() and then hands the cursor and mtr over to
btr_compress() unchanged. Nothing is returned to the caller. */
void
btr_cur_compress(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid */
mtr_t* mtr) /* in: mtr */
{
/* The mtr memo must contain the lock of the cursor's tree,
registered as MTR_MEMO_X_LOCK */
ut_ad(mtr_memo_contains(mtr,
dict_tree_get_lock(btr_cur_get_tree(cursor)),
MTR_MEMO_X_LOCK));
/* The mtr memo must contain the buffer block of the cursor page,
registered as MTR_MEMO_PAGE_X_FIX */
ut_ad(mtr_memo_contains(mtr, buf_block_align(
btr_cur_get_page(cursor)),
MTR_MEMO_PAGE_X_FIX));
/* Only pages whose level is 0 (the leaf level, per the function
comment) may be passed in */
ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
/* All of the actual work is done by btr_compress() */
btr_compress(cursor, mtr);
}
/***************************************************************** /*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid that mtr holds an x-latch on the tree and on the cursor page. To avoid
@ -2403,9 +2509,7 @@ btr_cur_compress_if_useful(
if (btr_cur_compress_recommendation(cursor, mtr)) { if (btr_cur_compress_recommendation(cursor, mtr)) {
btr_compress(cursor, mtr); return(btr_compress(cursor, mtr));
return(TRUE);
} }
return(FALSE); return(FALSE);
@ -2454,17 +2558,41 @@ btr_cur_optimistic_delete(
if (no_compress_needed) { if (no_compress_needed) {
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
lock_update_delete(rec); lock_update_delete(rec);
btr_search_update_hash_on_delete(cursor); btr_search_update_hash_on_delete(cursor);
max_ins_size = page_get_max_insert_size_after_reorganize(page, max_ins_size = page_get_max_insert_size_after_reorganize(page,
1); 1);
page_zip = buf_block_get_page_zip(
buf_block_align(btr_cur_get_page(cursor)));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
/* If there is not enough space in the page
modification log, ignore the log and
try compressing the page afterwards. */
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
page_cur_delete_rec(btr_cur_get_page_cur(cursor), page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets, mtr); cursor->index, offsets,
page_zip_used, mtr);
ibuf_update_free_bits_low(cursor->index, page, max_ins_size, ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
mtr); mtr);
if (UNIV_LIKELY_NULL(page_zip)
&& UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
}
} }
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
@ -2503,6 +2631,8 @@ btr_cur_pessimistic_delete(
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
page_t* page; page_t* page;
page_zip_des_t* page_zip;
page_zip_des_t* page_zip_used;
dict_tree_t* tree; dict_tree_t* tree;
rec_t* rec; rec_t* rec;
dtuple_t* node_ptr; dtuple_t* node_ptr;
@ -2546,7 +2676,7 @@ btr_cur_pessimistic_delete(
/* Free externally stored fields if the record is neither /* Free externally stored fields if the record is neither
a node pointer nor in two-byte format. a node pointer nor in two-byte format.
This avoids an unnecessary loop. */ This condition avoids an unnecessary loop. */
if (page_is_comp(page) if (page_is_comp(page)
? !rec_get_node_ptr_flag(rec) ? !rec_get_node_ptr_flag(rec)
: !rec_get_1byte_offs_flag(rec)) { : !rec_get_1byte_offs_flag(rec)) {
@ -2569,6 +2699,14 @@ btr_cur_pessimistic_delete(
goto return_after_reservations; goto return_after_reservations;
} }
page_zip = buf_block_get_page_zip(buf_block_align(page));
if (UNIV_LIKELY(!page_zip)
|| UNIV_UNLIKELY(!page_zip_available(page_zip, 32))) {
page_zip_used = NULL;
} else {
page_zip_used = page_zip;
}
lock_update_delete(rec); lock_update_delete(rec);
level = btr_page_get_level(page, mtr); level = btr_page_get_level(page, mtr);
@ -2584,8 +2722,13 @@ btr_cur_pessimistic_delete(
non-leaf level, we must mark the new leftmost node non-leaf level, we must mark the new leftmost node
pointer as the predefined minimum record */ pointer as the predefined minimum record */
btr_set_min_rec_mark(next_rec, page_is_comp(page), if (UNIV_LIKELY_NULL(page_zip_used)
mtr); && UNIV_UNLIKELY(!page_zip_available(
page_zip_used, 5 + 32))) {
page_zip_used = NULL;
}
btr_set_min_rec_mark(next_rec, page_zip_used, mtr);
} else { } else {
/* Otherwise, if we delete the leftmost node pointer /* Otherwise, if we delete the leftmost node pointer
on a page, we have to change the father node pointer on a page, we have to change the father node pointer
@ -2607,10 +2750,16 @@ btr_cur_pessimistic_delete(
btr_search_update_hash_on_delete(cursor); btr_search_update_hash_on_delete(cursor);
page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index,
offsets, mtr); offsets, page_zip_used, mtr);
ut_ad(btr_check_node_ptr(tree, page, mtr)); ut_ad(btr_check_node_ptr(tree, page, mtr));
if (UNIV_LIKELY_NULL(page_zip) && UNIV_UNLIKELY(!page_zip_used)) {
/* Reorganize the page to ensure that the
compression succeeds after deleting the record. */
btr_page_reorganize(page, cursor->index, mtr);
}
*err = DB_SUCCESS; *err = DB_SUCCESS;
return_after_reservations: return_after_reservations:
@ -3038,7 +3187,7 @@ btr_cur_set_ownership_of_extern_field(
const ulint* offsets,/* in: array returned by rec_get_offsets() */ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint i, /* in: field number */ ulint i, /* in: field number */
ibool val, /* in: value to set */ ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr, or NULL if not logged */
{ {
byte* data; byte* data;
ulint local_len; ulint local_len;
@ -3058,8 +3207,12 @@ btr_cur_set_ownership_of_extern_field(
byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
} }
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, if (UNIV_LIKELY(mtr != NULL)) {
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
MLOG_1BYTE, mtr); MLOG_1BYTE, mtr);
} else {
mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
}
} }
/*********************************************************************** /***********************************************************************
@ -3074,9 +3227,8 @@ btr_cur_mark_extern_inherited_fields(
rec_t* rec, /* in: record in a clustered index */ rec_t* rec, /* in: record in a clustered index */
const ulint* offsets,/* in: array returned by rec_get_offsets() */ const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */ upd_t* update, /* in: update vector */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr, or NULL if not logged */
{ {
ibool is_updated;
ulint n; ulint n;
ulint j; ulint j;
ulint i; ulint i;
@ -3089,22 +3241,22 @@ btr_cur_mark_extern_inherited_fields(
if (rec_offs_nth_extern(offsets, i)) { if (rec_offs_nth_extern(offsets, i)) {
/* Check it is not in updated fields */ /* Check it is not in updated fields */
is_updated = FALSE;
if (update) { if (update) {
for (j = 0; j < upd_get_n_fields(update); for (j = 0; j < upd_get_n_fields(update);
j++) { j++) {
if (upd_get_nth_field(update, j) if (upd_get_nth_field(update, j)
->field_no == i) { ->field_no == i) {
is_updated = TRUE;
goto updated;
} }
} }
} }
if (!is_updated) { btr_cur_set_ownership_of_extern_field(rec,
btr_cur_set_ownership_of_extern_field(rec, offsets, i, FALSE, mtr);
offsets, i, FALSE, mtr); updated:
} ;
} }
} }
} }
@ -3176,7 +3328,7 @@ void
btr_cur_unmark_extern_fields( btr_cur_unmark_extern_fields(
/*=========================*/ /*=========================*/
rec_t* rec, /* in: record in a clustered index */ rec_t* rec, /* in: record in a clustered index */
mtr_t* mtr, /* in: mtr */ mtr_t* mtr, /* in: mtr, or NULL if not logged */
const ulint* offsets)/* in: array returned by rec_get_offsets() */ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{ {
ulint n; ulint n;
@ -3188,8 +3340,8 @@ btr_cur_unmark_extern_fields(
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (rec_offs_nth_extern(offsets, i)) { if (rec_offs_nth_extern(offsets, i)) {
btr_cur_set_ownership_of_extern_field(rec, offsets, i, btr_cur_set_ownership_of_extern_field(rec,
TRUE, mtr); offsets, i, TRUE, mtr);
} }
} }
} }
@ -3468,7 +3620,7 @@ btr_store_big_rec_extern_fields(
rec_set_nth_field_extern_bit(rec, index, rec_set_nth_field_extern_bit(rec, index,
big_rec_vec->fields[i].field_no, big_rec_vec->fields[i].field_no,
TRUE, &mtr); &mtr);
} }
prev_page_no = page_no; prev_page_no = page_no;

View file

@ -37,6 +37,7 @@ Created 11/5/1995 Heikki Tuuri
#include "log0log.h" #include "log0log.h"
#include "trx0undo.h" #include "trx0undo.h"
#include "srv0srv.h" #include "srv0srv.h"
#include "page0zip.h"
/* /*
IMPLEMENTATION OF THE BUFFER POOL IMPLEMENTATION OF THE BUFFER POOL
@ -482,6 +483,8 @@ buf_block_init(
block->n_pointers = 0; block->n_pointers = 0;
page_zip_des_init(&block->page_zip);
rw_lock_create(&(block->lock)); rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock))); ut_ad(rw_lock_validate(&(block->lock)));

View file

@ -264,9 +264,11 @@ buf_flush_buffered_writes(void)
"InnoDB: before posting to the doublewrite buffer.\n"); "InnoDB: before posting to the doublewrite buffer.\n");
} }
if (block->check_index_page_at_flush if (!block->check_index_page_at_flush) {
&& !page_simple_validate(block->frame)) { } else if (page_is_comp(block->frame)
&& UNIV_UNLIKELY(!page_simple_validate_new(
block->frame))) {
corrupted_page:
buf_page_print(block->frame); buf_page_print(block->frame);
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
@ -278,6 +280,10 @@ buf_flush_buffered_writes(void)
(ulong) block->offset, (ulong) block->space); (ulong) block->offset, (ulong) block->space);
ut_error; ut_error;
} else if (UNIV_UNLIKELY(!page_simple_validate_old(
block->frame))) {
goto corrupted_page;
} }
} }

View file

@ -2761,6 +2761,7 @@ ibuf_insert(
ut_ad(dtuple_check_typed(entry)); ut_ad(dtuple_check_typed(entry));
ut_a(!(index->type & DICT_CLUSTERED)); ut_a(!(index->type & DICT_CLUSTERED));
ut_a(!index->table->zip);
if (rec_get_converted_size(index, entry) if (rec_get_converted_size(index, entry)
>= page_get_free_space_of_empty(index->table->comp) / 2) { >= page_get_free_space_of_empty(index->table->comp) / 2) {
@ -2846,9 +2847,10 @@ ibuf_insert_to_index_page(
btr_cur_del_unmark_for_ibuf(rec, mtr); btr_cur_del_unmark_for_ibuf(rec, mtr);
} else { } else {
rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); rec = page_cur_tuple_insert(&page_cur, NULL,
entry, index, mtr);
if (rec == NULL) { if (UNIV_UNLIKELY(rec == NULL)) {
/* If the record did not fit, reorganize */ /* If the record did not fit, reorganize */
btr_page_reorganize(page, index, mtr); btr_page_reorganize(page, index, mtr);
@ -2858,7 +2860,8 @@ ibuf_insert_to_index_page(
/* This time the record must fit */ /* This time the record must fit */
if (UNIV_UNLIKELY(!page_cur_tuple_insert( if (UNIV_UNLIKELY(!page_cur_tuple_insert(
&page_cur, entry, index, mtr))) { &page_cur, NULL,
entry, index, mtr))) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);

View file

@ -265,9 +265,10 @@ Sets a record as the predefined minimum record. */
void void
btr_set_min_rec_mark( btr_set_min_rec_mark(
/*=================*/ /*=================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
ulint comp, /* in: nonzero=compact page format */ page_zip_des_t* page_zip,/* in/out: compressed page with
mtr_t* mtr); /* in: mtr */ at least 5 bytes available, or NULL */
mtr_t* mtr); /* in: mtr */
/***************************************************************** /*****************************************************************
Deletes on the upper level the node pointer to a page. */ Deletes on the upper level the node pointer to a page. */
@ -295,11 +296,12 @@ conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist. NOTE: it is assumed that the caller has reserved the brothers, if they exist. */
enough free extents so that the compression will always succeed if done! */
void ibool
btr_compress( btr_compress(
/*=========*/ /*=========*/
/* out: TRUE on success */
btr_cur_t* cursor, /* in: cursor on the page to merge or lift; btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete the page must not be empty: in record delete
use btr_discard_page if the page would become use btr_discard_page if the page would become

View file

@ -286,19 +286,6 @@ btr_cur_del_unmark_for_ibuf(
rec_t* rec, /* in: record to delete unmark */ rec_t* rec, /* in: record to delete unmark */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
/***************************************************************** /*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done! */
void
btr_cur_compress(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those deadlocks, mtr must also own x-latches to brothers of page, if those
@ -364,7 +351,8 @@ btr_cur_parse_update_in_place(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */ dict_index_t* index); /* in: index corresponding to page */
/******************************************************************** /********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered Parses the redo log record for delete marking or unmarking of a clustered
@ -376,8 +364,9 @@ btr_cur_parse_del_mark_set_clust_rec(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: index corresponding to page */ page_t* page, /* in/out: page or NULL */
page_t* page); /* in: page or NULL */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
dict_index_t* index); /* in: index corresponding to page */
/******************************************************************** /********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary Parses the redo log record for delete marking or unmarking of a secondary
index record. */ index record. */
@ -388,7 +377,8 @@ btr_cur_parse_del_mark_set_sec_rec(
/* out: end of log record or NULL */ /* out: end of log record or NULL */
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
page_t* page); /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip);/* in/out: compressed page, or NULL */
/*********************************************************************** /***********************************************************************
Estimates the number of rows in a given index range. */ Estimates the number of rows in a given index range. */

View file

@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
#include "hash0hash.h" #include "hash0hash.h"
#include "ut0byte.h" #include "ut0byte.h"
#include "os0proc.h" #include "os0proc.h"
#include "page0types.h"
/* Flags for flush types */ /* Flags for flush types */
#define BUF_FLUSH_LRU 1 #define BUF_FLUSH_LRU 1
@ -612,6 +613,14 @@ buf_block_get_page_no(
/*==================*/ /*==================*/
/* out: page number */ /* out: page number */
buf_block_t* block); /* in: pointer to the control block */ buf_block_t* block); /* in: pointer to the control block */
/*************************************************************************
Gets the compressed page descriptor of a block if applicable. The
descriptor is the page_zip member of the control block; NULL is returned
instead when that descriptor's data pointer is not set. */
UNIV_INLINE
page_zip_des_t*
buf_block_get_page_zip(
/*===================*/
/* out: compressed page descriptor (&block->page_zip),
or NULL if block->page_zip.data is NULL */
buf_block_t* block); /* in: pointer to the control block;
must not be NULL (checked with ut_ad) */
/*********************************************************************** /***********************************************************************
Gets the block to whose frame the pointer is pointing to. */ Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE UNIV_INLINE
@ -850,6 +859,7 @@ struct buf_block_struct{
ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE in hash BTR_SEARCH_RIGHT_SIDE in hash
indexing */ indexing */
page_zip_des_t page_zip; /* compressed page info */
/* 6. Debug fields */ /* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread rw_lock_t debug_latch; /* in the debug version, each thread

View file

@ -191,6 +191,24 @@ buf_block_get_page_no(
return(block->offset); return(block->offset);
} }
/*************************************************************************
Returns the compressed page descriptor embedded in a buffer control block,
or NULL when the descriptor's data pointer is not set. */
UNIV_INLINE
page_zip_des_t*
buf_block_get_page_zip(
/*===================*/
				/* out: compressed page descriptor,
				or NULL if block->page_zip.data is NULL */
	buf_block_t*	block)	/* in: pointer to the control block */
{
	page_zip_des_t*	zip_desc;

	ut_ad(block);

	zip_desc = &block->page_zip;

	if (UNIV_LIKELY_NULL(zip_desc->data)) {
		/* The data pointer is set: expose the descriptor. */
		return(zip_desc);
	}

	return(NULL);
}
/*********************************************************************** /***********************************************************************
Gets the block to whose frame the pointer is pointing to. */ Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE UNIV_INLINE
@ -614,8 +632,6 @@ buf_page_release(
RW_NO_LATCH */ RW_NO_LATCH */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
ulint buf_fix_count;
ut_ad(block); ut_ad(block);
mutex_enter_fast(&(buf_pool->mutex)); mutex_enter_fast(&(buf_pool->mutex));
@ -631,8 +647,7 @@ buf_page_release(
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch)); rw_lock_s_unlock(&(block->debug_latch));
#endif #endif
buf_fix_count = block->buf_fix_count; block->buf_fix_count--;
block->buf_fix_count = buf_fix_count - 1;
mutex_exit(&(buf_pool->mutex)); mutex_exit(&(buf_pool->mutex));

View file

@ -329,6 +329,7 @@ struct dict_table_struct{
user calls DISCARD TABLESPACE on this table, user calls DISCARD TABLESPACE on this table,
and reset to FALSE in IMPORT TABLESPACE */ and reset to FALSE in IMPORT TABLESPACE */
ibool comp; /* flag: TRUE=compact page format */ ibool comp; /* flag: TRUE=compact page format */
ibool zip; /* flag: TRUE=compressed page format */
hash_node_t name_hash; /* hash chain node */ hash_node_t name_hash; /* hash chain node */
hash_node_t id_hash; /* hash chain node */ hash_node_t id_hash; /* hash chain node */
ulint n_def; /* number of columns defined so far */ ulint n_def; /* number of columns defined so far */

View file

@ -88,6 +88,7 @@ ibuf_should_try(
decide */ decide */
{ {
if (!(index->type & DICT_CLUSTERED) if (!(index->type & DICT_CLUSTERED)
&& !index->table->zip
&& (ignore_sec_unique || !(index->type & DICT_UNIQUE)) && (ignore_sec_unique || !(index->type & DICT_UNIQUE))
&& ibuf->meter > IBUF_THRESHOLD) { && ibuf->meter > IBUF_THRESHOLD) {

View file

@ -129,8 +129,11 @@ flag value must give the length also! */
/* copy compact record list end /* copy compact record list end
to a new created index page */ to a new created index page */
#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ #define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
#define MLOG_COMP_DECOMPRESS ((byte)47) /* decompress a page
to undo a compressed page
overflow */
#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in #define MLOG_BIGGEST_TYPE ((byte)47) /* biggest value (used in
asserts) */ asserts) */
/******************************************************************* /*******************************************************************

View file

@ -130,6 +130,8 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */ dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */ mtr_t* mtr); /* in: mini-transaction handle */
@ -144,6 +146,8 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
rec_t* rec, /* in: record to insert */ rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint* offsets,/* in: rec_get_offsets(rec, index) */
@ -160,6 +164,8 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */ rec_t* rec, /* in: pointer to a physical record or NULL */
@ -173,7 +179,6 @@ void
page_copy_rec_list_end_to_created_page( page_copy_rec_list_end_to_created_page(
/*===================================*/ /*===================================*/
page_t* new_page, /* in: index page to copy to */ page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: first record to copy */ rec_t* rec, /* in: first record to copy */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
@ -184,9 +189,11 @@ next record after the deleted one. */
void void
page_cur_delete_rec( page_cur_delete_rec(
/*================*/ /*================*/
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */ const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr); /* in: mini-transaction handle */ mtr_t* mtr); /* in: mini-transaction handle */
/******************************************************************** /********************************************************************
Searches the right position for a page cursor. */ Searches the right position for a page cursor. */
@ -245,7 +252,9 @@ page_cur_parse_insert_rec(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
mtr_t* mtr); /* in: mtr or NULL */ mtr_t* mtr); /* in: mtr or NULL */
/************************************************************** /**************************************************************
Parses a log record of copying a record list end to a new created page. */ Parses a log record of copying a record list end to a new created page. */
@ -257,7 +266,8 @@ page_parse_copy_rec_list_to_created_page(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr); /* in: mtr or NULL */ mtr_t* mtr); /* in: mtr or NULL */
/*************************************************************** /***************************************************************
Parses log record of a record delete on a page. */ Parses log record of a record delete on a page. */
@ -269,7 +279,9 @@ page_cur_parse_delete_rec(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr); /* in: mtr or NULL */ mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */ /* Index page cursor */

View file

@ -181,11 +181,14 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple */ dtuple_t* tuple, /* in: pointer to a data tuple */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */ mtr_t* mtr) /* in: mini-transaction handle */
{ {
return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr)); return(page_cur_insert_rec_low(cursor, page_zip, tuple,
index, NULL, NULL, mtr));
} }
/*************************************************************** /***************************************************************
@ -199,12 +202,14 @@ page_cur_rec_insert(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
rec_t* rec, /* in: record to insert */ rec_t* rec, /* in: record to insert */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction handle */ mtr_t* mtr) /* in: mini-transaction handle */
{ {
return(page_cur_insert_rec_low(cursor, NULL, index, rec, return(page_cur_insert_rec_low(cursor, page_zip, NULL,
offsets, mtr)); index, rec, offsets, mtr));
} }

View file

@ -145,7 +145,7 @@ Sets the max trx id field value. */
void void
page_set_max_trx_id( page_set_max_trx_id(
/*================*/ /*================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
dulint trx_id);/* in: transaction id */ dulint trx_id);/* in: transaction id */
/***************************************************************** /*****************************************************************
Sets the max trx id field value if trx_id is bigger than the previous Sets the max trx id field value if trx_id is bigger than the previous
@ -154,8 +154,8 @@ UNIV_INLINE
void void
page_update_max_trx_id( page_update_max_trx_id(
/*===================*/ /*===================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
dulint trx_id); /* in: transaction id */ dulint trx_id);/* in: transaction id */
/***************************************************************** /*****************************************************************
Reads the given header field. */ Reads the given header field. */
UNIV_INLINE UNIV_INLINE
@ -170,9 +170,10 @@ UNIV_INLINE
void void
page_header_set_field( page_header_set_field(
/*==================*/ /*==================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
ulint field, /* in: PAGE_N_DIR_SLOTS, ... */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint val); /* in: value */ ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
ulint val); /* in: value */
/***************************************************************** /*****************************************************************
Returns the pointer stored in the given header field. */ Returns the pointer stored in the given header field. */
UNIV_INLINE UNIV_INLINE
@ -188,9 +189,10 @@ UNIV_INLINE
void void
page_header_set_ptr( page_header_set_ptr(
/*================*/ /*================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
ulint field, /* in: PAGE_FREE, ... */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
byte* ptr); /* in: pointer or NULL*/ ulint field, /* in: PAGE_FREE, ... */
const byte* ptr); /* in: pointer or NULL*/
/***************************************************************** /*****************************************************************
Resets the last insert info field in the page header. Writes to mlog Resets the last insert info field in the page header. Writes to mlog
about this operation. */ about this operation. */
@ -283,8 +285,9 @@ UNIV_INLINE
void void
page_dir_set_n_heap( page_dir_set_n_heap(
/*================*/ /*================*/
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
ulint n_heap);/* in: number of records */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_heap);/* in: number of records */
/***************************************************************** /*****************************************************************
Gets the number of dir slots in directory. */ Gets the number of dir slots in directory. */
UNIV_INLINE UNIV_INLINE
@ -299,9 +302,9 @@ UNIV_INLINE
void void
page_dir_set_n_slots( page_dir_set_n_slots(
/*=================*/ /*=================*/
/* out: number of slots */ page_t* page, /* in/out: page */
page_t* page, /* in: index page */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_slots);/* in: number of slots */ ulint n_slots);/* in: number of slots */
/***************************************************************** /*****************************************************************
Gets pointer to nth directory slot. */ Gets pointer to nth directory slot. */
UNIV_INLINE UNIV_INLINE
@ -349,9 +352,10 @@ UNIV_INLINE
void void
page_dir_slot_set_n_owned( page_dir_slot_set_n_owned(
/*======================*/ /*======================*/
page_dir_slot_t* slot, /* in: directory slot */ page_dir_slot_t*slot, /* in/out: directory slot */
ulint n); /* in: number of records owned page_zip_des_t* page_zip,/* in/out: compressed page with
by the slot */ at least 5 bytes available, or NULL */
ulint n); /* in: number of records owned by the slot */
/**************************************************************** /****************************************************************
Calculates the space reserved for directory slots of a given Calculates the space reserved for directory slots of a given
number of records. The exact value is a fraction number number of records. The exact value is a fraction number
@ -402,10 +406,12 @@ UNIV_INLINE
void void
page_rec_set_next( page_rec_set_next(
/*==============*/ /*==============*/
rec_t* rec, /* in: pointer to record, must not be rec_t* rec, /* in: pointer to record,
page supremum */ must not be page supremum */
rec_t* next); /* in: pointer to next record, must not rec_t* next, /* in: pointer to next record,
be page infimum */ must not be page infimum */
page_zip_des_t* page_zip);/* in/out: compressed page with at least
6 bytes available, or NULL */
/**************************************************************** /****************************************************************
Gets the pointer to the previous record. */ Gets the pointer to the previous record. */
UNIV_INLINE UNIV_INLINE
@ -513,6 +519,16 @@ page_get_free_space_of_empty(
/* out: free space */ /* out: free space */
ulint comp) /* in: nonzero=compact page format */ ulint comp) /* in: nonzero=compact page format */
__attribute__((const)); __attribute__((const));
/**************************************************************
Returns the base extra size of a physical record. This is the
size of the fixed header, independent of the record size. */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
/*=========================*/
/* out: REC_N_NEW_EXTRA_BYTES
or REC_N_OLD_EXTRA_BYTES */
const rec_t* rec); /* in: physical record */
/**************************************************************** /****************************************************************
Returns the sum of the sizes of the records in the record list Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records. */ excluding the infimum and supremum records. */
@ -530,7 +546,8 @@ page_mem_alloc(
/*===========*/ /*===========*/
/* out: pointer to start of allocated /* out: pointer to start of allocated
buffer, or NULL if allocation fails */ buffer, or NULL if allocation fails */
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint need, /* in: number of bytes needed */ ulint need, /* in: number of bytes needed */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint* heap_no);/* out: this contains the heap number ulint* heap_no);/* out: this contains the heap number
@ -542,7 +559,9 @@ UNIV_INLINE
void void
page_mem_free( page_mem_free(
/*==========*/ /*==========*/
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */ rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/************************************************************** /**************************************************************
@ -554,17 +573,17 @@ page_create(
/* out: pointer to the page */ /* out: pointer to the page */
buf_frame_t* frame, /* in: a buffer frame where the page is buf_frame_t* frame, /* in: a buffer frame where the page is
created */ created */
page_zip_des_t* page_zip, /* in/out: compressed page, or NULL */
mtr_t* mtr, /* in: mini-transaction handle */ mtr_t* mtr, /* in: mini-transaction handle */
ulint comp); /* in: nonzero=compact page format */ ulint comp); /* in: nonzero=compact page format */
/***************************************************************** /*****************************************************************
Differs from page_copy_rec_list_end, because this function does not Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page. */ touch the lock table and max trx id on page or compress the page. */
void void
page_copy_rec_list_end_no_locks( page_copy_rec_list_end_no_locks(
/*============================*/ /*============================*/
page_t* new_page, /* in: index page to copy to */ page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
@ -573,27 +592,31 @@ Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied. including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page. */ The records are copied to the start of the record list on new_page. */
void ibool
page_copy_rec_list_end( page_copy_rec_list_end(
/*===================*/ /*===================*/
page_t* new_page, /* in: index page to copy to */ /* out: TRUE on success */
page_t* page, /* in: index page */ page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr) /* in: mtr */
__attribute__((warn_unused_result, nonnull(1, 3, 4, 5)));
/***************************************************************** /*****************************************************************
Copies records from page to new_page, up to the given record, NOT Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied. including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page. */ The records are copied to the end of the record list on new_page. */
void ibool
page_copy_rec_list_start( page_copy_rec_list_start(
/*=====================*/ /*=====================*/
page_t* new_page, /* in: index page to copy to */ /* out: TRUE on success */
page_t* page, /* in: index page */ page_t* new_page, /* in/out: index page to copy to */
page_zip_des_t* new_page_zip, /* in/out: compressed page, or NULL */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr) /* in: mtr */
__attribute__((warn_unused_result, nonnull(1, 3, 4, 5)));
/***************************************************************** /*****************************************************************
Deletes records from a page from a given record onward, including that record. Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */ The infimum and supremum records are not deleted. */
@ -601,26 +624,16 @@ The infimum and supremum records are not deleted. */
void void
page_delete_rec_list_end( page_delete_rec_list_end(
/*=====================*/ /*=====================*/
page_t* page, /* in: index page */ rec_t* rec, /* in: pointer to record on page */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint n_recs, /* in: number of records to delete, ulint n_recs, /* in: number of records to delete,
or ULINT_UNDEFINED if not known */ or ULINT_UNDEFINED if not known */
ulint size, /* in: the sum of the sizes of the ulint size, /* in: the sum of the sizes of the
records in the end of the chain to records in the end of the chain to
delete, or ULINT_UNDEFINED if not known */ delete, or ULINT_UNDEFINED if not known */
mtr_t* mtr); /* in: mtr */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
/***************************************************************** mtr_t* mtr) /* in: mtr */
Deletes records from page, up to the given record, NOT including __attribute__((nonnull(1, 2, 6)));
that record. Infimum and supremum records are not deleted. */
void
page_delete_rec_list_start(
/*=======================*/
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
/***************************************************************** /*****************************************************************
Moves record list end to another page. Moved records include Moves record list end to another page. Moved records include
split_rec. */ split_rec. */
@ -629,30 +642,25 @@ void
page_move_rec_list_end( page_move_rec_list_end(
/*===================*/ /*===================*/
page_t* new_page, /* in: index page where to move */ page_t* new_page, /* in: index page where to move */
page_t* page, /* in: index page */ page_zip_des_t* new_page_zip, /* in/out: compressed page of
new_page, or NULL */
rec_t* split_rec, /* in: first record to move */ rec_t* split_rec, /* in: first record to move */
page_zip_des_t* page_zip, /* in/out: compressed page of
split_rec, or NULL */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr) /* in: mtr */
/***************************************************************** __attribute__((nonnull(1, 3, 5, 6)));
Moves record list start to another page. Moved records do not include
split_rec. */
void
page_move_rec_list_start(
/*=====================*/
page_t* new_page, /* in: index page where to move */
page_t* page, /* in: index page */
rec_t* split_rec, /* in: first record not to move */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
/******************************************************************** /********************************************************************
Splits a directory slot which owns too many records. */ Splits a directory slot which owns too many records. */
void void
page_dir_split_slot( page_dir_split_slot(
/*================*/ /*================*/
page_t* page, /* in: the index page in question */ page_t* page, /* in: index page */
ulint slot_no); /* in: the directory slot */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/***************************************************************** /*****************************************************************
Tries to balance the given directory slot with too few records Tries to balance the given directory slot with too few records
with the upper neighbor, so that there are at least the minimum number with the upper neighbor, so that there are at least the minimum number
@ -662,8 +670,11 @@ two slots. */
void void
page_dir_balance_slot( page_dir_balance_slot(
/*==================*/ /*==================*/
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
ulint slot_no); /* in: the directory slot */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 15 bytes available, or NULL */
ulint slot_no)/* in: the directory slot */
__attribute__((nonnull(1)));
/************************************************************** /**************************************************************
Parses a log record of a record list end or start deletion. */ Parses a log record of a record list end or start deletion. */
@ -766,10 +777,20 @@ know the index. This is also resilient so that this should never crash
even if the page is total garbage. */ even if the page is total garbage. */
ibool ibool
page_simple_validate( page_simple_validate_old(
/*=================*/ /*=====================*/
/* out: TRUE if ok */ /* out: TRUE if ok */
page_t* page); /* in: index page */ page_t* page); /* in: old-style index page */
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. */
ibool
page_simple_validate_new(
/*=====================*/
/* out: TRUE if ok */
page_t* page); /* in: new-style index page */
/******************************************************************* /*******************************************************************
This function checks the consistency of an index page. */ This function checks the consistency of an index page. */

View file

@ -35,7 +35,7 @@ UNIV_INLINE
void void
page_update_max_trx_id( page_update_max_trx_id(
/*===================*/ /*===================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
dulint trx_id) /* in: transaction id */ dulint trx_id) /* in: transaction id */
{ {
ut_ad(page); ut_ad(page);
@ -67,9 +67,10 @@ UNIV_INLINE
void void
page_header_set_field( page_header_set_field(
/*==================*/ /*==================*/
page_t* page, /* in: page */ page_t* page, /* in/out: page */
ulint field, /* in: PAGE_LEVEL, ... */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint val) /* in: value */ ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
ulint val) /* in: value */
{ {
ut_ad(page); ut_ad(page);
ut_ad(field <= PAGE_N_RECS); ut_ad(field <= PAGE_N_RECS);
@ -77,6 +78,9 @@ page_header_set_field(
ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val); mach_write_to_2(page + PAGE_HEADER + field, val);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write_header(page_zip, page + PAGE_HEADER + field, 2);
}
} }
/***************************************************************** /*****************************************************************
@ -114,9 +118,10 @@ UNIV_INLINE
void void
page_header_set_ptr( page_header_set_ptr(
/*================*/ /*================*/
page_t* page, /* in: page */ page_t* page, /* in: page */
ulint field, /* in: PAGE_FREE, ... */ page_zip_des_t* page_zip,/* in: compressed page, or NULL */
byte* ptr) /* in: pointer or NULL*/ ulint field, /* in: PAGE_FREE, ... */
const byte* ptr) /* in: pointer or NULL*/
{ {
ulint offs; ulint offs;
@ -133,7 +138,7 @@ page_header_set_ptr(
ut_ad((field != PAGE_HEAP_TOP) || offs); ut_ad((field != PAGE_HEAP_TOP) || offs);
page_header_set_field(page, field, offs); page_header_set_field(page, page_zip, field, offs);
} }
/***************************************************************** /*****************************************************************
@ -413,11 +418,11 @@ UNIV_INLINE
void void
page_dir_set_n_slots( page_dir_set_n_slots(
/*=================*/ /*=================*/
/* out: number of slots */ page_t* page, /* in/out: page */
page_t* page, /* in: index page */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_slots)/* in: number of slots */ ulint n_slots)/* in: number of slots */
{ {
page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots); page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
} }
/***************************************************************** /*****************************************************************
@ -438,12 +443,13 @@ UNIV_INLINE
void void
page_dir_set_n_heap( page_dir_set_n_heap(
/*================*/ /*================*/
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
ulint n_heap) /* in: number of records */ page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
ulint n_heap) /* in: number of records */
{ {
ut_ad(n_heap < 0x8000); ut_ad(n_heap < 0x8000);
page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 & page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap | (0x8000 &
page_header_get_field(page, PAGE_N_HEAP))); page_header_get_field(page, PAGE_N_HEAP)));
} }
@ -520,7 +526,11 @@ page_dir_slot_get_n_owned(
page_dir_slot_t* slot) /* in: page directory slot */ page_dir_slot_t* slot) /* in: page directory slot */
{ {
rec_t* rec = page_dir_slot_get_rec(slot); rec_t* rec = page_dir_slot_get_rec(slot);
return(rec_get_n_owned(rec, page_rec_is_comp(rec))); if (page_rec_is_comp(slot)) {
return(rec_get_n_owned_new(rec));
} else {
return(rec_get_n_owned_old(rec));
}
} }
/******************************************************************* /*******************************************************************
@ -529,12 +539,18 @@ UNIV_INLINE
void void
page_dir_slot_set_n_owned( page_dir_slot_set_n_owned(
/*======================*/ /*======================*/
page_dir_slot_t* slot, /* in: directory slot */ page_dir_slot_t*slot, /* in/out: directory slot */
ulint n) /* in: number of records owned page_zip_des_t* page_zip,/* in/out: compressed page with
by the slot */ at least 5 bytes available, or NULL */
ulint n) /* in: number of records owned by the slot */
{ {
rec_t* rec = page_dir_slot_get_rec(slot); rec_t* rec = page_dir_slot_get_rec(slot);
rec_set_n_owned(rec, page_rec_is_comp(rec), n); if (page_rec_is_comp(slot)) {
rec_set_n_owned_new(rec, page_zip, n);
} else {
ut_ad(!page_zip);
rec_set_n_owned_old(rec, n);
}
} }
/**************************************************************** /****************************************************************
@ -597,26 +613,29 @@ UNIV_INLINE
void void
page_rec_set_next( page_rec_set_next(
/*==============*/ /*==============*/
rec_t* rec, /* in: pointer to record, must not be page supremum */ rec_t* rec, /* in: pointer to record,
rec_t* next) /* in: pointer to next record, must not be page must not be page supremum */
infimum */ rec_t* next, /* in: pointer to next record,
must not be page infimum */
page_zip_des_t* page_zip) /* in/out: compressed page with
at least 6 bytes available, or NULL */
{ {
page_t* page;
ulint offs; ulint offs;
ut_ad(page_rec_check(rec)); ut_ad(page_rec_check(rec));
ut_ad(!page_rec_is_supremum(rec)); ut_ad(!page_rec_is_supremum(rec));
page = ut_align_down(rec, UNIV_PAGE_SIZE);
if (next) { ut_ad(!next || !page_rec_is_infimum(next));
ut_ad(!page_rec_is_infimum(next)); ut_ad(!next || ut_align_down(rec, UNIV_PAGE_SIZE)
ut_ad(page == ut_align_down(next, UNIV_PAGE_SIZE)); == ut_align_down(next, UNIV_PAGE_SIZE));
offs = (ulint) (next - page);
offs = ut_align_offset(next, UNIV_PAGE_SIZE);
if (page_rec_is_comp(rec)) {
rec_set_next_offs_new(rec, page_zip, offs);
} else { } else {
offs = 0; rec_set_next_offs_old(rec, offs);
ut_ad(!page_zip);
} }
rec_set_next_offs(rec, page_is_comp(page), offs);
} }
/**************************************************************** /****************************************************************
@ -671,11 +690,11 @@ page_rec_find_owner_rec(
ut_ad(page_rec_check(rec)); ut_ad(page_rec_check(rec));
if (page_rec_is_comp(rec)) { if (page_rec_is_comp(rec)) {
while (rec_get_n_owned(rec, TRUE) == 0) { while (rec_get_n_owned_new(rec) == 0) {
rec = page_rec_get_next(rec); rec = page_rec_get_next(rec);
} }
} else { } else {
while (rec_get_n_owned(rec, FALSE) == 0) { while (rec_get_n_owned_old(rec) == 0) {
rec = page_rec_get_next(rec); rec = page_rec_get_next(rec);
} }
} }
@ -683,6 +702,23 @@ page_rec_find_owner_rec(
return(rec); return(rec);
} }
/**************************************************************
Determines how many bytes the fixed part of a record header takes.
The result depends only on the format of the page the record lives on
(new-style or old-style), never on the contents of the record itself. */
UNIV_INLINE
ulint
page_rec_get_base_extra_size(
/*=========================*/
				/* out: REC_N_NEW_EXTRA_BYTES for a
				new-style page, otherwise
				REC_N_OLD_EXTRA_BYTES */
	const rec_t*	rec)	/* in: physical record */
{
	ulint	extra_size = REC_N_NEW_EXTRA_BYTES;

	/* The "+ 1" below is only valid as long as the old-style header
	is exactly one byte longer than the new-style one. */
#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
#endif

	if (!page_rec_is_comp(rec)) {
		/* old-style record: one more header byte */
		extra_size = extra_size + 1;
	}

	return(extra_size);
}
/**************************************************************** /****************************************************************
Returns the sum of the sizes of the records in the record list, excluding Returns the sum of the sizes of the records in the record list, excluding
the infimum and supremum records. */ the infimum and supremum records. */
@ -805,7 +841,9 @@ UNIV_INLINE
void void
page_mem_free( page_mem_free(
/*==========*/ /*==========*/
page_t* page, /* in: index page */ page_t* page, /* in/out: index page */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
rec_t* rec, /* in: pointer to the (origin of) record */ rec_t* rec, /* in: pointer to the (origin of) record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{ {
@ -816,8 +854,8 @@ page_mem_free(
ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec)); ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
free = page_header_get_ptr(page, PAGE_FREE); free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free); page_rec_set_next(rec, free, page_zip);
page_header_set_ptr(page, PAGE_FREE, rec); page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
#if 0 /* It's better not to destroy the user's data. */ #if 0 /* It's better not to destroy the user's data. */
@ -827,11 +865,18 @@ page_mem_free(
cannot be cleared, because page_mem_alloc() needs them in order cannot be cleared, because page_mem_alloc() needs them in order
to determine the size of the deleted record. */ to determine the size of the deleted record. */
memset(rec, 0, rec_offs_data_size(offsets)); memset(rec, 0, rec_offs_data_size(offsets));
/* If you enable this code, make sure that the callers of
page_mem_free() account for the increased usage of space. */
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, page, rec, rec - page,
rec_offs_data_size(offsets));
}
#endif #endif
garbage = page_header_get_field(page, PAGE_GARBAGE); garbage = page_header_get_field(page, PAGE_GARBAGE);
page_header_set_field(page, PAGE_GARBAGE, page_header_set_field(page, page_zip, PAGE_GARBAGE,
garbage + rec_offs_size(offsets)); garbage + rec_offs_size(offsets));
} }

View file

@ -18,5 +18,71 @@ typedef byte page_t;
typedef struct page_search_struct page_search_t; typedef struct page_search_struct page_search_t;
typedef struct page_cur_struct page_cur_t; typedef struct page_cur_struct page_cur_t;
typedef byte page_zip_t;
typedef struct page_zip_des_struct page_zip_des_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
page0*.h includes rem0rec.h and may include rem0rec.ic. */
/* Compressed page descriptor */
struct page_zip_des_struct
{
page_zip_t* data; /* compressed page data */
ulint size; /* total size of compressed page */
ulint m_start; /* start offset of modification log */
ulint m_end; /* end offset of modification log */
};
/**************************************************************************
Write data to the compressed portion of a page. The data must already
have been written to the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log.
This non-inline variant is only declared when UNIV_DEBUG is defined. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of modification log entries
(as for page_zip_available()) */
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
#endif #endif

183
include/page0zip.h Normal file
View file

@ -0,0 +1,183 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#ifndef page0zip_h
#define page0zip_h
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "mtr0types.h"
#include "page0types.h"
/**************************************************************************
Initialize a compressed page descriptor. */
UNIV_INLINE
void
page_zip_des_init(
/*==============*/
page_zip_des_t* page_zip); /* in/out: compressed page
descriptor */
/**************************************************************************
Compress a page. */
ibool
page_zip_compress(
/*==============*/
/* out: TRUE on success, FALSE on failure;
page_zip will be left intact on failure. */
page_zip_des_t* page_zip,/* out: compressed page */
const page_t* page); /* in: uncompressed page */
/**************************************************************************
Decompress a page. */
ibool
page_zip_decompress(
/*================*/
/* out: TRUE on success, FALSE on failure */
page_zip_des_t* page_zip,/* in: data, size; out: m_start, m_end */
page_t* page, /* out: uncompressed page, may be trashed */
mtr_t* mtr) /* in: mini-transaction handle,
or NULL if no logging is needed */
__attribute__((warn_unused_result, nonnull(1, 2)));
#ifdef UNIV_DEBUG
/**************************************************************************
Validate a compressed page descriptor. */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
/* out: TRUE if ok */
const page_zip_des_t* page_zip); /* in: compressed page
descriptor */
/**************************************************************************
Check that the compressed and decompressed pages match. */
ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip,/* in: compressed page */
const page_t* page); /* in: uncompressed page */
#endif /* UNIV_DEBUG */
/**************************************************************************
Determine the encoded length of an integer in the modification log. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
/* out: length of the integer, in bytes */
ulint num) /* in: the integer */
__attribute__((const));
/**************************************************************************
Determine the size of a modification log entry. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
/* out: length of the log entry, in bytes */
ulint pos, /* in: offset on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((const));
/**************************************************************************
Ensure that enough space is available in the modification log.
If not, try to compress the page. */
UNIV_INLINE
ibool
page_zip_alloc(
/*===========*/
/* out: TRUE if enough space is available */
page_zip_des_t* page_zip,/* in/out: compressed page;
will only be modified if compression is needed
and successful */
const page_t* page, /* in: uncompressed page */
ulint size) /* in: size of modification log entries */
__attribute__((nonnull));
/**************************************************************************
Determine if enough space is available in the modification log. */
UNIV_INLINE
ibool
page_zip_available(
/*===============*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of
modification log entries */
__attribute__((warn_unused_result, nonnull, pure));
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log.
This non-inline variant is only declared when UNIV_DEBUG is defined. */
ibool
page_zip_available_noninline(
/*=========================*/
/* out: TRUE if enough space
is available */
const page_zip_des_t* page_zip,/* in: compressed page */
ulint size) /* in: requested size of modification log entries
(as for page_zip_available()) */
__attribute__((warn_unused_result, nonnull, pure));
#endif /* UNIV_DEBUG */
/**************************************************************************
Write data to the compressed portion of a page. The data must already
have been written to the uncompressed page. */
void
page_zip_write(
/*===========*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
page_zip_des_t* page_zip,/* in/out: compressed page */
const byte* str, /* in: address on the uncompressed page */
ulint length) /* in: length of the data */
__attribute__((nonnull));
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif
#ifndef UNIV_NONINL
# include "page0zip.ic"
#endif
#endif /* page0zip_h */

224
include/page0zip.ic Normal file
View file

@ -0,0 +1,224 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "page0zip.h"
#include "page0page.h"
/* The format of compressed pages is as follows.
The header and trailer of the uncompressed pages, including the page
directory in the trailer, are copied as is to the header and trailer
of the compressed page. Immediately preceding the page trailer,
we store a 32-bit checksum of the compressed data.
The data between PAGE_DATA and the last page directory entry
will be written in compressed format, starting at offset PAGE_DATA.
The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.
MODIFICATION LOG ENTRY FORMAT
- length (1..2 bytes), not zero
- offset - PAGE_DATA (1..2 bytes)
- data bytes
The length and the offset are stored in a variable-length format:
- 0xxxxxxx : 0..127
- 10xxxxxx xxxxxxxx: 0..16383
- 11xxxxxx xxxxxxxx: reserved
The end of the modification log is marked by length=0. */
/**************************************************************************
Reset a compressed page descriptor by overwriting the whole structure
with zero bytes. */
UNIV_INLINE
void
page_zip_des_init(
/*==============*/
	page_zip_des_t*	page_zip)	/* in/out: compressed page
					descriptor to be cleared */
{
	const ulint	des_size = sizeof(*page_zip);

	memset(page_zip, 0, des_size);
}
/**************************************************************************
Compute how many bytes an integer occupies when it is stored in the
variable-length format of the modification log. Values that do not fit
in the two-byte form trigger ut_error. */
UNIV_INLINE
ulint
page_zip_ulint_size(
/*================*/
			/* out: length of the integer, in bytes */
	ulint	num)	/* in: the integer */
{
	if (num >= 16384) {
		/* neither 0xxxxxxx nor 10xxxxxx xxxxxxxx can hold it */
		ut_error;
		return(0);
	}

	/* 0xxxxxxx: 0..127 takes one byte;
	10xxxxxx xxxxxxxx: 0..16383 takes two */
	return(num >= 128 ? 2 : 1);
}
/**************************************************************************
Compute the number of bytes a modification log entry takes: the encoded
offset (relative to PAGE_DATA), the encoded length, and the data bytes. */
UNIV_INLINE
ulint
page_zip_entry_size(
/*================*/
			/* out: length of the log entry, in bytes */
	ulint	pos,	/* in: offset on the uncompressed page */
	ulint	length)	/* in: length of the data */
{
	ulint	entry_len;

	ut_ad(pos >= PAGE_DATA);
	ut_ad(pos + length <= UNIV_PAGE_SIZE - PAGE_DATA /* - trailer_len */);

	/* the offset is stored relative to PAGE_DATA */
	entry_len = page_zip_ulint_size(pos - PAGE_DATA);
	entry_len += page_zip_ulint_size(length);
	/* the payload itself */
	entry_len += length;

	return(entry_len);
}
#ifdef UNIV_DEBUG
/**************************************************************************
Validate a compressed page descriptor. Every condition is checked with
its own ut_ad() assertion; when all of them hold, TRUE is returned, so
that callers can wrap the call itself in ut_ad(). */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
/* out: TRUE if ok */
const page_zip_des_t* page_zip)/* in: compressed page descriptor */
{
/* the descriptor and its data buffer must be set */
ut_ad(page_zip);
ut_ad(page_zip->data);
/* size & (size - 1) is zero only when at most one bit is set,
i.e. when the size is a power of two */
ut_ad(!(page_zip->size & (page_zip->size - 1)));
/* a compressed page must be smaller than an uncompressed one */
ut_ad(page_zip->size < UNIV_PAGE_SIZE);
/* NOTE(review): presumably the minimum room needed for the page
header and an empty page directory -- confirm against page0page.h */
ut_ad(page_zip->size > PAGE_DATA + PAGE_EMPTY_DIR_START);
/* the start of the modification log must not exceed its end,
and the end must lie inside the compressed page */
ut_ad(page_zip->m_start <= page_zip->m_end);
ut_ad(page_zip->m_end < page_zip->size);
return(TRUE);
}
#endif /* UNIV_DEBUG */
/**************************************************************************
Ensure that enough space is available in the modification log.
If not, try to compress the page. */
UNIV_INLINE
ibool
page_zip_alloc(
/*===========*/
				/* out: TRUE if enough space is available */
	page_zip_des_t*	page_zip,/* in/out: compressed page;
				will only be modified if compression is needed
				and successful */
	const page_t*	page,	/* in: uncompressed page */
	ulint		size)	/* in: size of modification log entries */
{
	ulint	trailer_len;

	/* Validate the descriptor before page_zip->data is dereferenced,
	so that a bad descriptor is reported by the assertion. */
	ut_ad(page_zip_simple_validate(page_zip));

	/* The page directory is read from the copy of the page header
	that is kept in the compressed page. */
	trailer_len = PAGE_DIR + PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);

	ut_ad(page_zip->m_end + trailer_len < page_zip->size);
	ut_ad(size >= 3); /* modification log entries are >= 1+1+1 bytes */
	ut_ad(size < page_zip->size);

	/* Compare sums instead of differences: the subtraction
	page_zip->size - m_end - trailer_len would wrap around to a huge
	unsigned value if the log and the trailer ever filled the page,
	and the check would then wrongly succeed. */
	if (size + page_zip->m_end + trailer_len < page_zip->size) {
		return(TRUE);
	}

	if (page_zip->m_start == page_zip->m_end) {
		/* The page has been freshly compressed, so
		recompressing it will not help. */
		return(FALSE);
	}

	/* Recompressing folds the modification log into the compressed
	stream; the result tells whether that succeeded. */
	return(page_zip_compress(page_zip, page));
}
/**************************************************************************
Determine if enough space is available in the modification log. */
UNIV_INLINE
ibool
page_zip_available(
/*===============*/
					/* out: TRUE if enough space
					is available */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	ulint			size)	/* in: requested size of
					modification log entries */
{
	ulint	trailer_len;

	/* Validate the descriptor before page_zip->data is dereferenced,
	so that a bad descriptor is reported by the assertion. */
	ut_ad(page_zip_simple_validate(page_zip));

	/* The page directory is read from the copy of the page header
	that is kept in the compressed page. */
	trailer_len = PAGE_DIR + PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);

	ut_ad(page_zip->m_end + trailer_len < page_zip->size);
	ut_ad(size >= 3); /* modification log entries are >= 1+1+1 bytes */
	ut_ad(size < page_zip->size);

	/* Compare sums instead of differences: the subtraction
	page_zip->size - m_end - trailer_len would wrap around to a huge
	unsigned value if the log and the trailer ever filled the page,
	and the check would then wrongly succeed. */
	return(UNIV_LIKELY(
		size + page_zip->m_end + trailer_len < page_zip->size));
}
/**************************************************************************
Write data to the uncompressed header portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_header(
/*==================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint		length)	/* in: length of the data */
{
	ulint	pos;

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));

	/* offset of str within its uncompressed page */
	pos = ut_align_offset(str, UNIV_PAGE_SIZE);

	ut_ad(pos < PAGE_DATA);

	/* Copy into the compressed page buffer at the same offset.
	page_zip is only the descriptor, so the destination has to be
	computed from page_zip->data; adding pos to page_zip itself would
	step over whole descriptors and write to unrelated memory. */
	memcpy(page_zip->data + pos, str, length);

	/* str - pos is the start of the uncompressed page */
	ut_ad(page_zip_validate(page_zip, str - pos));
}
/**************************************************************************
Write data to the uncompressed trailer portion of a page. The data must
already have been written to the uncompressed page. */
UNIV_INLINE
void
page_zip_write_trailer(
/*===================*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint		length)	/* in: length of the data */
{
	ulint	pos;
	ulint	shift;

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));

	/* offset of str within its uncompressed page */
	pos = ut_align_offset(str, UNIV_PAGE_SIZE);

	/* NOTE(review): the format description in this file says that the
	trailer of the uncompressed page is copied as is to the trailer of
	the compressed page, so a trailer byte keeps its distance from the
	end of the page. The compressed page is shorter by "shift" bytes;
	confirm that page_zip_compress() uses the same placement. */
	shift = UNIV_PAGE_SIZE - page_zip->size;

	/* the source must lie outside the header and inside the page,
	and the destination must not land in the header copy */
	ut_ad(pos >= PAGE_DATA);
	ut_ad(pos + length <= UNIV_PAGE_SIZE);
	ut_ad(pos >= shift + PAGE_DATA);

	/* page_zip is only the descriptor; the bytes of the compressed
	page live in page_zip->data. */
	memcpy(page_zip->data + (pos - shift), str, length);

	/* str - pos is the start of the uncompressed page */
	ut_ad(page_zip_validate(page_zip, str - pos));
}
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif

View file

@ -13,11 +13,10 @@ Created 5/30/1994 Heikki Tuuri
#include "data0data.h" #include "data0data.h"
#include "rem0types.h" #include "rem0types.h"
#include "mtr0types.h" #include "mtr0types.h"
#include "page0types.h"
/* Maximum values for various fields (for non-blob tuples) */ /* Maximum values for various fields (for non-blob tuples) */
#define REC_MAX_N_FIELDS (1024 - 1) #define REC_MAX_N_FIELDS (1024 - 1)
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
/* Flag denoting the predefined minimum record: this bit is ORed in the 4 /* Flag denoting the predefined minimum record: this bit is ORed in the 4
info bits of a record */ info bits of a record */
@ -41,6 +40,17 @@ offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE 100 #define REC_OFFS_NORMAL_SIZE 100
#define REC_OFFS_SMALL_SIZE 10 #define REC_OFFS_SMALL_SIZE 10
/**********************************************************
The following function is used to get the pointer of the next chained record
on the same page. */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
/*=============*/
/* out: pointer to the next chained record, or
NULL if none */
rec_t* rec, /* in: physical record */
ulint comp); /* in: nonzero=compact page format */
/********************************************************** /**********************************************************
The following function is used to get the offset of the The following function is used to get the offset of the
next chained record on the same page. */ next chained record on the same page. */
@ -54,15 +64,25 @@ rec_get_next_offs(
ulint comp); /* in: nonzero=compact page format */ ulint comp); /* in: nonzero=compact page format */
/********************************************************** /**********************************************************
The following function is used to set the next record offset field The following function is used to set the next record offset field
of the record. */ of an old-style record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_next_offs( rec_set_next_offs_old(
/*==============*/ /*==================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */
ulint next); /* in: offset of the next record */ ulint next); /* in: offset of the next record */
/********************************************************** /**********************************************************
The following function is used to set the next record offset field
of a new-style record. */
UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields The following function is used to get the number of fields
in an old-style record. */ in an old-style record. */
UNIV_INLINE UNIV_INLINE
@ -82,26 +102,44 @@ rec_get_n_fields(
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: physical record */
dict_index_t* index); /* in: record descriptor */ dict_index_t* index); /* in: record descriptor */
/********************************************************** /**********************************************************
The following function is used to get the number of records The following function is used to get the number of records owned by the
owned by the previous directory record. */ previous directory record. */
UNIV_INLINE UNIV_INLINE
ulint ulint
rec_get_n_owned( rec_get_n_owned_old(
/*============*/ /*================*/
/* out: number of owned records */ /* out: number of owned records */
rec_t* rec, /* in: physical record */ rec_t* rec); /* in: old-style physical record */
ulint comp); /* in: nonzero=compact page format */
/********************************************************** /**********************************************************
The following function is used to set the number of owned The following function is used to set the number of owned records. */
records. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_n_owned( rec_set_n_owned_old(
/*============*/ /*================*/
rec_t* rec, /* in: physical record */ /* out: TRUE on success */
ulint comp, /* in: nonzero=compact page format */ rec_t* rec, /* in: old-style physical record */
ulint n_owned); /* in: the number of owned */ ulint n_owned); /* in: the number of owned */
/********************************************************** /**********************************************************
The following function is used to get the number of records owned by the
previous directory record. */
UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
/* out: number of owned records */
rec_t* rec); /* in: new-style physical record */
/**********************************************************
The following function is used to set the number of owned records
in the header of a new-style record. The function returns nothing. */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with at least
				5 bytes available, or NULL */
	ulint		n_owned);/* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of The following function is used to retrieve the info bits of
a record. */ a record. */
UNIV_INLINE UNIV_INLINE
@ -115,12 +153,21 @@ rec_get_info_bits(
The following function is used to set the info bits of a record. */ The following function is used to set the info bits of a record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_info_bits( rec_set_info_bits_old(
/*==============*/ /*==================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */
ulint bits); /* in: info bits */ ulint bits); /* in: info bits */
/********************************************************** /**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/**********************************************************
The following function retrieves the status bits of a new-style record. */ The following function retrieves the status bits of a new-style record. */
UNIV_INLINE UNIV_INLINE
ulint ulint
@ -135,8 +182,10 @@ UNIV_INLINE
void void
rec_set_status( rec_set_status(
/*===========*/ /*===========*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in/out: physical record */
ulint bits); /* in: info bits */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/********************************************************** /**********************************************************
The following function is used to retrieve the info and status The following function is used to retrieve the info and status
@ -155,9 +204,10 @@ UNIV_INLINE
void void
rec_set_info_and_status_bits( rec_set_info_and_status_bits(
/*=========================*/ /*=========================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in/out: compact physical record */
ulint comp, /* in: nonzero=compact page format */ page_zip_des_t* page_zip,/* in/out: compressed page with
ulint bits); /* in: info bits */ at least 5 bytes available, or NULL */
ulint bits); /* in: info bits */
/********************************************************** /**********************************************************
The following function tells if record is delete marked. */ The following function tells if record is delete marked. */
@ -172,40 +222,67 @@ rec_get_deleted_flag(
The following function is used to set the deleted bit. */ The following function is used to set the deleted bit. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_deleted_flag( rec_set_deleted_flag_old(
/*=================*/ /*=====================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */
ulint flag); /* in: nonzero if delete marked */ ulint flag); /* in: nonzero if delete marked */
/********************************************************** /**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint flag); /* in: nonzero if delete marked */
/**********************************************************
The following function tells if a new-style record is a node pointer. */ The following function tells if a new-style record is a node pointer. */
UNIV_INLINE UNIV_INLINE
ibool ibool
rec_get_node_ptr_flag( rec_get_node_ptr_flag(
/*=================*/ /*==================*/
/* out: TRUE if node pointer */ /* out: TRUE if node pointer */
rec_t* rec); /* in: physical record */ rec_t* rec); /* in: physical record */
/********************************************************** /**********************************************************
The following function is used to get the order number The following function is used to get the order number
of the record in the heap of the index page. */ of an old-style record in the heap of the index page. */
UNIV_INLINE UNIV_INLINE
ulint ulint
rec_get_heap_no( rec_get_heap_no_old(
/*=============*/ /*================*/
/* out: heap order number */ /* out: heap order number */
rec_t* rec, /* in: physical record */ rec_t* rec); /* in: physical record */
ulint comp); /* in: nonzero=compact page format */
/********************************************************** /**********************************************************
The following function is used to set the heap number The following function is used to set the heap number
field in the record. */ field in an old-style record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_heap_no( rec_set_heap_no_old(
/*=============*/ /*================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint heap_no);/* in: the heap number */ ulint heap_no);/* in: the heap number */
/********************************************************** /**********************************************************
The following function is used to get the order number
of a new-style record in the heap of the index page. */
UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
/* out: heap order number */
rec_t* rec); /* in: physical record */
/**********************************************************
The following function is used to set the heap number
field in a new-style record. */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /* in/out: physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 6 bytes available, or NULL */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format. */ in the record are stored in one-byte or two-byte format. */
UNIV_INLINE UNIV_INLINE
@ -340,7 +417,7 @@ rec_offs_any_extern(
/* out: TRUE if a field is stored externally */ /* out: TRUE if a field is stored externally */
const ulint* offsets);/* in: array returned by rec_get_offsets() */ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit. */ Sets the ith field extern storage bit. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_nth_field_extern_bit( rec_set_nth_field_extern_bit(
@ -348,7 +425,6 @@ rec_set_nth_field_extern_bit(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint i, /* in: ith field */ ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page mtr_t* mtr); /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case where rec is, or NULL; in the NULL case
we do not write to log about the change */ we do not write to log about the change */
@ -489,8 +565,8 @@ rec_fold(
in an incomplete last field */ in an incomplete last field */
dulint tree_id); /* in: index tree id */ dulint tree_id); /* in: index tree id */
/************************************************************* /*************************************************************
Builds a physical record out of a data tuple and stores it beginning from Builds a physical record out of a data tuple and
address destination. */ stores it into the given buffer. */
rec_t* rec_t*
rec_convert_dtuple_to_rec( rec_convert_dtuple_to_rec(

View file

@ -148,19 +148,18 @@ rec_set_nth_field_sql_null(
ulint n); /* in: index of the field */ ulint n); /* in: index of the field */
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit of an old-style record. */ Sets the ith field extern storage bit of an old-style record. */
void void
rec_set_nth_field_extern_bit_old( rec_set_nth_field_extern_bit_old(
/*=============================*/ /*=============================*/
rec_t* rec, /* in: old-style record */ rec_t* rec, /* in: old-style record */
ulint i, /* in: ith field */ ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page where mtr_t* mtr); /* in: mtr holding an X-latch to the page where
rec is, or NULL; in the NULL case we do not rec is, or NULL; in the NULL case we do not
write to log about the change */ write to log about the change */
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit of a new-style record. */ Sets the ith field extern storage bit of a new-style record. */
void void
rec_set_nth_field_extern_bit_new( rec_set_nth_field_extern_bit_new(
@ -168,7 +167,6 @@ rec_set_nth_field_extern_bit_new(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint ith, /* in: ith field */ ulint ith, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page mtr_t* mtr); /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case where rec is, or NULL; in the NULL case
we do not write to log about the change */ we do not write to log about the change */
@ -255,6 +253,55 @@ rec_set_bit_field_2(
| (val << shift)); | (val << shift));
} }
/**********************************************************
The following function is used to get the pointer of the next chained record
on the same page. */
UNIV_INLINE
rec_t*
rec_get_next_ptr(
/*=============*/
/* out: pointer to the next chained record, or
NULL if none */
rec_t* rec, /* in: physical record */
ulint comp) /* in: nonzero=compact page format */
{
ulint field_value;
ut_ad(REC_NEXT_MASK == 0xFFFFUL);
ut_ad(REC_NEXT_SHIFT == 0);
field_value = mach_read_from_2(rec - REC_NEXT);
if (UNIV_UNLIKELY(field_value == 0)) {
return(NULL);
}
if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
#if UNIV_PAGE_SIZE <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */
/* In the following assertion, field_value is interpreted
as signed 16-bit integer in 2's complement arithmetics.
If all platforms defined int16_t in the standard headers,
the expression could be written simpler as
(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
*/
ut_ad((field_value >= 32768
? field_value - 65536
: field_value)
+ ut_align_offset(rec, UNIV_PAGE_SIZE)
< UNIV_PAGE_SIZE);
#endif
return(rec + field_value);
} else {
ut_ad(field_value < UNIV_PAGE_SIZE);
return(ut_align_down(rec, UNIV_PAGE_SIZE) + field_value);
}
}
/********************************************************** /**********************************************************
The following function is used to get the offset of the next chained record The following function is used to get the offset of the next chained record
on the same page. */ on the same page. */
@ -274,7 +321,7 @@ rec_get_next_offs(
field_value = mach_read_from_2(rec - REC_NEXT); field_value = mach_read_from_2(rec - REC_NEXT);
if (comp) { if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
#if UNIV_PAGE_SIZE <= 32768 #if UNIV_PAGE_SIZE <= 32768
/* Note that for 64 KiB pages, field_value can 'wrap around' /* Note that for 64 KiB pages, field_value can 'wrap around'
and the debug assertion is not valid */ and the debug assertion is not valid */
@ -291,7 +338,7 @@ rec_get_next_offs(
+ ut_align_offset(rec, UNIV_PAGE_SIZE) + ut_align_offset(rec, UNIV_PAGE_SIZE)
< UNIV_PAGE_SIZE); < UNIV_PAGE_SIZE);
#endif #endif
if (field_value == 0) { if (UNIV_UNLIKELY(field_value == 0)) {
return(0); return(0);
} }
@ -305,39 +352,59 @@ rec_get_next_offs(
} }
/********************************************************** /**********************************************************
The following function is used to set the next record offset field of the The following function is used to set the next record offset field
record. */ of an old-style record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_next_offs( rec_set_next_offs_old(
/*==============*/ /*==================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */ ulint next) /* in: offset of the next record */
ulint next) /* in: offset of the next record, or 0 if none */
{ {
ut_ad(rec); ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next); ut_ad(UNIV_PAGE_SIZE > next);
ut_ad(REC_NEXT_MASK == 0xFFFFUL); #if REC_NEXT_MASK != 0xFFFFUL
ut_ad(REC_NEXT_SHIFT == 0); # error "REC_NEXT_MASK != 0xFFFFUL"
#endif
#if REC_NEXT_SHIFT
# error "REC_NEXT_SHIFT != 0"
#endif
if (comp) { mach_write_to_2(rec - REC_NEXT, next);
ulint field_value; }
if (next) { /**********************************************************
/* The following two statements calculate The following function is used to set the next record offset field
next - offset_of_rec mod 64Ki, where mod is the modulo of a new-style record. */
as a non-negative number */ UNIV_INLINE
void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
6 bytes available, or NULL */
ulint next) /* in: offset of the next record */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
field_value = (ulint)((lint)next ulint field_value;
- (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
field_value &= REC_NEXT_MASK;
} else {
field_value = 0;
}
mach_write_to_2(rec - REC_NEXT, field_value); if (UNIV_UNLIKELY(!next)) {
field_value = 0;
} else { } else {
mach_write_to_2(rec - REC_NEXT, next); /* The following two statements calculate
next - offset_of_rec mod 64Ki, where mod is the modulo
as a non-negative number */
field_value = (ulint)((lint)next
- (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
field_value &= REC_NEXT_MASK;
}
mach_write_to_2(rec - REC_NEXT, field_value);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEXT, 2);
} }
} }
@ -436,42 +503,62 @@ The following function is used to get the number of records owned by the
previous directory record. */ previous directory record. */
UNIV_INLINE UNIV_INLINE
ulint ulint
rec_get_n_owned( rec_get_n_owned_old(
/*============*/ /*================*/
/* out: number of owned records */ /* out: number of owned records */
rec_t* rec, /* in: physical record */ rec_t* rec) /* in: old-style physical record */
ulint comp) /* in: nonzero=compact page format */
{ {
ulint ret; return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
ut_ad(rec);
ret = rec_get_bit_field_1(rec,
comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
ut_ad(ret <= REC_MAX_N_OWNED);
return(ret);
} }
/********************************************************** /**********************************************************
The following function is used to set the number of owned records. */ The following function is used to set the number of owned records. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_n_owned( rec_set_n_owned_old(
/*============*/ /*================*/
rec_t* rec, /* in: physical record */ /* out: TRUE on success */
ulint comp, /* in: nonzero=compact page format */ rec_t* rec, /* in: old-style physical record */
ulint n_owned) /* in: the number of owned */ ulint n_owned) /* in: the number of owned */
{ {
ut_ad(rec); rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
ut_ad(n_owned <= REC_MAX_N_OWNED);
rec_set_bit_field_1(rec, n_owned,
comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
} }
/**********************************************************
Gets the number of records owned by the previous directory record,
as stored in the header of a new-style record. */
UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
			/* out: number of owned records */
	rec_t*	rec)	/* in: new-style physical record */
{
	ulint	n_owned;

	/* Extract the n_owned bit field of the new-style header. */
	n_owned = rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
				REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);

	return(n_owned);
}
/**********************************************************
Sets the number of owned records in the header of a new-style record.
When a compressed page descriptor is given, the modified header byte
is also handed to page_zip_write(). */
UNIV_INLINE
void
rec_set_n_owned_new(
/*================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 5 bytes available, or NULL */
	ulint		n_owned)/* in: the number of owned */
{
	/* The single header byte that carries the n_owned bits */
	rec_t*	n_owned_byte = rec - REC_NEW_N_OWNED;

	rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
				REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);

	if (!UNIV_LIKELY_NULL(page_zip)) {
		/* No compressed page: only the record itself is updated */

		return;
	}

	page_zip_write(page_zip, n_owned_byte, 1);
}
/********************************************************** /**********************************************************
The following function is used to retrieve the info bits of a record. */ The following function is used to retrieve the info bits of a record. */
UNIV_INLINE UNIV_INLINE
@ -482,35 +569,40 @@ rec_get_info_bits(
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: physical record */
ulint comp) /* in: nonzero=compact page format */ ulint comp) /* in: nonzero=compact page format */
{ {
ulint ret; return(rec_get_bit_field_1(rec,
ut_ad(rec);
ret = rec_get_bit_field_1(rec,
comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
return(ret);
} }
/********************************************************** /**********************************************************
The following function is used to set the info bits of a record. */ The following function is used to set the info bits of a record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_info_bits( rec_set_info_bits_old(
/*==============*/ /*==================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */
ulint bits) /* in: info bits */ ulint bits) /* in: info bits */
{ {
ut_ad(rec); rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
rec_set_bit_field_1(rec, bits,
comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
} }
/**********************************************************
Sets the info bits in the header of a new-style record. When a
compressed page descriptor is given, the modified header byte is
also handed to page_zip_write(). */
UNIV_INLINE
void
rec_set_info_bits_new(
/*==================*/
	rec_t*		rec,	/* in/out: new-style physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 5 bytes available, or NULL */
	ulint		bits)	/* in: info bits */
{
	/* The single header byte that carries the info bits */
	rec_t*	info_byte = rec - REC_NEW_INFO_BITS;

	rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
				REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);

	if (!UNIV_LIKELY_NULL(page_zip)) {
		/* No compressed page: only the record itself is updated */

		return;
	}

	page_zip_write(page_zip, info_byte, 1);
}
/********************************************************** /**********************************************************
The following function is used to set the status bits of a new-style record. */ The following function is used to set the status bits of a new-style record. */
@ -518,14 +610,16 @@ UNIV_INLINE
void void
rec_set_status( rec_set_status(
/*===========*/ /*===========*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in/out: physical record */
ulint bits) /* in: info bits */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
{ {
ut_ad(rec);
ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
if (UNIV_LIKELY_NULL(page_zip)) {
page_zip_write(page_zip, rec - REC_NEW_STATUS, 1);
}
} }
/********************************************************** /**********************************************************
@ -559,20 +653,17 @@ UNIV_INLINE
void void
rec_set_info_and_status_bits( rec_set_info_and_status_bits(
/*=========================*/ /*=========================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in/out: physical record */
ulint comp, /* in: nonzero=compact page format */ page_zip_des_t* page_zip,/* in/out: compressed page with
ulint bits) /* in: info bits */ at least 5 bytes available, or NULL */
ulint bits) /* in: info bits */
{ {
#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ #if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" # error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
#endif #endif
if (comp) { rec_set_status(rec, page_zip, bits & REC_NEW_STATUS_MASK);
rec_set_status(rec, bits & REC_NEW_STATUS_MASK); rec_set_info_bits_new(rec, page_zip, bits & ~REC_NEW_STATUS_MASK);
} else {
ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
}
rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
} }
/********************************************************** /**********************************************************
@ -600,15 +691,14 @@ rec_get_deleted_flag(
The following function is used to set the deleted bit. */ The following function is used to set the deleted bit. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_deleted_flag( rec_set_deleted_flag_old(
/*=================*/ /*=====================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: old-style physical record */
ulint comp, /* in: nonzero=compact page format */
ulint flag) /* in: nonzero if delete marked */ ulint flag) /* in: nonzero if delete marked */
{ {
ulint val; ulint val;
val = rec_get_info_bits(rec, comp); val = rec_get_info_bits(rec, FALSE);
if (flag) { if (flag) {
val |= REC_INFO_DELETED_FLAG; val |= REC_INFO_DELETED_FLAG;
@ -616,7 +706,31 @@ rec_set_deleted_flag(
val &= ~REC_INFO_DELETED_FLAG; val &= ~REC_INFO_DELETED_FLAG;
} }
rec_set_info_bits(rec, comp, val); rec_set_info_bits_old(rec, val);
}
/**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
void
rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /* in/out: new-style physical record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 5 bytes available, or NULL */
ulint flag) /* in: nonzero if delete marked */
{
ulint val;
val = rec_get_info_bits(rec, TRUE);
if (flag) {
val |= REC_INFO_DELETED_FLAG;
} else {
val &= ~REC_INFO_DELETED_FLAG;
}
rec_set_info_bits_new(rec, page_zip, val);
} }
/********************************************************** /**********************************************************
@ -624,7 +738,7 @@ The following function tells if a new-style record is a node pointer. */
UNIV_INLINE UNIV_INLINE
ibool ibool
rec_get_node_ptr_flag( rec_get_node_ptr_flag(
/*=================*/ /*==================*/
/* out: TRUE if node pointer */ /* out: TRUE if node pointer */
rec_t* rec) /* in: physical record */ rec_t* rec) /* in: physical record */
{ {
@ -632,45 +746,66 @@ rec_get_node_ptr_flag(
} }
/********************************************************** /**********************************************************
The following function is used to get the order number of the record in the The following function is used to get the order number
heap of the index page. */ of an old-style record in the heap of the index page. */
UNIV_INLINE UNIV_INLINE
ulint ulint
rec_get_heap_no( rec_get_heap_no_old(
/*=============*/ /*================*/
/* out: heap order number */ /* out: heap order number */
rec_t* rec, /* in: physical record */ rec_t* rec) /* in: physical record */
ulint comp) /* in: nonzero=compact page format */
{ {
ulint ret; return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
ut_ad(rec);
ret = rec_get_bit_field_2(rec,
comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
ut_ad(ret <= REC_MAX_HEAP_NO);
return(ret);
} }
/********************************************************** /**********************************************************
The following function is used to set the heap number field in the record. */ The following function is used to set the heap number
field in an old-style record. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_heap_no( rec_set_heap_no_old(
/*=============*/ /*================*/
rec_t* rec, /* in: physical record */ rec_t* rec, /* in: physical record */
ulint comp, /* in: nonzero=compact page format */
ulint heap_no)/* in: the heap number */ ulint heap_no)/* in: the heap number */
{ {
ut_ad(heap_no <= REC_MAX_HEAP_NO); rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
rec_set_bit_field_2(rec, heap_no,
comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
} }
/**********************************************************
Gets the order number of a new-style record in the heap of the
index page, as stored in the record header. */
UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
			/* out: heap order number */
	rec_t*	rec)	/* in: physical record */
{
	ulint	heap_no;

	/* Extract the two-byte heap number bit field of the header. */
	heap_no = rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
				REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);

	return(heap_no);
}
/**********************************************************
Sets the heap number field in the header of a new-style record.
When a compressed page descriptor is given, the two modified header
bytes are also handed to page_zip_write(). */
UNIV_INLINE
void
rec_set_heap_no_new(
/*================*/
	rec_t*		rec,	/* in/out: physical record */
	page_zip_des_t*	page_zip,/* in/out: compressed page with
				at least 6 bytes available, or NULL */
	ulint		heap_no)/* in: the heap number */
{
	/* Start of the two header bytes that carry the heap number */
	rec_t*	heap_no_field = rec - REC_NEW_HEAP_NO;

	rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
				REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);

	if (!UNIV_LIKELY_NULL(page_zip)) {
		/* No compressed page: only the record itself is updated */

		return;
	}

	page_zip_write(page_zip, heap_no_field, 2);
}
/********************************************************** /**********************************************************
The following function is used to test whether the data offsets in the record The following function is used to test whether the data offsets in the record
are stored in one-byte or two-byte format. */ are stored in one-byte or two-byte format. */
@ -1006,7 +1141,7 @@ rec_offs_any_extern(
} }
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit. */ Sets the ith field extern storage bit. */
UNIV_INLINE UNIV_INLINE
void void
rec_set_nth_field_extern_bit( rec_set_nth_field_extern_bit(
@ -1014,15 +1149,14 @@ rec_set_nth_field_extern_bit(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint i, /* in: ith field */ ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page mtr_t* mtr) /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case where rec is, or NULL; in the NULL case
we do not write to log about the change */ we do not write to log about the change */
{ {
if (UNIV_LIKELY(index->table->comp)) { if (UNIV_LIKELY(index->table->comp)) {
rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr); rec_set_nth_field_extern_bit_new(rec, index, i, mtr);
} else { } else {
rec_set_nth_field_extern_bit_old(rec, i, val, mtr); rec_set_nth_field_extern_bit_old(rec, i, mtr);
} }
} }

View file

@ -45,7 +45,9 @@ UNIV_INLINE
void void
row_set_rec_trx_id( row_set_rec_trx_id(
/*===============*/ /*===============*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id);/* in: value of the field */ dulint trx_id);/* in: value of the field */
@ -55,7 +57,9 @@ UNIV_INLINE
void void
row_set_rec_roll_ptr( row_set_rec_roll_ptr(
/*=================*/ /*=================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */ dulint roll_ptr);/* in: value of the field */

View file

@ -29,9 +29,10 @@ is slower than the specialized inline functions. */
void void
row_set_rec_sys_field( row_set_rec_sys_field(
/*==================*/ /*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */ dulint val); /* in: value to set */
@ -94,7 +95,9 @@ UNIV_INLINE
void void
row_set_rec_trx_id( row_set_rec_trx_id(
/*===============*/ /*===============*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */ dulint trx_id) /* in: value of the field */
@ -107,10 +110,10 @@ row_set_rec_trx_id(
offset = index->trx_id_offset; offset = index->trx_id_offset;
if (offset) { if (offset) {
trx_write_trx_id(rec + offset, trx_id); trx_write_trx_id(rec + offset, page_zip, trx_id);
} else { } else {
row_set_rec_sys_field(DATA_TRX_ID, row_set_rec_sys_field(DATA_TRX_ID,
rec, index, offsets, trx_id); rec, page_zip, index, offsets, trx_id);
} }
} }
@ -120,7 +123,9 @@ UNIV_INLINE
void void
row_set_rec_roll_ptr( row_set_rec_roll_ptr(
/*=================*/ /*=================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */ dulint roll_ptr)/* in: value of the field */
@ -133,10 +138,11 @@ row_set_rec_roll_ptr(
offset = index->trx_id_offset; offset = index->trx_id_offset;
if (offset) { if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN,
page_zip, roll_ptr);
} else { } else {
row_set_rec_sys_field(DATA_ROLL_PTR, row_set_rec_sys_field(DATA_ROLL_PTR,
rec, index, offsets, roll_ptr); rec, page_zip, index, offsets, roll_ptr);
} }
} }

View file

@ -78,7 +78,9 @@ UNIV_INLINE
void void
row_upd_rec_sys_fields( row_upd_rec_sys_fields(
/*===================*/ /*===================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */ trx_t* trx, /* in: transaction */
@ -276,7 +278,8 @@ recovery. */
void void
row_upd_rec_sys_fields_in_recovery( row_upd_rec_sys_fields_in_recovery(
/*===============================*/ /*===============================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
const ulint* offsets,/* in: array returned by rec_get_offsets() */ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint pos, /* in: TRX_ID position in rec */ ulint pos, /* in: TRX_ID position in rec */
dulint trx_id, /* in: transaction id */ dulint trx_id, /* in: transaction id */

View file

@ -11,6 +11,7 @@ Created 12/27/1996 Heikki Tuuri
#include "trx0undo.h" #include "trx0undo.h"
#include "row0row.h" #include "row0row.h"
#include "btr0sea.h" #include "btr0sea.h"
#include "page0zip.h"
/************************************************************************* /*************************************************************************
Creates an update vector object. */ Creates an update vector object. */
@ -104,7 +105,9 @@ UNIV_INLINE
void void
row_upd_rec_sys_fields( row_upd_rec_sys_fields(
/*===================*/ /*===================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 21 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */ trx_t* trx, /* in: transaction */
@ -116,7 +119,8 @@ row_upd_rec_sys_fields(
ut_ad(!buf_block_align(rec)->is_hashed ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX)); || rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
ut_ad(!page_zip || page_zip_available(page_zip, 21));
row_set_rec_trx_id(rec, index, offsets, trx->id); row_set_rec_trx_id(rec, page_zip, index, offsets, trx->id);
row_set_rec_roll_ptr(rec, index, offsets, roll_ptr); row_set_rec_roll_ptr(rec, page_zip, index, offsets, roll_ptr);
} }

View file

@ -23,6 +23,7 @@ Created 3/26/1996 Heikki Tuuri
#include "fut0lst.h" #include "fut0lst.h"
#include "fsp0fsp.h" #include "fsp0fsp.h"
#include "read0types.h" #include "read0types.h"
#include "page0types.h"
/* In a MySQL replication slave, in crash recovery we store the master log /* In a MySQL replication slave, in crash recovery we store the master log
file name and position here. We have successfully got the updates to InnoDB file name and position here. We have successfully got the updates to InnoDB
@ -210,8 +211,10 @@ UNIV_INLINE
void void
trx_write_trx_id( trx_write_trx_id(
/*=============*/ /*=============*/
byte* ptr, /* in: pointer to memory where written */ byte* ptr, /* in: pointer to memory where written */
dulint id); /* in: id */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id); /* in: id */
/********************************************************************* /*********************************************************************
Reads a trx id from an index page. In case that the id size changes in Reads a trx id from an index page. In case that the id size changes in
some future version, this function should be used instead of some future version, this function should be used instead of

View file

@ -9,6 +9,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0srv.h" #include "srv0srv.h"
#include "trx0trx.h" #include "trx0trx.h"
#include "data0type.h" #include "data0type.h"
#include "page0zip.h"
/* The typedef for rseg slot in the file copy */ /* The typedef for rseg slot in the file copy */
typedef byte trx_sysf_rseg_t; typedef byte trx_sysf_rseg_t;
@ -213,12 +214,18 @@ UNIV_INLINE
void void
trx_write_trx_id( trx_write_trx_id(
/*=============*/ /*=============*/
byte* ptr, /* in: pointer to memory where written */ byte* ptr, /* in: pointer to memory where written */
dulint id) /* in: id */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 10 bytes available, or NULL */
dulint id) /* in: id */
{ {
ut_ad(DATA_TRX_ID_LEN == 6); ut_ad(DATA_TRX_ID_LEN == 6);
mach_write_to_6(ptr, id); mach_write_to_6(ptr, id);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_TRX_ID_LEN));
page_zip_write(page_zip, ptr, DATA_TRX_ID_LEN);
}
} }
/********************************************************************* /*********************************************************************

View file

@ -55,6 +55,8 @@ void
trx_write_roll_ptr( trx_write_roll_ptr(
/*===============*/ /*===============*/
byte* ptr, /* in: pointer to memory where written */ byte* ptr, /* in: pointer to memory where written */
page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr); /* in: roll ptr */ dulint roll_ptr); /* in: roll ptr */
/********************************************************************* /*********************************************************************
Reads a roll ptr from an index page. In case that the roll ptr size Reads a roll ptr from an index page. In case that the roll ptr size

View file

@ -7,6 +7,7 @@ Created 3/26/1996 Heikki Tuuri
*******************************************************/ *******************************************************/
#include "data0type.h" #include "data0type.h"
#include "page0zip.h"
/*************************************************************************** /***************************************************************************
Builds a roll pointer dulint. */ Builds a roll pointer dulint. */
@ -87,12 +88,18 @@ UNIV_INLINE
void void
trx_write_roll_ptr( trx_write_roll_ptr(
/*===============*/ /*===============*/
byte* ptr, /* in: pointer to memory where written */ byte* ptr, /* in: pointer to memory where written */
dulint roll_ptr) /* in: roll ptr */ page_zip_des_t* page_zip,/* in/out: compressed page with
at least 11 bytes available, or NULL */
dulint roll_ptr)/* in: roll ptr */
{ {
ut_ad(DATA_ROLL_PTR_LEN == 7); ut_ad(DATA_ROLL_PTR_LEN == 7);
mach_write_to_7(ptr, roll_ptr); mach_write_to_7(ptr, roll_ptr);
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_zip_available(page_zip, 4 + DATA_ROLL_PTR_LEN));
page_zip_write(page_zip, ptr, DATA_ROLL_PTR_LEN);
}
} }
/********************************************************************* /*********************************************************************

View file

@ -1289,25 +1289,17 @@ lock_t*
lock_rec_get_next( lock_rec_get_next(
/*==============*/ /*==============*/
/* out: next lock, NULL if none exists */ /* out: next lock, NULL if none exists */
rec_t* rec, /* in: record on a page */ ulint heap_no,/* in: heap number of the record */
lock_t* lock) /* in: lock */ lock_t* lock) /* in: lock */
{ {
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
ut_ad(lock_get_type(lock) == LOCK_REC);
if (page_rec_is_comp(rec)) { do {
do { ut_ad(lock_get_type(lock) == LOCK_REC);
lock = lock_rec_get_next_on_page(lock); lock = lock_rec_get_next_on_page(lock);
} while (lock && !lock_rec_get_nth_bit(lock, } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
rec_get_heap_no(rec, TRUE)));
} else {
do {
lock = lock_rec_get_next_on_page(lock);
} while (lock && !lock_rec_get_nth_bit(lock,
rec_get_heap_no(rec, FALSE)));
}
return(lock); return(lock);
} }
@ -1319,7 +1311,8 @@ lock_t*
lock_rec_get_first( lock_rec_get_first(
/*===============*/ /*===============*/
/* out: first lock, NULL if none exists */ /* out: first lock, NULL if none exists */
rec_t* rec) /* in: record on a page */ rec_t* rec, /* in: record on a page */
ulint heap_no)/* in: heap number of the record */
{ {
lock_t* lock; lock_t* lock;
@ -1329,8 +1322,6 @@ lock_rec_get_first(
lock = lock_rec_get_first_on_page(rec); lock = lock_rec_get_first_on_page(rec);
if (UNIV_LIKELY_NULL(lock)) { if (UNIV_LIKELY_NULL(lock)) {
ulint heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
while (lock && !lock_rec_get_nth_bit(lock, heap_no)) { while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
lock = lock_rec_get_next_on_page(lock); lock = lock_rec_get_next_on_page(lock);
} }
@ -1495,6 +1486,7 @@ lock_rec_has_expl(
for a supremum record we regard this always a gap for a supremum record we regard this always a gap
type request */ type request */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
lock_t* lock; lock_t* lock;
@ -1506,7 +1498,7 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X); || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock) { while (lock) {
if (lock->trx == trx if (lock->trx == trx
@ -1524,7 +1516,7 @@ lock_rec_has_expl(
return(lock); return(lock);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
return(NULL); return(NULL);
@ -1543,6 +1535,7 @@ lock_rec_other_has_expl_req(
ulint wait, /* in: LOCK_WAIT if also waiting locks are ulint wait, /* in: LOCK_WAIT if also waiting locks are
taken into account, or 0 if not */ taken into account, or 0 if not */
rec_t* rec, /* in: record to look at */ rec_t* rec, /* in: record to look at */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: transaction, or NULL if requests by all trx_t* trx) /* in: transaction, or NULL if requests by all
transactions are taken into account */ transactions are taken into account */
{ {
@ -1555,7 +1548,7 @@ lock_rec_other_has_expl_req(
ut_ad(gap == 0 || gap == LOCK_GAP); ut_ad(gap == 0 || gap == LOCK_GAP);
ut_ad(wait == 0 || wait == LOCK_WAIT); ut_ad(wait == 0 || wait == LOCK_WAIT);
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock) { while (lock) {
if (lock->trx != trx if (lock->trx != trx
@ -1567,7 +1560,7 @@ lock_rec_other_has_expl_req(
return(lock); return(lock);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
return(NULL); return(NULL);
@ -1585,6 +1578,7 @@ lock_rec_other_has_conflicting(
possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP, possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
LOCK_INSERT_INTENTION */ LOCK_INSERT_INTENTION */
rec_t* rec, /* in: record to look at */ rec_t* rec, /* in: record to look at */
ulint heap_no,/* in: heap number of the record */
trx_t* trx) /* in: our transaction */ trx_t* trx) /* in: our transaction */
{ {
lock_t* lock; lock_t* lock;
@ -1592,16 +1586,30 @@ lock_rec_other_has_conflicting(
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock) { if (UNIV_LIKELY_NULL(lock)) {
if (lock_rec_has_to_wait(trx, mode, lock, if (page_rec_is_supremum(rec)) {
page_rec_is_supremum(rec))) {
return(lock); do {
if (lock_rec_has_to_wait(trx, mode, lock,
TRUE)) {
return(lock);
}
lock = lock_rec_get_next(heap_no, lock);
} while (lock);
} else {
do {
if (lock_rec_has_to_wait(trx, mode, lock,
FALSE)) {
return(lock);
}
lock = lock_rec_get_next(heap_no, lock);
} while (lock);
} }
lock = lock_rec_get_next(rec, lock);
} }
return(NULL); return(NULL);
@ -1617,19 +1625,14 @@ lock_rec_find_similar_on_page(
/*==========================*/ /*==========================*/
/* out: lock or NULL */ /* out: lock or NULL */
ulint type_mode, /* in: lock type_mode field */ ulint type_mode, /* in: lock type_mode field */
rec_t* rec, /* in: record */ ulint heap_no, /* in: heap number of the record */
lock_t* lock, /* in: lock_rec_get_first_on_page() */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
lock_t* lock;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec);
while (lock != NULL) { while (lock != NULL) {
if (lock->trx == trx if (lock->trx == trx
&& lock->type_mode == type_mode && lock->type_mode == type_mode
@ -1709,13 +1712,13 @@ lock_rec_create(
ulint type_mode,/* in: lock mode and wait flag, type is ulint type_mode,/* in: lock mode and wait flag, type is
ignored and replaced by LOCK_REC */ ignored and replaced by LOCK_REC */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
ulint heap_no,/* in: heap number of the record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
page_t* page; page_t* page;
lock_t* lock; lock_t* lock;
ulint page_no; ulint page_no;
ulint heap_no;
ulint space; ulint space;
ulint n_bits; ulint n_bits;
ulint n_bytes; ulint n_bytes;
@ -1727,9 +1730,8 @@ lock_rec_create(
page = buf_frame_align(rec); page = buf_frame_align(rec);
space = buf_frame_get_space_id(page); space = buf_frame_get_space_id(page);
page_no = buf_frame_get_page_no(page); page_no = buf_frame_get_page_no(page);
heap_no = rec_get_heap_no(rec, page_is_comp(page));
ut_ad(!!page_is_comp(page) == index->table->comp); ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
/* If rec is the supremum record, then we reset the gap and /* If rec is the supremum record, then we reset the gap and
LOCK_REC_NOT_GAP bits, as all locks on the supremum are LOCK_REC_NOT_GAP bits, as all locks on the supremum are
@ -1806,6 +1808,7 @@ lock_rec_enqueue_waiting(
{ {
lock_t* lock; lock_t* lock;
trx_t* trx; trx_t* trx;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
@ -1815,7 +1818,7 @@ lock_rec_enqueue_waiting(
we do not enqueue a lock request if the query thread should be we do not enqueue a lock request if the query thread should be
stopped anyway */ stopped anyway */
if (que_thr_stop(thr)) { if (UNIV_UNLIKELY(que_thr_stop(thr))) {
ut_error; ut_error;
@ -1824,7 +1827,7 @@ lock_rec_enqueue_waiting(
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
if (trx->dict_operation) { if (UNIV_UNLIKELY(trx->dict_operation)) {
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fputs( fputs(
" InnoDB: Error: a record lock wait happens in a dictionary operation!\n" " InnoDB: Error: a record lock wait happens in a dictionary operation!\n"
@ -1835,17 +1838,23 @@ lock_rec_enqueue_waiting(
stderr); stderr);
} }
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
/* Enqueue the lock request that will wait to be granted */ /* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx); lock = lock_rec_create(type_mode | LOCK_WAIT, rec,
heap_no, index, trx);
/* Check if a deadlock occurs: if yes, remove the lock request and /* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */ return an error code */
if (lock_deadlock_occurs(lock, trx)) { if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
lock_reset_lock_and_trx_wait(lock); lock_reset_lock_and_trx_wait(lock);
lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec, lock_rec_reset_nth_bit(lock, heap_no);
page_rec_is_comp(rec)));
return(DB_DEADLOCK); return(DB_DEADLOCK);
} }
@ -1891,25 +1900,24 @@ lock_rec_add_to_queue(
ulint type_mode,/* in: lock mode, wait, gap etc. flags; ulint type_mode,/* in: lock mode, wait, gap etc. flags;
type is ignored and replaced by LOCK_REC */ type is ignored and replaced by LOCK_REC */
rec_t* rec, /* in: record on page */ rec_t* rec, /* in: record on page */
ulint heap_no,/* in: heap number of the record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
trx_t* trx) /* in: transaction */ trx_t* trx) /* in: transaction */
{ {
lock_t* lock; lock_t* lock;
lock_t* similar_lock = NULL;
ulint heap_no;
ibool somebody_waits = FALSE;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_S) || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
|| !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
rec, trx)); rec, heap_no, trx));
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
|| ((type_mode & LOCK_MODE_MASK) != LOCK_X) || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
|| !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
rec, trx)); rec, heap_no, trx));
type_mode = type_mode | LOCK_REC; type_mode = type_mode | LOCK_REC;
@ -1929,32 +1937,37 @@ lock_rec_add_to_queue(
/* Look for a waiting lock request on the same record or on a gap */ /* Look for a waiting lock request on the same record or on a gap */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec); lock = lock_rec_get_first_on_page(rec);
while (lock != NULL) { while (lock != NULL) {
if (lock_get_wait(lock) if (lock_get_wait(lock)
&& (lock_rec_get_nth_bit(lock, heap_no))) { && (lock_rec_get_nth_bit(lock, heap_no))) {
somebody_waits = TRUE; goto somebody_waits;
} }
lock = lock_rec_get_next_on_page(lock); lock = lock_rec_get_next_on_page(lock);
} }
/* Look for a similar record lock on the same page: if one is found if (!(type_mode & LOCK_WAIT)) {
and there are no waiting lock requests, we can just set the bit */
similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx); /* Look for a similar record lock on the same page:
if one is found and there are no waiting lock requests,
we can just set the bit */
if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { lock = lock_rec_find_similar_on_page(type_mode, heap_no,
lock_rec_get_first_on_page(rec), trx);
lock_rec_set_nth_bit(similar_lock, heap_no); if (lock) {
return(similar_lock); lock_rec_set_nth_bit(lock, heap_no);
return(lock);
}
} }
return(lock_rec_create(type_mode, rec, index, trx)); somebody_waits:
return(lock_rec_create(type_mode, rec, heap_no, index, trx));
} }
/************************************************************************* /*************************************************************************
@ -1975,11 +1988,11 @@ lock_rec_lock_fast(
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */ ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
lock_t* lock; lock_t* lock;
ulint heap_no;
trx_t* trx; trx_t* trx;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
@ -1995,15 +2008,13 @@ lock_rec_lock_fast(
|| mode - (LOCK_MODE_MASK & mode) == 0 || mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first_on_page(rec); lock = lock_rec_get_first_on_page(rec);
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
if (lock == NULL) { if (lock == NULL) {
if (!impl) { if (!impl) {
lock_rec_create(mode, rec, index, trx); lock_rec_create(mode, rec, heap_no, index, trx);
if (srv_locks_unsafe_for_binlog) { if (srv_locks_unsafe_for_binlog) {
trx_register_new_rec_lock(trx, index); trx_register_new_rec_lock(trx, index);
@ -2057,6 +2068,7 @@ lock_rec_lock_slow(
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */ ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
ulint heap_no,/* in: heap number of record */
dict_index_t* index, /* in: index of record */ dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
@ -2078,12 +2090,12 @@ lock_rec_lock_slow(
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
if (lock_rec_has_expl(mode, rec, trx)) { if (lock_rec_has_expl(mode, rec, heap_no, trx)) {
/* The trx already has a strong enough lock on rec: do /* The trx already has a strong enough lock on rec: do
nothing */ nothing */
err = DB_SUCCESS; err = DB_SUCCESS;
} else if (lock_rec_other_has_conflicting(mode, rec, trx)) { } else if (lock_rec_other_has_conflicting(mode, rec, heap_no, trx)) {
/* If another transaction has a non-gap conflicting request in /* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong the queue, as this transaction does not have a lock strong
@ -2098,8 +2110,8 @@ lock_rec_lock_slow(
if (!impl) { if (!impl) {
/* Set the requested lock on the record */ /* Set the requested lock on the record */
lock_rec_add_to_queue(LOCK_REC | mode, rec, index, lock_rec_add_to_queue(LOCK_REC | mode, rec, heap_no,
trx); index, trx);
if (srv_locks_unsafe_for_binlog) { if (srv_locks_unsafe_for_binlog) {
trx_register_new_rec_lock(trx, index); trx_register_new_rec_lock(trx, index);
} }
@ -2133,6 +2145,7 @@ lock_rec_lock(
que_thr_t* thr) /* in: query thread */ que_thr_t* thr) /* in: query thread */
{ {
ulint err; ulint err;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
@ -2147,14 +2160,20 @@ lock_rec_lock(
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0); || mode - (LOCK_MODE_MASK & mode) == 0);
if (lock_rec_lock_fast(impl, mode, rec, index, thr)) { if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (lock_rec_lock_fast(impl, mode, rec, heap_no, index, thr)) {
/* We try a simplified and faster subroutine for the most /* We try a simplified and faster subroutine for the most
common cases */ common cases */
err = DB_SUCCESS; err = DB_SUCCESS;
} else { } else {
err = lock_rec_lock_slow(impl, mode, rec, index, thr); err = lock_rec_lock_slow(impl, mode, rec, heap_no, index, thr);
} }
return(err); return(err);
@ -2405,9 +2424,13 @@ lock_rec_reset_and_release_wait(
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock != NULL) { while (lock != NULL) {
if (lock_get_wait(lock)) { if (lock_get_wait(lock)) {
@ -2416,7 +2439,7 @@ lock_rec_reset_and_release_wait(
lock_rec_reset_nth_bit(lock, heap_no); lock_rec_reset_nth_bit(lock, heap_no);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
} }
@ -2434,11 +2457,21 @@ lock_rec_inherit_to_gap(
the locks on this record */ the locks on this record */
{ {
lock_t* lock; lock_t* lock;
ulint heir_heap_no;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec); if (page_rec_is_comp(rec)) {
heir_heap_no = rec_get_heap_no_new(heir);
heap_no = rec_get_heap_no_new(rec);
} else {
heir_heap_no = rec_get_heap_no_old(heir);
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec, heap_no);
/* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set /* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we by an UPDATE or a DELETE to be inherited as gap type locks. But we
@ -2452,10 +2485,11 @@ lock_rec_inherit_to_gap(
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock) lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP, | LOCK_GAP,
heir, lock->index, lock->trx); heir, heir_heap_no,
lock->index, lock->trx);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
} }
@ -2472,11 +2506,21 @@ lock_rec_inherit_to_gap_if_gap_lock(
the locks on this record */ the locks on this record */
{ {
lock_t* lock; lock_t* lock;
ulint heir_heap_no;
ulint heap_no;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
lock = lock_rec_get_first(rec); if (page_rec_is_comp(rec)) {
heir_heap_no = rec_get_heap_no_new(heir);
heap_no = rec_get_heap_no_new(rec);
} else {
heir_heap_no = rec_get_heap_no_old(heir);
heap_no = rec_get_heap_no_old(rec);
}
lock = lock_rec_get_first(rec, heap_no);
while (lock != NULL) { while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock) if (!lock_rec_get_insert_intention(lock)
@ -2485,10 +2529,11 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock) lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
| LOCK_GAP, | LOCK_GAP,
heir, lock->index, lock->trx); heir, heir_heap_no,
lock->index, lock->trx);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
} }
@ -2505,37 +2550,44 @@ lock_rec_move(
ulint comp) /* in: nonzero=compact page format */ ulint comp) /* in: nonzero=compact page format */
{ {
lock_t* lock; lock_t* lock;
ulint heap_no; ulint receiver_heap_no;
ulint donator_heap_no;
ulint type_mode; ulint type_mode;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex)); ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
heap_no = rec_get_heap_no(donator, comp); if (UNIV_LIKELY(comp)) {
receiver_heap_no = rec_get_heap_no_new(donator);
donator_heap_no = rec_get_heap_no_new(donator);
} else {
receiver_heap_no = rec_get_heap_no_old(donator);
donator_heap_no = rec_get_heap_no_old(donator);
}
lock = lock_rec_get_first(donator); lock = lock_rec_get_first(donator, donator_heap_no);
ut_ad(lock_rec_get_first(receiver) == NULL); ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
while (lock != NULL) { while (lock != NULL) {
type_mode = lock->type_mode; type_mode = lock->type_mode;
lock_rec_reset_nth_bit(lock, heap_no); lock_rec_reset_nth_bit(lock, donator_heap_no);
if (lock_get_wait(lock)) { if (UNIV_UNLIKELY(lock_get_wait(lock))) {
lock_reset_lock_and_trx_wait(lock); lock_reset_lock_and_trx_wait(lock);
} }
/* Note that we FIRST reset the bit, and then set the lock: /* Note that we FIRST reset the bit, and then set the lock:
the function works also if donator == receiver */ the function works also if donator == receiver */
lock_rec_add_to_queue(type_mode, receiver, lock->index, lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
lock->trx); lock->index, lock->trx);
lock = lock_rec_get_next(donator, lock); lock = lock_rec_get_next(donator_heap_no, lock);
} }
ut_ad(lock_rec_get_first(donator) == NULL); ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
} }
/***************************************************************** /*****************************************************************
@ -2555,9 +2607,9 @@ lock_move_reorganize_page(
page_cur_t cur1; page_cur_t cur1;
page_cur_t cur2; page_cur_t cur2;
ulint old_heap_no; ulint old_heap_no;
ulint new_heap_no;
UT_LIST_BASE_NODE_T(lock_t) old_locks; UT_LIST_BASE_NODE_T(lock_t) old_locks;
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
rec_t* sup;
ulint comp; ulint comp;
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
@ -2595,8 +2647,6 @@ lock_move_reorganize_page(
lock = lock_rec_get_next_on_page(lock); lock = lock_rec_get_next_on_page(lock);
} }
sup = page_get_supremum_rec(page);
lock = UT_LIST_GET_FIRST(old_locks); lock = UT_LIST_GET_FIRST(old_locks);
comp = page_is_comp(page); comp = page_is_comp(page);
@ -2617,8 +2667,17 @@ lock_move_reorganize_page(
page_cur_get_rec(&cur2), page_cur_get_rec(&cur2),
rec_get_data_size_old( rec_get_data_size_old(
page_cur_get_rec(&cur2)))); page_cur_get_rec(&cur2))));
old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2), if (UNIV_LIKELY(comp)) {
comp); old_heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
new_heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
old_heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
new_heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, old_heap_no)) { if (lock_rec_get_nth_bit(lock, old_heap_no)) {
@ -2627,9 +2686,10 @@ lock_move_reorganize_page(
lock_rec_add_to_queue(lock->type_mode, lock_rec_add_to_queue(lock->type_mode,
page_cur_get_rec(&cur1), page_cur_get_rec(&cur1),
new_heap_no,
lock->index, lock->trx); lock->index, lock->trx);
/* if ((page_cur_get_rec(&cur1) == sup) /* if ((page_cur_is_after_last(&cur1))
&& lock_get_wait(lock)) { && lock_get_wait(lock)) {
fprintf(stderr, fprintf(stderr,
"---\n--\n!!!Lock reorg: supr type %lu\n", "---\n--\n!!!Lock reorg: supr type %lu\n",
@ -2637,7 +2697,7 @@ lock_move_reorganize_page(
} */ } */
} }
if (page_cur_get_rec(&cur1) == sup) { if (page_cur_is_after_last(&cur1)) {
break; break;
} }
@ -2675,9 +2735,8 @@ lock_move_rec_list_end(
page_cur_t cur1; page_cur_t cur1;
page_cur_t cur2; page_cur_t cur2;
ulint heap_no; ulint heap_no;
rec_t* sup;
ulint type_mode; ulint type_mode;
ulint comp; ut_ad(page_is_comp(page) == page_is_comp(new_page));
ut_ad(page == buf_frame_align(rec)); ut_ad(page == buf_frame_align(rec));
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
@ -2688,12 +2747,8 @@ lock_move_rec_list_end(
table to the end of the hash chain, and lock_rec_add_to_queue table to the end of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */ does not reuse locks if there are waiters in the queue. */
sup = page_get_supremum_rec(page);
lock = lock_rec_get_first_on_page(page); lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page);
while (lock != NULL) { while (lock != NULL) {
page_cur_position(rec, &cur1); page_cur_position(rec, &cur1);
@ -2708,13 +2763,19 @@ lock_move_rec_list_end(
/* Copy lock requests on user records to new page and /* Copy lock requests on user records to new page and
reset the lock bits on the old */ reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != sup) { while (!page_cur_is_after_last(&cur1)) {
ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), ut_ad(page_is_comp(page)
|| 0 == ut_memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2), page_cur_get_rec(&cur2),
rec_get_data_size_old( rec_get_data_size_old(
page_cur_get_rec(&cur2)))); page_cur_get_rec(&cur2))));
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), if (page_is_comp(page)) {
comp); heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, heap_no)) { if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode; type_mode = lock->type_mode;
@ -2725,8 +2786,17 @@ lock_move_rec_list_end(
lock_reset_lock_and_trx_wait(lock); lock_reset_lock_and_trx_wait(lock);
} }
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
}
lock_rec_add_to_queue(type_mode, lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2), page_cur_get_rec(&cur2),
heap_no,
lock->index, lock->trx); lock->index, lock->trx);
} }
@ -2764,15 +2834,13 @@ lock_move_rec_list_start(
page_cur_t cur2; page_cur_t cur2;
ulint heap_no; ulint heap_no;
ulint type_mode; ulint type_mode;
ulint comp;
ut_a(new_page); ut_a(new_page);
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
lock = lock_rec_get_first_on_page(page); lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page); ut_ad(page_is_comp(page) == page_is_comp(new_page));
ut_ad(comp == page_is_comp(new_page));
ut_ad(page == buf_frame_align(rec)); ut_ad(page == buf_frame_align(rec));
while (lock != NULL) { while (lock != NULL) {
@ -2787,12 +2855,18 @@ lock_move_rec_list_start(
reset the lock bits on the old */ reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != rec) { while (page_cur_get_rec(&cur1) != rec) {
ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), ut_ad(page_is_comp(page)
|| 0 == ut_memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2), page_cur_get_rec(&cur2),
rec_get_data_size_old( rec_get_data_size_old(
page_cur_get_rec(&cur2)))); page_cur_get_rec(&cur2))));
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), if (page_is_comp(page)) {
comp); heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur1));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur1));
}
if (lock_rec_get_nth_bit(lock, heap_no)) { if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode; type_mode = lock->type_mode;
@ -2803,8 +2877,17 @@ lock_move_rec_list_start(
lock_reset_lock_and_trx_wait(lock); lock_reset_lock_and_trx_wait(lock);
} }
if (page_is_comp(page)) {
heap_no = rec_get_heap_no_new(
page_cur_get_rec(&cur2));
} else {
heap_no = rec_get_heap_no_old(
page_cur_get_rec(&cur2));
}
lock_rec_add_to_queue(type_mode, lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2), page_cur_get_rec(&cur2),
heap_no,
lock->index, lock->trx); lock->index, lock->trx);
} }
@ -2832,16 +2915,15 @@ lock_update_split_right(
page_t* right_page, /* in: right page */ page_t* right_page, /* in: right page */
page_t* left_page) /* in: left page */ page_t* left_page) /* in: left page */
{ {
ulint comp;
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
comp = page_is_comp(left_page); ut_ad(page_is_comp(left_page) == page_is_comp(right_page));
ut_ad(comp == page_is_comp(right_page));
/* Move the locks on the supremum of the left page to the supremum /* Move the locks on the supremum of the left page to the supremum
of the right page */ of the right page */
lock_rec_move(page_get_supremum_rec(right_page), lock_rec_move(page_get_supremum_rec(right_page),
page_get_supremum_rec(left_page), comp); page_get_supremum_rec(left_page),
page_is_comp(left_page));
/* Inherit the locks to the supremum of left page from the successor /* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */ of the infimum on right page */
@ -3196,7 +3278,7 @@ retry:
goto retry; goto retry;
} }
if (ret == LOCK_VICTIM_IS_START) { if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
if (lock_get_type(lock) & LOCK_TABLE) { if (lock_get_type(lock) & LOCK_TABLE) {
table = lock->un_member.tab_lock.table; table = lock->un_member.tab_lock.table;
index = NULL; index = NULL;
@ -4403,6 +4485,7 @@ lock_rec_queue_validate(
{ {
trx_t* impl_trx; trx_t* impl_trx;
lock_t* lock; lock_t* lock;
ulint heap_no;
ut_a(rec); ut_a(rec);
ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_validate(rec, index, offsets));
@ -4410,9 +4493,15 @@ lock_rec_queue_validate(
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (!page_rec_is_user_rec(rec)) { if (!page_rec_is_user_rec(rec)) {
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock) { while (lock) {
ut_a(lock->trx->conc_state == TRX_ACTIVE ut_a(lock->trx->conc_state == TRX_ACTIVE
@ -4430,7 +4519,7 @@ lock_rec_queue_validate(
ut_a(lock->index == index); ut_a(lock->index == index);
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4438,19 +4527,18 @@ lock_rec_queue_validate(
return(TRUE); return(TRUE);
} }
if (index && (index->type & DICT_CLUSTERED)) { if (!index);
else if (index->type & DICT_CLUSTERED) {
impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets); impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) { LOCK_WAIT, rec, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx)); heap_no, impl_trx));
} }
} } else {
if (index && !(index->type & DICT_CLUSTERED)) {
/* The kernel mutex may get released temporarily in the /* The kernel mutex may get released temporarily in the
next function call: we have to release lock table mutex next function call: we have to release lock table mutex
@ -4460,14 +4548,14 @@ lock_rec_queue_validate(
rec, index, offsets); rec, index, offsets);
if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0,
LOCK_WAIT, rec, impl_trx)) { LOCK_WAIT, rec, heap_no, impl_trx)) {
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
rec, impl_trx)); rec, heap_no, impl_trx));
} }
} }
lock = lock_rec_get_first(rec); lock = lock_rec_get_first(rec, heap_no);
while (lock) { while (lock) {
ut_a(lock->trx->conc_state == TRX_ACTIVE ut_a(lock->trx->conc_state == TRX_ACTIVE
@ -4488,15 +4576,15 @@ lock_rec_queue_validate(
} else { } else {
mode = LOCK_S; mode = LOCK_S;
} }
ut_a(!lock_rec_other_has_expl_req(mode, ut_a(!lock_rec_other_has_expl_req(mode, 0, 0,
0, 0, rec, lock->trx)); rec, heap_no, lock->trx));
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
ut_a(lock_rec_has_to_wait_in_queue(lock)); ut_a(lock_rec_has_to_wait_in_queue(lock));
} }
lock = lock_rec_get_next(rec, lock); lock = lock_rec_get_next(heap_no, lock);
} }
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4704,6 +4792,7 @@ lock_rec_insert_check_and_lock(
trx_t* trx; trx_t* trx;
lock_t* lock; lock_t* lock;
ulint err; ulint err;
ulint next_rec_heap_no;
if (flags & BTR_NO_LOCKING_FLAG) { if (flags & BTR_NO_LOCKING_FLAG) {
@ -4715,15 +4804,19 @@ lock_rec_insert_check_and_lock(
trx = thr_get_trx(thr); trx = thr_get_trx(thr);
next_rec = page_rec_get_next(rec); next_rec = page_rec_get_next(rec);
*inherit = FALSE;
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
lock = lock_rec_get_first(next_rec); if (page_rec_is_comp(next_rec)) {
next_rec_heap_no = rec_get_heap_no_new(next_rec);
} else {
next_rec_heap_no = rec_get_heap_no_old(next_rec);
}
if (lock == NULL) { lock = lock_rec_get_first(next_rec, next_rec_heap_no);
if (UNIV_LIKELY(lock == NULL)) {
/* We optimize CPU time usage in the simplest case */ /* We optimize CPU time usage in the simplest case */
lock_mutex_exit_kernel(); lock_mutex_exit_kernel();
@ -4735,6 +4828,8 @@ lock_rec_insert_check_and_lock(
thr_get_trx(thr)->id); thr_get_trx(thr)->id);
} }
*inherit = FALSE;
return(DB_SUCCESS); return(DB_SUCCESS);
} }
@ -4751,7 +4846,8 @@ lock_rec_insert_check_and_lock(
on the successor, which produced an unnecessary deadlock. */ on the successor, which produced an unnecessary deadlock. */
if (lock_rec_other_has_conflicting(LOCK_X | LOCK_GAP if (lock_rec_other_has_conflicting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION, next_rec, trx)) { | LOCK_INSERT_INTENTION,
next_rec, next_rec_heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */ /* Note that we may get DB_SUCCESS also here! */
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
@ -4821,12 +4917,20 @@ lock_rec_convert_impl_to_expl(
/* If the transaction has no explicit x-lock set on the /* If the transaction has no explicit x-lock set on the
record, set one for it */ record, set one for it */
ulint heap_no;
if (page_rec_is_comp(rec)) {
heap_no = rec_get_heap_no_new(rec);
} else {
heap_no = rec_get_heap_no_old(rec);
}
if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
impl_trx)) { heap_no, impl_trx)) {
lock_rec_add_to_queue(LOCK_REC | LOCK_X lock_rec_add_to_queue(LOCK_REC | LOCK_X
| LOCK_REC_NOT_GAP, rec, index, | LOCK_REC_NOT_GAP, rec, heap_no,
impl_trx); index, impl_trx);
} }
} }
} }

View file

@ -22,6 +22,7 @@ Created 9/20/1997 Heikki Tuuri
#include "mtr0log.h" #include "mtr0log.h"
#include "page0page.h" #include "page0page.h"
#include "page0cur.h" #include "page0cur.h"
#include "page0zip.h"
#include "btr0btr.h" #include "btr0btr.h"
#include "btr0cur.h" #include "btr0cur.h"
#include "ibuf0ibuf.h" #include "ibuf0ibuf.h"
@ -753,9 +754,10 @@ recv_parse_or_apply_log_rec_body(
byte type, /* in: type */ byte type, /* in: type */
byte* ptr, /* in: pointer to a buffer */ byte* ptr, /* in: pointer to a buffer */
byte* end_ptr,/* in: pointer to the buffer end */ byte* end_ptr,/* in: pointer to the buffer end */
page_t* page, /* in: buffer page or NULL; if not NULL, then the log page_t* page, /* in/out: buffer page or NULL; if not NULL, then the
record is applied to the page, and the log record log record is applied to the page, and the log record
should be complete then */ should be complete then */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
page is non-NULL */ page is non-NULL */
{ {
@ -771,7 +773,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp); || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
index, page, mtr); index, page, page_zip, mtr);
} }
break; break;
case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
@ -780,7 +782,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp); || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, ptr = btr_cur_parse_del_mark_set_clust_rec(ptr,
end_ptr, index, page); end_ptr, page, page_zip, index);
} }
break; break;
case MLOG_COMP_REC_SEC_DELETE_MARK: case MLOG_COMP_REC_SEC_DELETE_MARK:
@ -793,7 +795,8 @@ recv_parse_or_apply_log_rec_body(
} }
/* Fall through */ /* Fall through */
case MLOG_REC_SEC_DELETE_MARK: case MLOG_REC_SEC_DELETE_MARK:
ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page); ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
page, page_zip);
break; break;
case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, if (NULL != (ptr = mlog_parse_index(ptr, end_ptr,
@ -801,7 +804,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp); || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = btr_cur_parse_update_in_place(ptr, end_ptr, ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
page, index); page, page_zip, index);
} }
break; break;
case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE: case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
@ -821,7 +824,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp); || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_parse_copy_rec_list_to_created_page(ptr, ptr = page_parse_copy_rec_list_to_created_page(ptr,
end_ptr, index, page, mtr); end_ptr, index, page, page_zip, mtr);
} }
break; break;
case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
@ -864,7 +867,7 @@ recv_parse_or_apply_log_rec_body(
ut_a(!page ut_a(!page
|| (ibool)!!page_is_comp(page)==index->table->comp); || (ibool)!!page_is_comp(page)==index->table->comp);
ptr = page_cur_parse_delete_rec(ptr, end_ptr, ptr = page_cur_parse_delete_rec(ptr, end_ptr,
index, page, mtr); index, page, page_zip, mtr);
} }
break; break;
case MLOG_IBUF_BITMAP_INIT: case MLOG_IBUF_BITMAP_INIT:
@ -882,6 +885,16 @@ recv_parse_or_apply_log_rec_body(
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
ULINT_UNDEFINED); ULINT_UNDEFINED);
break; break;
case MLOG_COMP_DECOMPRESS:
if (page) {
ut_a(page_is_comp(page));
ut_a(page_zip);
if (UNIV_UNLIKELY(!page_zip_decompress(
page_zip, page, NULL))) {
ut_error;
}
}
break;
default: default:
ptr = NULL; ptr = NULL;
recv_sys->found_corrupt_log = TRUE; recv_sys->found_corrupt_log = TRUE;
@ -1089,6 +1102,7 @@ recv_recover_page(
ulint page_no) /* in: page number */ ulint page_no) /* in: page number */
{ {
buf_block_t* block = NULL; buf_block_t* block = NULL;
page_zip_des_t* page_zip = NULL;
recv_addr_t* recv_addr; recv_addr_t* recv_addr;
recv_t* recv; recv_t* recv;
byte* buf; byte* buf;
@ -1133,6 +1147,7 @@ recv_recover_page(
if (!recover_backup) { if (!recover_backup) {
block = buf_block_align(page); block = buf_block_align(page);
page_zip = buf_block_get_page_zip(block);
if (just_read_in) { if (just_read_in) {
/* Move the ownership of the x-latch on the page to this OS /* Move the ownership of the x-latch on the page to this OS
@ -1220,7 +1235,8 @@ recv_recover_page(
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
recv_parse_or_apply_log_rec_body(recv->type, buf, recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len, page, &mtr); buf + recv->len,
page, page_zip, &mtr);
mach_write_to_8(page + UNIV_PAGE_SIZE mach_write_to_8(page + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM, - FIL_PAGE_END_LSN_OLD_CHKSUM,
ut_dulint_add(recv->start_lsn, ut_dulint_add(recv->start_lsn,
@ -1613,8 +1629,8 @@ recv_update_replicate(
buf_page_dbg_add_level(replica, SYNC_NO_ORDER_CHECK); buf_page_dbg_add_level(replica, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
ptr = recv_parse_or_apply_log_rec_body(type, body, end_ptr, replica, ptr = recv_parse_or_apply_log_rec_body(type, body, end_ptr,
&mtr); replica, NULL, &mtr);
ut_a(ptr == end_ptr); ut_a(ptr == end_ptr);
/* Notify the buffer manager that the page has been updated */ /* Notify the buffer manager that the page has been updated */
@ -1845,7 +1861,7 @@ recv_parse_log_rec(
} }
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL); NULL, NULL, NULL);
if (UNIV_UNLIKELY(new_ptr == NULL)) { if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0); return(0);

View file

@ -19,6 +19,6 @@ include ../include/Makefile.i
noinst_LIBRARIES = libpage.a noinst_LIBRARIES = libpage.a
libpage_a_SOURCES = page0page.c page0cur.c libpage_a_SOURCES = page0page.c page0cur.c page0zip.c
EXTRA_PROGRAMS = EXTRA_PROGRAMS =

View file

@ -11,6 +11,7 @@ Created 10/4/1994 Heikki Tuuri
#include "page0cur.ic" #include "page0cur.ic"
#endif #endif
#include "page0zip.h"
#include "rem0cmp.h" #include "rem0cmp.h"
#include "mtr0log.h" #include "mtr0log.h"
#include "log0recv.h" #include "log0recv.h"
@ -483,7 +484,7 @@ page_cur_open_on_rnd_user_rec(
ulint rnd; ulint rnd;
rec_t* rec; rec_t* rec;
if (page_get_n_recs(page) == 0) { if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
page_cur_position(page_get_infimum_rec(page), cursor); page_cur_position(page_get_infimum_rec(page), cursor);
return; return;
@ -522,19 +523,14 @@ page_cur_insert_rec_write_log(
ulint cur_rec_size; ulint cur_rec_size;
ulint extra_size; ulint extra_size;
ulint cur_extra_size; ulint cur_extra_size;
ulint min_rec_size; const byte* ins_ptr;
byte* ins_ptr;
byte* cur_ptr;
ulint extra_info_yes;
byte* log_ptr; byte* log_ptr;
byte* log_end; const byte* log_end;
ulint i; ulint i;
ulint comp;
ut_a(rec_size < UNIV_PAGE_SIZE); ut_a(rec_size < UNIV_PAGE_SIZE);
ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec)); ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec));
ut_ad(!page_rec_is_comp(insert_rec) == !index->table->comp); ut_ad(!page_rec_is_comp(insert_rec) == !index->table->comp);
comp = page_rec_is_comp(insert_rec);
{ {
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
@ -567,45 +563,55 @@ page_cur_insert_rec_write_log(
i = 0; i = 0;
if (cur_extra_size == extra_size) { if (cur_extra_size == extra_size) {
min_rec_size = ut_min(cur_rec_size, rec_size); ulint min_rec_size = ut_min(cur_rec_size, rec_size);
cur_ptr = cursor_rec - cur_extra_size; const byte* cur_ptr = cursor_rec - cur_extra_size;
/* Find out the first byte in insert_rec which differs from /* Find out the first byte in insert_rec which differs from
cursor_rec; skip the bytes in the record info */ cursor_rec; skip the bytes in the record info */
for (;;) { do {
if (i >= min_rec_size) { if (*ins_ptr == *cur_ptr) {
break;
} else if (*ins_ptr == *cur_ptr) {
i++; i++;
ins_ptr++; ins_ptr++;
cur_ptr++; cur_ptr++;
} else if ((i < extra_size) } else if ((i < extra_size)
&& (i >= extra_size - (comp && (i >= extra_size -
? REC_N_NEW_EXTRA_BYTES page_rec_get_base_extra_size(
: REC_N_OLD_EXTRA_BYTES))) { insert_rec))) {
i = extra_size; i = extra_size;
ins_ptr = insert_rec; ins_ptr = insert_rec;
cur_ptr = cursor_rec; cur_ptr = cursor_rec;
} else { } else {
break; break;
} }
} } while (i < min_rec_size);
} }
if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
log_ptr = mlog_open_and_write_index(mtr, insert_rec, index, if (page_rec_is_comp(insert_rec)) {
comp log_ptr = mlog_open_and_write_index(mtr, insert_rec,
? MLOG_COMP_REC_INSERT : MLOG_REC_INSERT, index, MLOG_COMP_REC_INSERT,
2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
if (UNIV_UNLIKELY(!log_ptr)) {
/* Logging in mtr is switched off
during crash recovery: in that case
mlog_open returns NULL */
return;
}
} else {
log_ptr = mlog_open(mtr, 11
+ 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
if (UNIV_UNLIKELY(!log_ptr)) {
/* Logging in mtr is switched off
during crash recovery: in that case
mlog_open returns NULL */
return;
}
if (!log_ptr) { log_ptr = mlog_write_initial_log_record_fast(
/* Logging in mtr is switched off during crash insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
recovery: in that case mlog_open returns NULL */
return;
} }
log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
@ -623,24 +629,33 @@ page_cur_insert_rec_write_log(
log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
} }
if ((rec_get_info_and_status_bits(insert_rec, comp) != if (page_rec_is_comp(insert_rec)) {
rec_get_info_and_status_bits(cursor_rec, comp)) if (UNIV_UNLIKELY
|| (extra_size != cur_extra_size) (rec_get_info_and_status_bits(insert_rec, TRUE) !=
|| (rec_size != cur_rec_size)) { rec_get_info_and_status_bits(cursor_rec, TRUE))) {
extra_info_yes = 1; goto need_extra_info;
}
} else { } else {
extra_info_yes = 0; if (UNIV_UNLIKELY
(rec_get_info_and_status_bits(insert_rec, FALSE) !=
rec_get_info_and_status_bits(cursor_rec, FALSE))) {
goto need_extra_info;
}
} }
/* Write the record end segment length and the extra info storage if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
flag */ need_extra_info:
log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i) /* Write the record end segment length
+ extra_info_yes); and the extra info storage flag */
if (extra_info_yes) { log_ptr += mach_write_compressed(log_ptr,
2 * (rec_size - i) + 1);
/* Write the info bits */ /* Write the info bits */
mach_write_to_1(log_ptr, mach_write_to_1(log_ptr,
rec_get_info_and_status_bits(insert_rec, comp)); rec_get_info_and_status_bits(insert_rec,
page_rec_is_comp(insert_rec)));
log_ptr++; log_ptr++;
/* Write the record origin offset */ /* Write the record origin offset */
@ -651,6 +666,10 @@ page_cur_insert_rec_write_log(
ut_a(i < UNIV_PAGE_SIZE); ut_a(i < UNIV_PAGE_SIZE);
ut_a(extra_size < UNIV_PAGE_SIZE); ut_a(extra_size < UNIV_PAGE_SIZE);
} else {
/* Write the record end segment length
and the extra info storage flag */
log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
} }
/* Write to the log the inserted index record end segment which /* Write to the log the inserted index record end segment which
@ -679,10 +698,11 @@ page_cur_parse_insert_rec(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
mtr_t* mtr) /* in: mtr or NULL */ mtr_t* mtr) /* in: mtr or NULL */
{ {
ulint extra_info_yes;
ulint offset = 0; /* remove warning */ ulint offset = 0; /* remove warning */
ulint origin_offset; ulint origin_offset;
ulint end_seg_len; ulint end_seg_len;
@ -725,16 +745,13 @@ page_cur_parse_insert_rec(
return(NULL); return(NULL);
} }
extra_info_yes = end_seg_len & 0x1UL; if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
end_seg_len >>= 1;
if (end_seg_len >= UNIV_PAGE_SIZE) {
recv_sys->found_corrupt_log = TRUE; recv_sys->found_corrupt_log = TRUE;
return(NULL); return(NULL);
} }
if (extra_info_yes) { if (end_seg_len & 0x1UL) {
/* Read the info bits */ /* Read the info bits */
if (end_ptr < ptr + 1) { if (end_ptr < ptr + 1) {
@ -764,17 +781,18 @@ page_cur_parse_insert_rec(
ut_a(mismatch_index < UNIV_PAGE_SIZE); ut_a(mismatch_index < UNIV_PAGE_SIZE);
} }
if (end_ptr < ptr + end_seg_len) { if (end_ptr < ptr + (end_seg_len >> 1)) {
return(NULL); return(NULL);
} }
if (page == NULL) { if (page == NULL) {
return(ptr + end_seg_len); return(ptr + (end_seg_len >> 1));
} }
ut_ad(!!page_is_comp(page) == index->table->comp); ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_is_comp(page));
/* Read from the log the inserted index record end segment which /* Read from the log the inserted index record end segment which
differs from the cursor record */ differs from the cursor record */
@ -788,13 +806,15 @@ page_cur_parse_insert_rec(
offsets = rec_get_offsets(cursor_rec, index, offsets, offsets = rec_get_offsets(cursor_rec, index, offsets,
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
if (extra_info_yes == 0) { if (!(end_seg_len & 0x1UL)) {
info_and_status_bits = rec_get_info_and_status_bits( info_and_status_bits = rec_get_info_and_status_bits(
cursor_rec, page_is_comp(page)); cursor_rec, page_is_comp(page));
origin_offset = rec_offs_extra_size(offsets); origin_offset = rec_offs_extra_size(offsets);
mismatch_index = rec_offs_size(offsets) - end_seg_len; mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
} }
end_seg_len >>= 1;
if (mismatch_index + end_seg_len < sizeof buf1) { if (mismatch_index + end_seg_len < sizeof buf1) {
buf = buf1; buf = buf1;
} else { } else {
@ -803,7 +823,7 @@ page_cur_parse_insert_rec(
/* Build the inserted record to buf */ /* Build the inserted record to buf */
if (mismatch_index >= UNIV_PAGE_SIZE) { if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
fprintf(stderr, fprintf(stderr,
"Is short %lu, info_and_status_bits %lu, offset %lu, " "Is short %lu, info_and_status_bits %lu, offset %lu, "
"o_offset %lu\n" "o_offset %lu\n"
@ -826,14 +846,24 @@ page_cur_parse_insert_rec(
ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index); ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
ut_memcpy(buf + mismatch_index, ptr, end_seg_len); ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page), if (page_is_comp(page)) {
rec_set_info_and_status_bits(buf + origin_offset, NULL,
info_and_status_bits); info_and_status_bits);
} else {
rec_set_info_bits_old(buf + origin_offset,
info_and_status_bits);
}
page_cur_position(cursor_rec, &cursor); page_cur_position(cursor_rec, &cursor);
offsets = rec_get_offsets(buf + origin_offset, index, offsets, offsets = rec_get_offsets(buf + origin_offset, index, offsets,
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr); if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor, page_zip,
buf + origin_offset, index, offsets, mtr))) {
/* The redo log record should only have been written
after the write was successful. */
ut_error;
}
if (buf != buf1) { if (buf != buf1) {
@ -859,6 +889,8 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL /* out: pointer to record if succeed, NULL
otherwise */ otherwise */
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in: a page cursor */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
25 + rec_size bytes available, or NULL */
dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
rec_t* rec, /* in: pointer to a physical record or NULL */ rec_t* rec, /* in: pointer to a physical record or NULL */
@ -873,14 +905,7 @@ page_cur_insert_rec_low(
ulint heap_no; /* heap number of the inserted record */ ulint heap_no; /* heap number of the inserted record */
rec_t* current_rec; /* current record after which the rec_t* current_rec; /* current record after which the
new record is inserted */ new record is inserted */
rec_t* next_rec; /* next record after current before
the insertion */
ulint owner_slot; /* the slot which owns the
inserted record */
rec_t* owner_rec;
ulint n_owned;
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
ulint comp;
ut_ad(cursor && mtr); ut_ad(cursor && mtr);
ut_ad(tuple || rec); ut_ad(tuple || rec);
@ -888,10 +913,9 @@ page_cur_insert_rec_low(
ut_ad(rec || dtuple_check_typed(tuple)); ut_ad(rec || dtuple_check_typed(tuple));
page = page_cur_get_page(cursor); page = page_cur_get_page(cursor);
comp = page_is_comp(page); ut_ad(index->table->comp == (ibool) !!page_is_comp(page));
ut_ad(index->table->comp == !!comp);
ut_ad(cursor->rec != page_get_supremum_rec(page)); ut_ad(!page_rec_is_supremum(cursor->rec));
/* 1. Get the size of the physical record in the page */ /* 1. Get the size of the physical record in the page */
if (tuple != NULL) { if (tuple != NULL) {
@ -905,10 +929,20 @@ page_cur_insert_rec_low(
rec_size = rec_offs_size(offsets); rec_size = rec_offs_size(offsets);
} }
/* 2. Try to find suitable space from page memory management */ if (UNIV_LIKELY_NULL(page_zip)) {
insert_buf = page_mem_alloc(page, rec_size, index, &heap_no); if (UNIV_UNLIKELY(!page_zip_alloc(
page_zip, page, 25 + rec_size))) {
if (insert_buf == NULL) { goto err_exit;
}
}
/* 2. Try to find suitable space from page memory management */
insert_buf = page_mem_alloc(page, page_zip, rec_size,
index, &heap_no);
if (UNIV_UNLIKELY(insert_buf == NULL)) {
err_exit:
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
@ -933,66 +967,95 @@ page_cur_insert_rec_low(
/* 4. Insert the record in the linked list of records */ /* 4. Insert the record in the linked list of records */
current_rec = cursor->rec; current_rec = cursor->rec;
ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM); {
ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM); /* next record after current before the insertion */
rec_t* next_rec = page_rec_get_next(current_rec);
#ifdef UNIV_DEBUG
if (page_is_comp(page)) {
ut_ad(rec_get_status(current_rec)
<= REC_STATUS_INFIMUM);
ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
}
#endif
page_rec_set_next(insert_rec, next_rec, NULL);
page_rec_set_next(current_rec, insert_rec, page_zip);
}
next_rec = page_rec_get_next(current_rec); page_header_set_field(page, page_zip, PAGE_N_RECS,
ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM); 1 + page_get_n_recs(page));
page_rec_set_next(insert_rec, next_rec);
page_rec_set_next(current_rec, insert_rec);
page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
/* 5. Set the n_owned field in the inserted record to zero, /* 5. Set the n_owned field in the inserted record to zero,
and set the heap_no field */ and set the heap_no field */
if (page_is_comp(page)) {
rec_set_n_owned(insert_rec, comp, 0); rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no(insert_rec, comp, heap_no); rec_set_heap_no_new(insert_rec, NULL, heap_no);
} else {
rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, heap_no);
}
/* 6. Update the last insertion info in page header */ /* 6. Update the last insertion info in page header */
last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
ut_ad(!last_insert || !comp ut_ad(!last_insert || !page_is_comp(page)
|| rec_get_node_ptr_flag(last_insert) || rec_get_node_ptr_flag(last_insert)
== rec_get_node_ptr_flag(insert_rec)); == rec_get_node_ptr_flag(insert_rec));
if (last_insert == NULL) { if (UNIV_UNLIKELY(last_insert == NULL)) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); page_header_set_field(page, page_zip, PAGE_DIRECTION,
page_header_set_field(page, PAGE_N_DIRECTION, 0); PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
} else if ((last_insert == current_rec) } else if ((last_insert == current_rec)
&& (page_header_get_field(page, PAGE_DIRECTION) != PAGE_LEFT)) { && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_LEFT)) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT); page_header_set_field(page, page_zip, PAGE_DIRECTION,
page_header_set_field(page, PAGE_N_DIRECTION, PAGE_RIGHT);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(page, PAGE_N_DIRECTION) + 1); page_header_get_field(page, PAGE_N_DIRECTION) + 1);
} else if ((page_rec_get_next(insert_rec) == last_insert) } else if ((page_rec_get_next(insert_rec) == last_insert)
&& (page_header_get_field(page, PAGE_DIRECTION) != PAGE_RIGHT)) { && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_RIGHT)) {
page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT); page_header_set_field(page, page_zip, PAGE_DIRECTION,
page_header_set_field(page, PAGE_N_DIRECTION, PAGE_LEFT);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
page_header_get_field(page, PAGE_N_DIRECTION) + 1); page_header_get_field(page, PAGE_N_DIRECTION) + 1);
} else { } else {
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); page_header_set_field(page, page_zip, PAGE_DIRECTION,
page_header_set_field(page, PAGE_N_DIRECTION, 0); PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
} }
page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec); page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
/* 7. It remains to update the owner record. */ /* 7. It remains to update the owner record. */
{
rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
ulint n_owned;
if (page_is_comp(page)) {
n_owned = rec_get_n_owned_new(owner_rec);
rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
} else {
n_owned = rec_get_n_owned_old(owner_rec);
rec_set_n_owned_old(owner_rec, n_owned + 1);
}
owner_rec = page_rec_find_owner_rec(insert_rec); /* 8. Now we have incremented the n_owned field of the owner
n_owned = rec_get_n_owned(owner_rec, comp); record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
rec_set_n_owned(owner_rec, comp, n_owned + 1); we have to split the corresponding directory slot in two. */
/* 8. Now we have incremented the n_owned field of the owner if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, page_dir_split_slot(page, page_zip,
we have to split the corresponding directory slot in two. */ page_dir_find_owner_slot(owner_rec));
}
}
if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) { if (UNIV_LIKELY_NULL(page_zip)) {
owner_slot = page_dir_find_owner_slot(owner_rec); page_zip_write(page_zip,
page_dir_split_slot(page, owner_slot); insert_rec - rec_offs_extra_size(offsets),
rec_size);
} }
/* 9. Write log record of the insert */ /* 9. Write log record of the insert */
@ -1041,7 +1104,8 @@ page_parse_copy_rec_list_to_created_page(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page or NULL */
mtr_t* mtr) /* in: mtr or NULL */ mtr_t* mtr) /* in: mtr or NULL */
{ {
byte* rec_end; byte* rec_end;
@ -1069,14 +1133,15 @@ page_parse_copy_rec_list_to_created_page(
while (ptr < rec_end) { while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
index, page, mtr); index, page, page_zip, mtr);
} }
ut_a(ptr == rec_end); ut_a(ptr == rec_end);
page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); page_header_set_field(page, page_zip, PAGE_DIRECTION,
page_header_set_field(page, PAGE_N_DIRECTION, 0); PAGE_NO_DIRECTION);
page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
return(rec_end); return(rec_end);
} }
@ -1089,7 +1154,6 @@ void
page_copy_rec_list_end_to_created_page( page_copy_rec_list_end_to_created_page(
/*===================================*/ /*===================================*/
page_t* new_page, /* in: index page to copy to */ page_t* new_page, /* in: index page to copy to */
page_t* page, /* in: index page */
rec_t* rec, /* in: first record to copy */ rec_t* rec, /* in: first record to copy */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
@ -1105,22 +1169,21 @@ page_copy_rec_list_end_to_created_page(
ulint log_mode; ulint log_mode;
byte* log_ptr; byte* log_ptr;
ulint log_data_len; ulint log_data_len;
ulint comp = page_is_comp(page);
mem_heap_t* heap = NULL; mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_; ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_ad(page_dir_get_n_heap(new_page) == 2); ut_ad(page_dir_get_n_heap(new_page) == 2);
ut_ad(page != new_page); ut_ad(ut_align_down(rec, UNIV_PAGE_SIZE) != new_page);
ut_ad(comp == page_is_comp(new_page)); ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
if (rec == page_get_infimum_rec(page)) { if (page_rec_is_infimum(rec)) {
rec = page_rec_get_next(rec); rec = page_rec_get_next(rec);
} }
if (rec == page_get_supremum_rec(page)) { if (page_rec_is_supremum(rec)) {
return; return;
} }
@ -1128,8 +1191,8 @@ page_copy_rec_list_end_to_created_page(
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/* To pass the debug tests we have to set these dummy values /* To pass the debug tests we have to set these dummy values
in the debug version */ in the debug version */
page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2); page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
page_header_set_ptr(new_page, PAGE_HEAP_TOP, page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
new_page + UNIV_PAGE_SIZE - 1); new_page + UNIV_PAGE_SIZE - 1);
#endif #endif
@ -1143,7 +1206,7 @@ page_copy_rec_list_end_to_created_page(
log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
prev_rec = page_get_infimum_rec(new_page); prev_rec = page_get_infimum_rec(new_page);
if (comp) { if (page_is_comp(new_page)) {
heap_top = new_page + PAGE_NEW_SUPREMUM_END; heap_top = new_page + PAGE_NEW_SUPREMUM_END;
} else { } else {
heap_top = new_page + PAGE_OLD_SUPREMUM_END; heap_top = new_page + PAGE_OLD_SUPREMUM_END;
@ -1152,43 +1215,52 @@ page_copy_rec_list_end_to_created_page(
slot_index = 0; slot_index = 0;
n_recs = 0; n_recs = 0;
/* should be do ... until, comment by Jani */ do {
while (rec != page_get_supremum_rec(page)) {
offsets = rec_get_offsets(rec, index, offsets, offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
insert_rec = rec_copy(heap_top, rec, offsets); insert_rec = rec_copy(heap_top, rec, offsets);
rec_set_next_offs(prev_rec, comp, insert_rec - new_page); if (page_is_comp(new_page)) {
rec_set_next_offs_new(prev_rec, NULL,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_set_n_owned(insert_rec, comp, 0); rec_set_n_owned_new(insert_rec, NULL, 0);
rec_set_heap_no(insert_rec, comp, 2 + n_recs); rec_set_heap_no_new(insert_rec, NULL, 2 + n_recs);
} else {
rec_set_next_offs_old(prev_rec,
ut_align_offset(insert_rec, UNIV_PAGE_SIZE));
rec_size = rec_offs_size(offsets); rec_set_n_owned_old(insert_rec, 0);
rec_set_heap_no_old(insert_rec, 2 + n_recs);
heap_top = heap_top + rec_size; }
ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
count++; count++;
n_recs++; n_recs++;
if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) { if (UNIV_UNLIKELY(count ==
(PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
slot_index++; slot_index++;
slot = page_dir_get_nth_slot(new_page, slot_index); slot = page_dir_get_nth_slot(new_page, slot_index);
page_dir_slot_set_rec(slot, insert_rec); page_dir_slot_set_rec(slot, insert_rec);
page_dir_slot_set_n_owned(slot, count); page_dir_slot_set_n_owned(slot, NULL, count);
count = 0; count = 0;
} }
rec_size = rec_offs_size(offsets);
ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
heap_top += rec_size;
page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
index, mtr); index, mtr);
prev_rec = insert_rec; prev_rec = insert_rec;
rec = page_rec_get_next(rec); rec = page_rec_get_next(rec);
} } while (!page_rec_is_supremum(rec));
if ((slot_index > 0) && (count + 1 if ((slot_index > 0) && (count + 1
+ (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
@ -1202,7 +1274,7 @@ page_copy_rec_list_end_to_created_page(
count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2; count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
page_dir_slot_set_n_owned(slot, 0); page_dir_slot_set_n_owned(slot, NULL, 0);
slot_index--; slot_index--;
} }
@ -1217,22 +1289,26 @@ page_copy_rec_list_end_to_created_page(
mach_write_to_4(log_ptr, log_data_len); mach_write_to_4(log_ptr, log_data_len);
rec_set_next_offs(insert_rec, comp, if (page_is_comp(new_page)) {
comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM); rec_set_next_offs_new(insert_rec, NULL, PAGE_NEW_SUPREMUM);
} else {
rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
}
slot = page_dir_get_nth_slot(new_page, 1 + slot_index); slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
page_dir_slot_set_n_owned(slot, count + 1); page_dir_slot_set_n_owned(slot, NULL, count + 1);
page_dir_set_n_slots(new_page, 2 + slot_index); page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top); page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
page_dir_set_n_heap(new_page, 2 + n_recs); page_dir_set_n_heap(new_page, NULL, 2 + n_recs);
page_header_set_field(new_page, PAGE_N_RECS, n_recs); page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL); page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION); page_header_set_field(new_page, NULL, PAGE_DIRECTION,
page_header_set_field(new_page, PAGE_N_DIRECTION, 0); PAGE_NO_DIRECTION);
page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
/* Restore the log mode */ /* Restore the log mode */
@ -1251,7 +1327,7 @@ page_cur_delete_rec_write_log(
{ {
byte* log_ptr; byte* log_ptr;
ut_ad(!!page_rec_is_comp(rec) == index->table->comp); ut_ad((ibool) !!page_rec_is_comp(rec) == index->table->comp);
log_ptr = mlog_open_and_write_index(mtr, rec, index, log_ptr = mlog_open_and_write_index(mtr, rec, index,
page_rec_is_comp(rec) page_rec_is_comp(rec)
@ -1280,7 +1356,9 @@ page_cur_parse_delete_rec(
byte* ptr, /* in: buffer */ byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */ byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */ page_t* page, /* in/out: page or NULL */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr) /* in: mtr or NULL */ mtr_t* mtr) /* in: mtr or NULL */
{ {
ulint offset; ulint offset;
@ -1304,10 +1382,11 @@ page_cur_parse_delete_rec(
*offsets_ = (sizeof offsets_) / sizeof *offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_;
page_cur_position(rec, &cursor); page_cur_position(rec, &cursor);
ut_ad(!page_zip || page_is_comp(page));
page_cur_delete_rec(&cursor, index, page_cur_delete_rec(&cursor, index,
rec_get_offsets(rec, index, offsets_, rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), mtr); ULINT_UNDEFINED, &heap), page_zip, mtr);
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
@ -1323,9 +1402,11 @@ record after the deleted one. */
void void
page_cur_delete_rec( page_cur_delete_rec(
/*================*/ /*================*/
page_cur_t* cursor, /* in: a page cursor */ page_cur_t* cursor, /* in/out: a page cursor */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */ const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
32 bytes available, or NULL */
mtr_t* mtr) /* in: mini-transaction handle */ mtr_t* mtr) /* in: mini-transaction handle */
{ {
page_dir_slot_t* cur_dir_slot; page_dir_slot_t* cur_dir_slot;
@ -1343,11 +1424,11 @@ page_cur_delete_rec(
page = page_cur_get_page(cursor); page = page_cur_get_page(cursor);
current_rec = cursor->rec; current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets)); ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == index->table->comp); ut_ad((ibool) !!page_is_comp(page) == index->table->comp);
ut_ad(!page_zip || page_zip_available(page_zip, 32));
/* The record must not be the supremum or infimum record. */ /* The record must not be the supremum or infimum record. */
ut_ad(current_rec != page_get_supremum_rec(page)); ut_ad(page_rec_is_user_rec(current_rec));
ut_ad(current_rec != page_get_infimum_rec(page));
/* Save to local variables some data associated with current_rec */ /* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec); cur_slot_no = page_dir_find_owner_slot(current_rec);
@ -1360,7 +1441,7 @@ page_cur_delete_rec(
/* 1. Reset the last insert info in the page header and increment /* 1. Reset the last insert info in the page header and increment
the modify clock for the frame */ the modify clock for the frame */
page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the /* The page gets invalid for optimistic searches: increment the
frame modify clock */ frame modify clock */
@ -1388,8 +1469,8 @@ page_cur_delete_rec(
/* 3. Remove the record from the linked list of records */ /* 3. Remove the record from the linked list of records */
page_rec_set_next(prev_rec, next_rec); page_rec_set_next(prev_rec, next_rec, page_zip);
page_header_set_field(page, PAGE_N_RECS, page_header_set_field(page, page_zip, PAGE_N_RECS,
(ulint)(page_get_n_recs(page) - 1)); (ulint)(page_get_n_recs(page) - 1));
/* 4. If the deleted record is pointed to by a dir slot, update the /* 4. If the deleted record is pointed to by a dir slot, update the
@ -1406,16 +1487,16 @@ page_cur_delete_rec(
/* 5. Update the number of owned records of the slot */ /* 5. Update the number of owned records of the slot */
page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1); page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */ /* 6. Free the memory occupied by the record */
page_mem_free(page, current_rec, offsets); page_mem_free(page, page_zip, current_rec, offsets);
/* 7. Now we have decremented the number of owned records of the slot. /* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
slots. */ slots. */
if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
page_dir_balance_slot(page, cur_slot_no); page_dir_balance_slot(page, page_zip, cur_slot_no);
} }
} }

File diff suppressed because it is too large Load diff

331
page/page0zip.c Normal file
View file

@ -0,0 +1,331 @@
/******************************************************
Compressed page interface
(c) 2005 Innobase Oy
Created June 2005 by Marko Makela
*******************************************************/
#define THIS_MODULE
#include "page0zip.h"
#ifdef UNIV_NONINL
# include "page0zip.ic"
#endif
#undef THIS_MODULE
#include "page0page.h"
#include "mtr0log.h"
#include "zlib.h"
/**************************************************************************
Compress a page.

The compressed page consists of a verbatim copy of the first PAGE_DATA
bytes of the page, the deflated payload (bytes PAGE_DATA..PAGE_HEAP_TOP
of the uncompressed page), a zero-filled area reserved for the
modification log, and a verbatim copy of the page trailer (page directory
and the bytes after it). */
ibool
page_zip_compress(
/*==============*/
				/* out: TRUE on success, FALSE on failure;
				page_zip will be left intact on failure. */
	page_zip_des_t*	page_zip,/* out: compressed page */
	const page_t*	page)	/* in: uncompressed page */
{
	z_stream	c_stream;
	int		err;
	byte*		buf;
	ulint		trailer_len;
	ulint		data_len;	/* length of the payload to deflate */
	ulint		avail_out;	/* room for the deflated payload */

	ut_ad(page_zip_simple_validate(page_zip));
#ifdef UNIV_DEBUG
	if (page_is_comp((page_t*) page)) {
		ut_ad(page_simple_validate_new((page_t*) page));
	} else {
		ut_ad(page_simple_validate_old((page_t*) page));
	}
#endif /* UNIV_DEBUG */

	/* Determine the length of the page trailer. */
	trailer_len = page + UNIV_PAGE_SIZE
		- page_dir_get_nth_slot((page_t*) page,
			page_dir_get_n_slots((page_t*) page) - 1);
	ut_ad(trailer_len < UNIV_PAGE_SIZE - PAGE_DATA);

	/* The header, the trailer and the terminating zero byte of the
	modification log must leave room for at least one byte of payload.
	Check this before allocating anything, so that nothing needs to be
	released on this failure path. */
	if (PAGE_DATA + 1 + trailer_len >= page_zip->size) {

		return(FALSE);
	}

	data_len = page_header_get_field((page_t*) page, PAGE_HEAP_TOP)
		- PAGE_DATA;
	/* Reserve one byte after the deflated payload: the modification
	log is terminated by a zero byte, which must not overlap the
	trailer (page_zip_write() asserts m_end < size - trailer_len). */
	avail_out = page_zip->size - (PAGE_DATA + 1) - trailer_len;

	/* Deflate to a scratch buffer so that page_zip stays intact
	if the payload does not fit. */
	buf = mem_alloc(page_zip->size - PAGE_DATA);

	/* Compress the data payload. */
	c_stream.zalloc = (alloc_func) 0;
	c_stream.zfree = (free_func) 0;
	c_stream.opaque = (voidpf) 0;

	err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
	ut_a(err == Z_OK);

	c_stream.next_out = buf;
	c_stream.next_in = (void*) (page + PAGE_DATA);
	c_stream.avail_out = avail_out;
	c_stream.avail_in = data_len;

	err = deflate(&c_stream, Z_FINISH);
	if (err != Z_STREAM_END) {
		deflateEnd(&c_stream);
		mem_free(buf);
		return(FALSE);
	}

	err = deflateEnd(&c_stream);
	ut_a(err == Z_OK);

	/* deflate() decrements avail_in and avail_out as it makes
	progress, so the totals must be compared against the amounts
	that were originally offered, not against the remainders. */
	ut_a(c_stream.total_in == (uLong) data_len);
	ut_a(c_stream.total_out <= (uLong) avail_out);

	page_zip->m_end = page_zip->m_start = PAGE_DATA + c_stream.total_out;

	/* Copy the page header */
	memcpy(page_zip->data, page, PAGE_DATA);
	/* Copy the compressed data */
	memcpy(page_zip->data + PAGE_DATA, buf, c_stream.total_out);
	/* Zero out the area reserved for the modification log;
	this is at least one byte long because of the reservation above */
	memset(page_zip->data + PAGE_DATA + c_stream.total_out, 0,
		page_zip->size - PAGE_DATA - trailer_len - c_stream.total_out);
	/* Copy the page trailer */
	memcpy(page_zip->data + page_zip->size - trailer_len,
		page + UNIV_PAGE_SIZE - trailer_len, trailer_len);
	mem_free(buf);
	ut_ad(page_zip_validate(page_zip, page));
	return(TRUE);
}
/**************************************************************************
Decode a variable-length integer from the modification log of the
compressed page. One-byte form 0xxxxxxx holds 0..127; two-byte form
10xxxxxx xxxxxxxx holds 0..16383; a first byte of 11xxxxxx is reserved. */
static
ulint
page_zip_ulint_read(
/*================*/
				/* out: length of the integer, in bytes;
				zero on failure */
	const byte*	src,	/* in: where to read */
	ulint*		dest)	/* out: the decoded integer,
				or ULINT_MAX on failure */
{
	const ulint	first = (unsigned char) src[0];

	if (first >= 192) {
		/* 11xxxxxx xxxxxxxx: reserved */
		*dest = ULINT_MAX;
		return(0);
	}

	if (first >= 128) {
		/* 10xxxxxx xxxxxxxx: 0..16383; drop the marker bit and
		combine the remaining bits with the second byte */
		*dest = ((first & 0x7f) << 8) | (unsigned char) src[1];
		return(2);
	}

	/* 0xxxxxxx: 0..127 */
	*dest = first;
	return(1);
}
/**************************************************************************
Encode an integer into the modification log of the compressed page,
using one byte (0xxxxxxx) for 0..127 and two bytes (10xxxxxx xxxxxxxx)
for 128..16383. Larger values are not representable and trigger
ut_error. */
static
ulint
page_zip_ulint_write(
/*=================*/
			/* out: length of the integer, in bytes;
			zero on failure */
	byte*	dest,	/* out: where to write */
	ulint	num)	/* in: integer to write */
{
	if (num >= 16384) {
		/* 11xxxxxx xxxxxxxx: reserved */
		ut_error;
		return(0);
	}

	if (num >= 128) {
		/* 10xxxxxx xxxxxxxx: 0..16383 */
		dest[0] = (num >> 8) | 0x80;
		dest[1] = num;
		return(2);
	}

	/* 0xxxxxxx: 0..127 */
	dest[0] = num;
	return(1);
}
/**************************************************************************
Decompress a page.

Inflates the payload that follows the first PAGE_DATA bytes of
page_zip->data into the uncompressed page, copies the page header and
the page trailer from the compressed page, and finally applies the
modification log (a zero-terminated sequence of length, offset, data
entries as appended by page_zip_write()). */
ibool
page_zip_decompress(
/*================*/
				/* out: TRUE on success, FALSE on failure */
	page_zip_des_t*	page_zip,/* in: data, size; out: m_start, m_end */
	page_t*		page,	/* out: uncompressed page, may be trashed */
	mtr_t*		mtr)	/* in: mini-transaction handle,
				or NULL if no logging is needed */
{
	z_stream	d_stream;
	int		err;
	ulint		trailer_len;
	ulint		heap_top;
	ulint		avail_in;	/* max. length of deflated payload */
	ulint		data_len;	/* max. length of inflated payload */

	ut_ad(page_zip_simple_validate(page_zip));
	trailer_len = PAGE_DIR
		+ PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);
	heap_top = page_header_get_field((page_t*) page_zip->data,
						PAGE_HEAP_TOP);
	ut_ad(trailer_len < page_zip->size - PAGE_DATA);
	ut_ad(heap_top <= UNIV_PAGE_SIZE - trailer_len);

	/* The sizes below are derived from the stored page header.
	Refuse header values that would make the unsigned subtractions
	wrap around, also in non-debug builds. */
	if (PAGE_DATA + 1 + trailer_len >= page_zip->size
	    || heap_top < PAGE_DATA
	    || heap_top > UNIV_PAGE_SIZE - trailer_len) {

		return(FALSE);
	}

	/* One byte before the trailer is reserved for the terminating
	zero byte of the modification log, as in page_zip_compress(). */
	avail_in = page_zip->size - trailer_len - (PAGE_DATA + 1);
	data_len = heap_top - PAGE_DATA;

	d_stream.zalloc = (alloc_func) 0;
	d_stream.zfree = (free_func) 0;
	d_stream.opaque = (voidpf) 0;

	err = inflateInit(&d_stream);
	ut_a(err == Z_OK);

	d_stream.next_in = page_zip->data + PAGE_DATA;
	d_stream.next_out = page + PAGE_DATA;
	d_stream.avail_in = avail_in;
	d_stream.avail_out = data_len;

	err = inflate(&d_stream, Z_FINISH);
	if (err != Z_STREAM_END) {
		inflateEnd(&d_stream);
		return(FALSE);
	}

	err = inflateEnd(&d_stream);
	ut_a(err == Z_OK);

	/* inflate() decrements avail_in and avail_out as it makes
	progress, so the totals must be compared against the amounts
	that were originally offered, not against the remainders. */
	ut_a(d_stream.total_in <= (uLong) avail_in);
	ut_a(d_stream.total_out <= (uLong) data_len);

	page_zip->m_end = page_zip->m_start = PAGE_DATA + d_stream.total_in;

	/* Copy the page header */
	memcpy(page, page_zip->data, PAGE_DATA);
	/* Copy the page trailer from the compressed page to the
	uncompressed page */
	memcpy(page + UNIV_PAGE_SIZE - trailer_len,
		page_zip->data + page_zip->size - trailer_len, trailer_len);

	/* Apply the modification log. */
	while (page_zip->data[page_zip->m_end]) {
		ulint	ulint_len;
		ulint	length, offset;

		ulint_len = page_zip_ulint_read(
				page_zip->data + page_zip->m_end, &length);
		page_zip->m_end += ulint_len;
		if (!ulint_len
		    || page_zip->m_end + length
		    >= page_zip->size - trailer_len) {
			return(FALSE);
		}
		ut_a(length > 0 && length < UNIV_PAGE_SIZE - PAGE_DATA);

		ulint_len = page_zip_ulint_read(
				page_zip->data + page_zip->m_end, &offset);
		page_zip->m_end += ulint_len;
		if (!ulint_len
		    || page_zip->m_end + length
		    >= page_zip->size - trailer_len) {
			return(FALSE);
		}

		/* Logged offsets are relative to PAGE_DATA */
		offset += PAGE_DATA;
		/* page_zip_write() accepts data that ends exactly where
		the trailer begins, so the same bound is used here */
		ut_a(offset + length <= UNIV_PAGE_SIZE - trailer_len);

		memcpy(page + offset, page_zip->data + page_zip->m_end,
			length);
		page_zip->m_end += length;
	}

	ut_a(page_is_comp(page));
	ut_ad(page_simple_validate_new(page));

	if (UNIV_LIKELY_NULL(mtr)) {
		byte*	log_ptr = mlog_open(mtr, 11);

		if (log_ptr) {
			log_ptr = mlog_write_initial_log_record_fast(
					page, MLOG_COMP_DECOMPRESS,
					log_ptr, mtr);
			mlog_close(mtr, log_ptr);
		}
	}

	return(TRUE);
}
#ifdef UNIV_DEBUG
/**************************************************************************
Check that the compressed and decompressed pages match. */
ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip, /* in: compressed page */
const page_t* page) /* in: uncompressed page */
{
page_zip_des_t temp_page_zip = *page_zip;
page_t temp_page[UNIV_PAGE_SIZE];
ut_ad(buf_block_get_page_zip(buf_block_align((byte*)page))
== page_zip);
return(page_zip_decompress(&temp_page_zip, temp_page, NULL)
&& !memcmp(page, temp_page, UNIV_PAGE_SIZE));
}
#endif /* UNIV_DEBUG */
/**************************************************************************
Write data to the compressed portion of a page by appending an entry
(length, offset relative to PAGE_DATA, data bytes) to the modification
log. The data must already have been written to the uncompressed page. */
void
page_zip_write(
/*===========*/
	page_zip_des_t*	page_zip,/* in/out: compressed page */
	const byte*	str,	/* in: address on the uncompressed page */
	ulint		length)	/* in: length of the data */
{
	ulint	pos = ut_align_offset(str, UNIV_PAGE_SIZE);
	ulint	log_end;	/* running end offset of the log */
#ifdef UNIV_DEBUG
	ulint	trailer_len = PAGE_DIR
		+ PAGE_DIR_SLOT_SIZE
		* page_dir_get_n_slots((page_t*) page_zip->data);
#endif /* UNIV_DEBUG */

	ut_ad(buf_block_get_page_zip(buf_block_align((byte*)str)) == page_zip);
	ut_ad(page_zip_simple_validate(page_zip));
	ut_ad(page_zip->m_start >= PAGE_DATA);
	ut_ad(page_dir_get_n_slots(ut_align_down((byte*) str, UNIV_PAGE_SIZE))
		== page_dir_get_n_slots((page_t*) page_zip->data));
	ut_ad(!page_zip->data[page_zip->m_end]);

	ut_ad(PAGE_DATA + trailer_len < page_zip->size);

	ut_ad(pos >= PAGE_DATA);
	ut_ad(pos + length <= UNIV_PAGE_SIZE - trailer_len);

	/* Offsets are logged relative to PAGE_DATA */
	pos -= PAGE_DATA;

	ut_ad(page_zip_available(page_zip, page_zip_entry_size(pos, length)));

	/* Append to the modification log: length, offset, data. */
	log_end = page_zip->m_end;
	log_end += page_zip_ulint_write(page_zip->data + log_end, length);
	log_end += page_zip_ulint_write(page_zip->data + log_end, pos);
	memcpy(page_zip->data + log_end, str, length);
	log_end += length;
	page_zip->m_end = log_end;

	ut_ad(!page_zip->data[page_zip->m_end]);
	ut_ad(page_zip->m_end < page_zip->size - trailer_len);
	ut_ad(page_zip_validate(page_zip,
			ut_align_down((byte*) str, UNIV_PAGE_SIZE)));
}
#ifdef UNIV_DEBUG
/**************************************************************************
Determine if enough space is available in the modification log.
Out-of-line wrapper that forwards to page_zip_available(). */
ibool
page_zip_available_noninline(
/*=========================*/
					/* out: TRUE if enough space
					is available */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	ulint			size)	/* in: requested size, passed
					on to page_zip_available() */
{
	ibool	enough;

	enough = page_zip_available(page_zip, size);

	return(enough);
}
#endif /* UNIV_DEBUG */

View file

@ -537,14 +537,13 @@ rec_set_nth_field_null_bit(
} }
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit of an old-style record. */ Sets the ith field extern storage bit of an old-style record. */
void void
rec_set_nth_field_extern_bit_old( rec_set_nth_field_extern_bit_old(
/*=============================*/ /*=============================*/
rec_t* rec, /* in: old-style record */ rec_t* rec, /* in: old-style record */
ulint i, /* in: ith field */ ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page where mtr_t* mtr) /* in: mtr holding an X-latch to the page where
rec is, or NULL; in the NULL case we do not rec is, or NULL; in the NULL case we do not
write to log about the change */ write to log about the change */
@ -556,11 +555,7 @@ rec_set_nth_field_extern_bit_old(
info = rec_2_get_field_end_info(rec, i); info = rec_2_get_field_end_info(rec, i);
if (val) { info |= REC_2BYTE_EXTERN_MASK;
info = info | REC_2BYTE_EXTERN_MASK;
} else {
info = info & ~REC_2BYTE_EXTERN_MASK;
}
if (mtr) { if (mtr) {
mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1), mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1),
@ -571,7 +566,7 @@ rec_set_nth_field_extern_bit_old(
} }
/*************************************************************** /***************************************************************
Sets the value of the ith field extern storage bit of a new-style record. */ Sets the ith field extern storage bit of a new-style record. */
void void
rec_set_nth_field_extern_bit_new( rec_set_nth_field_extern_bit_new(
@ -579,7 +574,6 @@ rec_set_nth_field_extern_bit_new(
rec_t* rec, /* in: record */ rec_t* rec, /* in: record */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
ulint ith, /* in: ith field */ ulint ith, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr holding an X-latch to the page mtr_t* mtr) /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case where rec is, or NULL; in the NULL case
we do not write to log about the change */ we do not write to log about the change */
@ -632,11 +626,11 @@ rec_set_nth_field_extern_bit_new(
ulint len = lens[1]; ulint len = lens[1];
if (len & 0x80) { /* 1exxxxxx: 2-byte length */ if (len & 0x80) { /* 1exxxxxx: 2-byte length */
if (i == ith) { if (i == ith) {
if (!val == !(len & 0x40)) { if (len & 0x40) {
return; /* no change */ return; /* no change */
} }
/* toggle the extern bit */ /* toggle the extern bit */
len ^= 0x40; len |= 0x40;
if (mtr) { if (mtr) {
mlog_write_ulint(lens + 1, len, mlog_write_ulint(lens + 1, len,
MLOG_1BYTE, mtr); MLOG_1BYTE, mtr);
@ -677,12 +671,11 @@ rec_set_field_extern_bits(
if (UNIV_LIKELY(index->table->comp)) { if (UNIV_LIKELY(index->table->comp)) {
for (i = 0; i < n_fields; i++) { for (i = 0; i < n_fields; i++) {
rec_set_nth_field_extern_bit_new(rec, index, vec[i], rec_set_nth_field_extern_bit_new(rec, index, vec[i],
TRUE, mtr); mtr);
} }
} else { } else {
for (i = 0; i < n_fields; i++) { for (i = 0; i < n_fields; i++) {
rec_set_nth_field_extern_bit_old(rec, vec[i], rec_set_nth_field_extern_bit_old(rec, vec[i], mtr);
TRUE, mtr);
} }
} }
} }
@ -745,7 +738,7 @@ rec_convert_dtuple_to_rec_old(
rec_set_n_fields_old(rec, n_fields); rec_set_n_fields_old(rec, n_fields);
/* Set the info bits of the record */ /* Set the info bits of the record */
rec_set_info_bits(rec, FALSE, rec_set_info_bits_old(rec,
dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
/* Store the data and the offsets */ /* Store the data and the offsets */
@ -835,8 +828,6 @@ rec_convert_dtuple_to_rec_new(
ulint fixed_len; ulint fixed_len;
ulint null_mask = 1; ulint null_mask = 1;
const ulint n_fields = dtuple_get_n_fields(dtuple); const ulint n_fields = dtuple_get_n_fields(dtuple);
const ulint status = dtuple_get_info_bits(dtuple)
& REC_NEW_STATUS_MASK;
ut_ad(index->table->comp); ut_ad(index->table->comp);
ut_ad(n_fields > 0); ut_ad(n_fields > 0);
@ -847,7 +838,8 @@ rec_convert_dtuple_to_rec_new(
UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields); UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
UNIV_PREFETCH_RW(rec); UNIV_PREFETCH_RW(rec);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { switch (UNIV_EXPECT(dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK,
REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY: case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index)); ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED; n_node_ptr_field = ULINT_UNDEFINED;
@ -862,7 +854,7 @@ rec_convert_dtuple_to_rec_new(
n_node_ptr_field = ULINT_UNDEFINED; n_node_ptr_field = ULINT_UNDEFINED;
goto init; goto init;
default: default:
ut_a(0); ut_error;
return(0); return(0);
} }
@ -912,10 +904,8 @@ init:
memset (lens + 1, 0, nulls - lens); memset (lens + 1, 0, nulls - lens);
/* Set the info bits of the record */ /* Set the info bits of the record */
rec_set_status(rec, status); rec_set_info_and_status_bits(rec, NULL,
dtuple_get_info_bits(dtuple));
rec_set_info_bits(rec, TRUE,
dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
/* Store the data and the offsets */ /* Store the data and the offsets */
@ -928,6 +918,7 @@ init:
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(len == 4); ut_ad(len == 4);
memcpy(end, dfield_get_data(field), len); memcpy(end, dfield_get_data(field), len);
end += 4;
break; break;
} }
fixed_len = dict_index_get_nth_field(index, i)->fixed_len; fixed_len = dict_index_get_nth_field(index, i)->fixed_len;

View file

@ -2409,7 +2409,7 @@ row_ins_step(
goto same_trx; goto same_trx;
} }
trx_write_trx_id(node->trx_id_buf, trx->id); trx_write_trx_id(node->trx_id_buf, NULL, trx->id);
err = lock_table(0, node->table, LOCK_IX, thr); err = lock_table(0, node->table, LOCK_IX, thr);

View file

@ -67,9 +67,10 @@ is slower than the specialized inline functions. */
void void
row_set_rec_sys_field( row_set_rec_sys_field(
/*==================*/ /*==================*/
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page with at least
10 or 11 bytes available, or NULL */
dict_index_t* index, /* in: clustered index */ dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val) /* in: value to set */ dulint val) /* in: value to set */
@ -87,11 +88,11 @@ row_set_rec_sys_field(
if (type == DATA_TRX_ID) { if (type == DATA_TRX_ID) {
trx_write_trx_id(field, val); trx_write_trx_id(field, page_zip/* 10 bytes */, val);
} else { } else {
ut_ad(type == DATA_ROLL_PTR); ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, val); trx_write_roll_ptr(field, page_zip/* 11 bytes */, val);
} }
} }

View file

@ -2059,7 +2059,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield = dtuple_get_nth_field(tuple, 0); dfield = dtuple_get_nth_field(tuple, 0);
field = dict_index_get_nth_field(index, 0); field = dict_index_get_nth_field(index, 0);
if (dfield_get_type(dfield)->mtype == DATA_SYS) { if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
/* A special case: we are looking for a position in the /* A special case: we are looking for a position in the
generated clustered index which InnoDB automatically added generated clustered index which InnoDB automatically added
to a table with no primary key: the first and the only to a table with no primary key: the first and the only
@ -2077,8 +2077,9 @@ row_sel_convert_mysql_key_to_innobase(
while (key_ptr < key_end) { while (key_ptr < key_end) {
ut_a(dict_col_get_type(field->col)->mtype type = dfield_get_type(dfield)->mtype;
== dfield_get_type(dfield)->mtype);
ut_a(dict_col_get_type(field->col)->mtype == type);
data_offset = 0; data_offset = 0;
is_null = FALSE; is_null = FALSE;
@ -2096,8 +2097,6 @@ row_sel_convert_mysql_key_to_innobase(
} }
} }
type = dfield_get_type(dfield)->mtype;
/* Calculate data length and data field total length */ /* Calculate data length and data field total length */
if (type == DATA_BLOB) { if (type == DATA_BLOB) {
@ -2143,9 +2142,9 @@ row_sel_convert_mysql_key_to_innobase(
data_field_len = data_offset + data_len; data_field_len = data_offset + data_len;
} }
if (dtype_get_mysql_type(dfield_get_type(dfield)) if (UNIV_UNLIKELY(dtype_get_mysql_type(dfield_get_type(dfield))
== DATA_MYSQL_TRUE_VARCHAR == DATA_MYSQL_TRUE_VARCHAR)
&& dfield_get_type(dfield)->mtype != DATA_INT) { && UNIV_LIKELY(type != DATA_INT)) {
/* In a MySQL key value format, a true VARCHAR is /* In a MySQL key value format, a true VARCHAR is
always preceded by 2 bytes of a length field. always preceded by 2 bytes of a length field.
dfield_get_type(dfield)->len returns the maximum dfield_get_type(dfield)->len returns the maximum
@ -2161,7 +2160,7 @@ row_sel_convert_mysql_key_to_innobase(
/* Storing may use at most data_len bytes of buf */ /* Storing may use at most data_len bytes of buf */
if (!is_null) { if (UNIV_LIKELY(!is_null)) {
row_mysql_store_col_in_innobase_format( row_mysql_store_col_in_innobase_format(
dfield, dfield,
buf, buf,
@ -2174,7 +2173,7 @@ row_sel_convert_mysql_key_to_innobase(
key_ptr += data_field_len; key_ptr += data_field_len;
if (key_ptr > key_end) { if (UNIV_UNLIKELY(key_ptr > key_end)) {
/* The last field in key was not a complete key field /* The last field in key was not a complete key field
but a prefix of it. but a prefix of it.

View file

@ -301,7 +301,8 @@ recovery. */
void void
row_upd_rec_sys_fields_in_recovery( row_upd_rec_sys_fields_in_recovery(
/*===============================*/ /*===============================*/
rec_t* rec, /* in: record */ rec_t* rec, /* in/out: record */
page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
const ulint* offsets,/* in: array returned by rec_get_offsets() */ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint pos, /* in: TRX_ID position in rec */ ulint pos, /* in: TRX_ID position in rec */
dulint trx_id, /* in: transaction id */ dulint trx_id, /* in: transaction id */
@ -312,11 +313,11 @@ row_upd_rec_sys_fields_in_recovery(
field = rec_get_nth_field(rec, offsets, pos, &len); field = rec_get_nth_field(rec, offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN); ut_ad(len == DATA_TRX_ID_LEN);
trx_write_trx_id(field, trx_id); trx_write_trx_id(field, page_zip, trx_id);
field = rec_get_nth_field(rec, offsets, pos + 1, &len); field = rec_get_nth_field(rec, offsets, pos + 1, &len);
ut_ad(len == DATA_ROLL_PTR_LEN); ut_ad(len == DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(field, roll_ptr); trx_write_roll_ptr(field, page_zip, roll_ptr);
} }
/************************************************************************* /*************************************************************************
@ -345,10 +346,10 @@ row_upd_index_entry_sys_field(
field = dfield_get_data(dfield); field = dfield_get_data(dfield);
if (type == DATA_TRX_ID) { if (type == DATA_TRX_ID) {
trx_write_trx_id(field, val); trx_write_trx_id(field, NULL, val);
} else { } else {
ut_ad(type == DATA_ROLL_PTR); ut_ad(type == DATA_ROLL_PTR);
trx_write_roll_ptr(field, val); trx_write_roll_ptr(field, NULL, val);
} }
} }
@ -445,7 +446,11 @@ row_upd_rec_in_place(
ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(rec_offs_validate(rec, NULL, offsets));
rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits); if (rec_offs_comp(offsets)) {
rec_set_info_bits_new(rec, NULL, update->info_bits);
} else {
rec_set_info_bits_old(rec, update->info_bits);
}
n_fields = upd_get_n_fields(update); n_fields = upd_get_n_fields(update);
@ -480,7 +485,7 @@ row_upd_write_sys_vals_to_log(
log_ptr += mach_write_compressed(log_ptr, log_ptr += mach_write_compressed(log_ptr,
dict_index_get_sys_col_pos(index, DATA_TRX_ID)); dict_index_get_sys_col_pos(index, DATA_TRX_ID));
trx_write_roll_ptr(log_ptr, roll_ptr); trx_write_roll_ptr(log_ptr, NULL, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN; log_ptr += DATA_ROLL_PTR_LEN;
log_ptr += mach_dulint_write_compressed(log_ptr, trx->id); log_ptr += mach_dulint_write_compressed(log_ptr, trx->id);
@ -2040,5 +2045,11 @@ row_upd_in_place_in_select(
err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur, err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
node->update, node->cmpl_info, node->update, node->cmpl_info,
thr, mtr); thr, mtr);
/* TODO: the above can fail if page_zip != NULL.
However, this function row_upd_in_place_in_select() is only invoked
when executing UPDATE statements of the built-in InnoDB SQL parser.
The built-in SQL is only used for InnoDB system tables, which
always are in the old, uncompressed format (ROW_FORMAT=REDUNDANT,
comp == FALSE, page_zip == NULL). */
ut_ad(err == DB_SUCCESS); ut_ad(err == DB_SUCCESS);
} }

View file

@ -807,7 +807,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields); upd_field = upd_get_nth_field(update, n_fields);
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN); buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
trx_write_trx_id(buf, trx_id); trx_write_trx_id(buf, NULL, trx_id);
upd_field_set_field_no(upd_field, upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID), dict_index_get_sys_col_pos(index, DATA_TRX_ID),
@ -816,7 +816,7 @@ trx_undo_update_rec_get_update(
upd_field = upd_get_nth_field(update, n_fields + 1); upd_field = upd_get_nth_field(update, n_fields + 1);
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN); buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(buf, roll_ptr); trx_write_roll_ptr(buf, NULL, roll_ptr);
upd_field_set_field_no(upd_field, upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_ROLL_PTR), dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),