mirror of
https://github.com/MariaDB/server.git
synced 2025-01-25 00:04:33 +01:00
564 lines
14 KiB
C
564 lines
14 KiB
C
/************************************************************************
|
|
Record manager
|
|
|
|
(c) 1994-2001 Innobase Oy
|
|
|
|
Created 5/30/1994 Heikki Tuuri
|
|
*************************************************************************/
|
|
|
|
#include "rem0rec.h"
|
|
|
|
#ifdef UNIV_NONINL
|
|
#include "rem0rec.ic"
|
|
#endif
|
|
|
|
#include "mtr0mtr.h"
|
|
#include "mtr0log.h"
|
|
|
|
/* PHYSICAL RECORD
|
|
===============
|
|
|
|
The physical record, which is the data type of all the records
|
|
found in index pages of the database, has the following format
|
|
(lower addresses and more significant bits inside a byte are below
|
|
represented on a higher text line):
|
|
|
|
| offset of the end of the last field of data, the most significant
|
|
bit is set to 1 if and only if the field is SQL-null,
|
|
if the offset is 2-byte, then the second most significant
|
|
bit is set to 1 if the field is stored on another page:
|
|
mostly this will occur in the case of big BLOB fields |
|
|
...
|
|
| offset of the end of the first field of data + the SQL-null bit |
|
|
| 4 bits used to delete mark a record, and mark a predefined
|
|
minimum record in alphabetical order |
|
|
| 4 bits giving the number of records owned by this record
|
|
(this term is explained in page0page.h) |
|
|
| 13 bits giving the order number of this record in the
|
|
heap of the index page |
|
|
| 10 bits giving the number of fields in this record |
|
|
| 1 bit which is set to 1 if the offsets above are given in
|
|
one byte format, 0 if in two byte format |
|
|
| two bytes giving the pointer to the next record in the page |
|
|
ORIGIN of the record
|
|
| first field of data |
|
|
...
|
|
| last field of data |
|
|
|
|
The origin of the record is the start address of the first field
|
|
of data. The offsets are given relative to the origin.
|
|
The offsets of the data fields are stored in an inverted
|
|
order because then the offset of the first fields are near the
|
|
origin, giving maybe a better processor cache hit rate in searches.
|
|
|
|
The offsets of the data fields are given as one-byte
|
|
(if there are less than 127 bytes of data in the record)
|
|
or two-byte unsigned integers. The most significant bit
|
|
is not part of the offset, instead it indicates the SQL-null
|
|
if the bit is set to 1.
|
|
|
|
CANONICAL COORDINATES. A record can be seen as a single
|
|
string of 'characters' in the following way: catenate the bytes
|
|
in each field, in the order of fields. An SQL-null field
|
|
is taken to be an empty sequence of bytes. Then after
|
|
the position of each field insert in the string
|
|
the 'character' <FIELD-END>, except that after an SQL-null field
|
|
insert <NULL-FIELD-END>. Now the ordinal position of each
|
|
byte in this canonical string is its canonical coordinate.
|
|
So, for the record ("AA", SQL-NULL, "BB", ""), the canonical
|
|
string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>".
|
|
We identify prefixes (= initial segments) of a record
|
|
with prefixes of the canonical string. The canonical
|
|
length of the prefix is the length of the corresponding
|
|
prefix of the canonical string. The canonical length of
|
|
a record is the length of its canonical string.
|
|
|
|
For example, the maximal common prefix of records
|
|
("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C")
|
|
is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical
|
|
length is 5.
|
|
|
|
A complete-field prefix of a record is a prefix which ends at the
|
|
end of some field (containing also <FIELD-END>).
|
|
A record is a complete-field prefix of another record, if
|
|
the corresponding canonical strings have the same property. */
|
|
|
|
ulint rec_dummy; /* this is used to fool compiler in
|
|
rec_validate */
|
|
|
|
/****************************************************************
|
|
The following function is used to get a pointer to the nth data field in a
|
|
record. */
|
|
|
|
byte*
|
|
rec_get_nth_field(
|
|
/*==============*/
|
|
/* out: pointer to the field */
|
|
rec_t* rec, /* in: record */
|
|
ulint n, /* in: index of the field */
|
|
ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
|
|
null */
|
|
{
|
|
ulint os;
|
|
ulint next_os;
|
|
|
|
ut_ad(rec && len);
|
|
ut_ad(n < rec_get_n_fields(rec));
|
|
|
|
if (n > 1024) {
|
|
fprintf(stderr, "Error: trying to access field %lu in rec\n",
|
|
(ulong) n);
|
|
ut_error;
|
|
}
|
|
|
|
if (rec == NULL) {
|
|
fputs("Error: rec is NULL pointer\n", stderr);
|
|
ut_error;
|
|
}
|
|
|
|
if (rec_get_1byte_offs_flag(rec)) {
|
|
os = rec_1_get_field_start_offs(rec, n);
|
|
|
|
next_os = rec_1_get_field_end_info(rec, n);
|
|
|
|
if (next_os & REC_1BYTE_SQL_NULL_MASK) {
|
|
*len = UNIV_SQL_NULL;
|
|
|
|
return(rec + os);
|
|
}
|
|
|
|
next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
|
|
} else {
|
|
os = rec_2_get_field_start_offs(rec, n);
|
|
|
|
next_os = rec_2_get_field_end_info(rec, n);
|
|
|
|
if (next_os & REC_2BYTE_SQL_NULL_MASK) {
|
|
*len = UNIV_SQL_NULL;
|
|
|
|
return(rec + os);
|
|
}
|
|
|
|
next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
|
|
| REC_2BYTE_EXTERN_MASK);
|
|
}
|
|
|
|
*len = next_os - os;
|
|
|
|
ut_ad(*len < UNIV_PAGE_SIZE);
|
|
|
|
return(rec + os);
|
|
}
|
|
|
|
/***************************************************************
|
|
Sets the value of the ith field SQL null bit. */
|
|
|
|
void
|
|
rec_set_nth_field_null_bit(
|
|
/*=======================*/
|
|
rec_t* rec, /* in: record */
|
|
ulint i, /* in: ith field */
|
|
ibool val) /* in: value to set */
|
|
{
|
|
ulint info;
|
|
|
|
if (rec_get_1byte_offs_flag(rec)) {
|
|
|
|
info = rec_1_get_field_end_info(rec, i);
|
|
|
|
if (val) {
|
|
info = info | REC_1BYTE_SQL_NULL_MASK;
|
|
} else {
|
|
info = info & ~REC_1BYTE_SQL_NULL_MASK;
|
|
}
|
|
|
|
rec_1_set_field_end_info(rec, i, info);
|
|
|
|
return;
|
|
}
|
|
|
|
info = rec_2_get_field_end_info(rec, i);
|
|
|
|
if (val) {
|
|
info = info | REC_2BYTE_SQL_NULL_MASK;
|
|
} else {
|
|
info = info & ~REC_2BYTE_SQL_NULL_MASK;
|
|
}
|
|
|
|
rec_2_set_field_end_info(rec, i, info);
|
|
}
|
|
|
|
/***************************************************************
|
|
Sets the value of the ith field extern storage bit. */
|
|
|
|
void
|
|
rec_set_nth_field_extern_bit(
|
|
/*=========================*/
|
|
rec_t* rec, /* in: record */
|
|
ulint i, /* in: ith field */
|
|
ibool val, /* in: value to set */
|
|
mtr_t* mtr) /* in: mtr holding an X-latch to the page where
|
|
rec is, or NULL; in the NULL case we do not
|
|
write to log about the change */
|
|
{
|
|
ulint info;
|
|
|
|
ut_a(!rec_get_1byte_offs_flag(rec));
|
|
ut_a(i < rec_get_n_fields(rec));
|
|
|
|
info = rec_2_get_field_end_info(rec, i);
|
|
|
|
if (val) {
|
|
info = info | REC_2BYTE_EXTERN_MASK;
|
|
} else {
|
|
info = info & ~REC_2BYTE_EXTERN_MASK;
|
|
}
|
|
|
|
if (mtr) {
|
|
mlog_write_ulint(rec - REC_N_EXTRA_BYTES - 2 * (i + 1), info,
|
|
MLOG_2BYTES, mtr);
|
|
} else {
|
|
rec_2_set_field_end_info(rec, i, info);
|
|
}
|
|
}
|
|
|
|
/***************************************************************
|
|
Sets TRUE the extern storage bits of fields mentioned in an array. */
|
|
|
|
void
|
|
rec_set_field_extern_bits(
|
|
/*======================*/
|
|
rec_t* rec, /* in: record */
|
|
ulint* vec, /* in: array of field numbers */
|
|
ulint n_fields, /* in: number of fields numbers */
|
|
mtr_t* mtr) /* in: mtr holding an X-latch to the page
|
|
where rec is, or NULL; in the NULL case we
|
|
do not write to log about the change */
|
|
{
|
|
ulint i;
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
rec_set_nth_field_extern_bit(rec, vec[i], TRUE, mtr);
|
|
}
|
|
}
|
|
|
|
/***************************************************************
|
|
Sets a record field to SQL null. The physical size of the field is not
|
|
changed. */
|
|
|
|
void
|
|
rec_set_nth_field_sql_null(
|
|
/*=======================*/
|
|
rec_t* rec, /* in: record */
|
|
ulint n) /* in: index of the field */
|
|
{
|
|
ulint offset;
|
|
|
|
offset = rec_get_field_start_offs(rec, n);
|
|
|
|
data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n));
|
|
|
|
rec_set_nth_field_null_bit(rec, n, TRUE);
|
|
}
|
|
|
|
/*************************************************************
|
|
Builds a physical record out of a data tuple and stores it beginning from
|
|
address destination. */
|
|
|
|
rec_t*
|
|
rec_convert_dtuple_to_rec_low(
|
|
/*==========================*/
|
|
/* out: pointer to the origin of physical
|
|
record */
|
|
byte* destination, /* in: start address of the physical record */
|
|
dtuple_t* dtuple, /* in: data tuple */
|
|
ulint data_size) /* in: data size of dtuple */
|
|
{
|
|
dfield_t* field;
|
|
ulint n_fields;
|
|
rec_t* rec;
|
|
ulint end_offset;
|
|
ulint ored_offset;
|
|
byte* data;
|
|
ulint len;
|
|
ulint i;
|
|
|
|
ut_ad(destination && dtuple);
|
|
ut_ad(dtuple_validate(dtuple));
|
|
ut_ad(dtuple_check_typed(dtuple));
|
|
ut_ad(dtuple_get_data_size(dtuple) == data_size);
|
|
|
|
n_fields = dtuple_get_n_fields(dtuple);
|
|
|
|
ut_ad(n_fields > 0);
|
|
|
|
/* Calculate the offset of the origin in the physical record */
|
|
|
|
rec = destination + rec_get_converted_extra_size(data_size, n_fields);
|
|
|
|
/* Store the number of fields */
|
|
rec_set_n_fields(rec, n_fields);
|
|
|
|
/* Set the info bits of the record */
|
|
rec_set_info_bits(rec, dtuple_get_info_bits(dtuple));
|
|
|
|
/* Store the data and the offsets */
|
|
|
|
end_offset = 0;
|
|
|
|
if (data_size <= REC_1BYTE_OFFS_LIMIT) {
|
|
|
|
rec_set_1byte_offs_flag(rec, TRUE);
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
|
|
field = dtuple_get_nth_field(dtuple, i);
|
|
|
|
data = dfield_get_data(field);
|
|
len = dfield_get_len(field);
|
|
|
|
if (len == UNIV_SQL_NULL) {
|
|
len = dtype_get_sql_null_size(dfield_get_type(field));
|
|
data_write_sql_null(rec + end_offset, len);
|
|
|
|
end_offset += len;
|
|
ored_offset = end_offset | REC_1BYTE_SQL_NULL_MASK;
|
|
} else {
|
|
/* If the data is not SQL null, store it */
|
|
ut_memcpy(rec + end_offset, data, len);
|
|
|
|
end_offset += len;
|
|
ored_offset = end_offset;
|
|
}
|
|
|
|
rec_1_set_field_end_info(rec, i, ored_offset);
|
|
}
|
|
} else {
|
|
rec_set_1byte_offs_flag(rec, FALSE);
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
|
|
field = dtuple_get_nth_field(dtuple, i);
|
|
|
|
data = dfield_get_data(field);
|
|
len = dfield_get_len(field);
|
|
|
|
if (len == UNIV_SQL_NULL) {
|
|
len = dtype_get_sql_null_size(dfield_get_type(field));
|
|
data_write_sql_null(rec + end_offset, len);
|
|
|
|
end_offset += len;
|
|
ored_offset = end_offset | REC_2BYTE_SQL_NULL_MASK;
|
|
} else {
|
|
/* If the data is not SQL null, store it */
|
|
ut_memcpy(rec + end_offset, data, len);
|
|
|
|
end_offset += len;
|
|
ored_offset = end_offset;
|
|
}
|
|
|
|
rec_2_set_field_end_info(rec, i, ored_offset);
|
|
}
|
|
}
|
|
|
|
ut_ad(rec_validate(rec));
|
|
|
|
return(rec);
|
|
}
|
|
|
|
/******************************************************************
|
|
Copies the first n fields of a physical record to a data tuple. The fields
|
|
are copied to the memory heap. */
|
|
|
|
void
|
|
rec_copy_prefix_to_dtuple(
|
|
/*======================*/
|
|
dtuple_t* tuple, /* in: data tuple */
|
|
rec_t* rec, /* in: physical record */
|
|
ulint n_fields, /* in: number of fields to copy */
|
|
mem_heap_t* heap) /* in: memory heap */
|
|
{
|
|
dfield_t* field;
|
|
byte* data;
|
|
ulint len;
|
|
byte* buf = NULL;
|
|
ulint i;
|
|
|
|
ut_ad(rec_validate(rec));
|
|
ut_ad(dtuple_check_typed(tuple));
|
|
|
|
dtuple_set_info_bits(tuple, rec_get_info_bits(rec));
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
|
|
field = dtuple_get_nth_field(tuple, i);
|
|
data = rec_get_nth_field(rec, i, &len);
|
|
|
|
if (len != UNIV_SQL_NULL) {
|
|
buf = mem_heap_alloc(heap, len);
|
|
|
|
ut_memcpy(buf, data, len);
|
|
}
|
|
|
|
dfield_set_data(field, buf, len);
|
|
}
|
|
}
|
|
|
|
/******************************************************************
|
|
Copies the first n fields of a physical record to a new physical record in
|
|
a buffer. */
|
|
|
|
rec_t*
|
|
rec_copy_prefix_to_buf(
|
|
/*===================*/
|
|
/* out, own: copied record */
|
|
rec_t* rec, /* in: physical record */
|
|
ulint n_fields, /* in: number of fields to copy */
|
|
byte** buf, /* in/out: memory buffer for the copied prefix,
|
|
or NULL */
|
|
ulint* buf_size) /* in/out: buffer size */
|
|
{
|
|
rec_t* copy_rec;
|
|
ulint area_start;
|
|
ulint area_end;
|
|
ulint prefix_len;
|
|
|
|
ut_ad(rec_validate(rec));
|
|
|
|
area_end = rec_get_field_start_offs(rec, n_fields);
|
|
|
|
if (rec_get_1byte_offs_flag(rec)) {
|
|
area_start = REC_N_EXTRA_BYTES + n_fields;
|
|
} else {
|
|
area_start = REC_N_EXTRA_BYTES + 2 * n_fields;
|
|
}
|
|
|
|
prefix_len = area_start + area_end;
|
|
|
|
if ((*buf == NULL) || (*buf_size < prefix_len)) {
|
|
if (*buf != NULL) {
|
|
mem_free(*buf);
|
|
}
|
|
|
|
*buf = mem_alloc(prefix_len);
|
|
*buf_size = prefix_len;
|
|
}
|
|
|
|
ut_memcpy(*buf, rec - area_start, prefix_len);
|
|
|
|
copy_rec = *buf + area_start;
|
|
|
|
rec_set_n_fields(copy_rec, n_fields);
|
|
|
|
return(copy_rec);
|
|
}
|
|
|
|
/*******************************************************************
|
|
Validates the consistency of a physical record. */
|
|
|
|
ibool
|
|
rec_validate(
|
|
/*=========*/
|
|
/* out: TRUE if ok */
|
|
rec_t* rec) /* in: physical record */
|
|
{
|
|
byte* data;
|
|
ulint len;
|
|
ulint n_fields;
|
|
ulint len_sum = 0;
|
|
ulint sum = 0;
|
|
ulint i;
|
|
|
|
ut_a(rec);
|
|
n_fields = rec_get_n_fields(rec);
|
|
|
|
if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
|
|
fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
|
|
(ulong) n_fields);
|
|
return(FALSE);
|
|
}
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
data = rec_get_nth_field(rec, i, &len);
|
|
|
|
if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
|
|
fprintf(stderr,
|
|
"InnoDB: Error: record field %lu len %lu\n", (ulong) i,
|
|
(ulong) len);
|
|
return(FALSE);
|
|
}
|
|
|
|
if (len != UNIV_SQL_NULL) {
|
|
len_sum += len;
|
|
sum += *(data + len -1); /* dereference the
|
|
end of the field to
|
|
cause a memory trap
|
|
if possible */
|
|
} else {
|
|
len_sum += rec_get_nth_field_size(rec, i);
|
|
}
|
|
}
|
|
|
|
if (len_sum != (ulint)(rec_get_end(rec) - rec)) {
|
|
fprintf(stderr,
|
|
"InnoDB: Error: record len should be %lu, len %lu\n",
|
|
(ulong) len_sum,
|
|
(ulong) (rec_get_end(rec) - rec));
|
|
return(FALSE);
|
|
}
|
|
|
|
rec_dummy = sum; /* This is here only to fool the compiler */
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/*******************************************************************
|
|
Prints a physical record. */
|
|
|
|
void
|
|
rec_print(
|
|
/*======*/
|
|
FILE* file, /* in: file where to print */
|
|
rec_t* rec) /* in: physical record */
|
|
{
|
|
byte* data;
|
|
ulint len;
|
|
ulint n;
|
|
ulint i;
|
|
|
|
ut_ad(rec);
|
|
|
|
n = rec_get_n_fields(rec);
|
|
|
|
fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
|
|
" 1-byte offs %s; info bits %lu\n",
|
|
(ulong) n, rec_get_1byte_offs_flag(rec) ? "TRUE" : "FALSE",
|
|
(ulong) rec_get_info_bits(rec));
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
data = rec_get_nth_field(rec, i, &len);
|
|
|
|
fprintf(file, " %lu:", (ulong) i);
|
|
|
|
if (len != UNIV_SQL_NULL) {
|
|
if (len <= 30) {
|
|
|
|
ut_print_buf(file, data, len);
|
|
} else {
|
|
ut_print_buf(file, data, 30);
|
|
|
|
fputs("...(truncated)", file);
|
|
}
|
|
} else {
|
|
fprintf(file, " SQL NULL, size %lu ",
|
|
(ulong) rec_get_nth_field_size(rec, i));
|
|
|
|
}
|
|
putc(';', file);
|
|
}
|
|
|
|
putc('\n', file);
|
|
|
|
rec_validate(rec);
|
|
}
|