mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 17:33:44 +01:00
MDEV-19506 Remove the global sequence DICT_HDR_ROW_ID for DB_ROW_ID
InnoDB tables that lack a primary key (and any UNIQUE INDEX whose columns are all NOT NULL) will use an internally generated index, called GEN_CLUST_INDEX(DB_ROW_ID) in the InnoDB data dictionary, and hidden from the SQL layer. The 48-bit (6-byte) DB_ROW_ID is being assigned from a global sequence that is persisted in the DICT_HDR page. There is absolutely no reason for the DB_ROW_ID to be globally unique across all InnoDB tables. A downgrade to earlier versions will be prevented by the file format change related to removing the InnoDB change buffer (MDEV-29694). DICT_HDR_ROW_ID, dict_sys_t::row_id: Remove. dict_table_t::row_id: The per-table sequence of DB_ROW_ID. commit_try_rebuild(): Copy dict_table_t::row_id from the old table. btr_cur_instant_init(), row_import_cleanup(): If needed, perform the equivalent of SELECT MAX(DB_ROW_ID) to initialize dict_table_t::row_id. row_ins(): If needed, obtain DB_ROW_ID from dict_table_t::row_id. Should it exceed the maximum 48-bit value, return DB_OUT_OF_FILE_SPACE to prevent further inserts into the table. dict_load_table_one(): Move a condition to btr_cur_instant_init_low() so that dict_table_t::row_id will also be restored for ROW_FORMAT=COMPRESSED tables. Tested by: Matthias Leich
This commit is contained in:
parent
f27e9c8947
commit
944beb9e7a
11 changed files with 62 additions and 168 deletions
|
@ -348,10 +348,14 @@ when loading a table definition.
|
|||
static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
|
||||
{
|
||||
ut_ad(index->is_primary());
|
||||
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
|
||||
ut_ad(index->table->supports_instant());
|
||||
ut_ad(index->table->is_readable());
|
||||
|
||||
if (!index->table->supports_instant()) {
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
|
||||
|
||||
dberr_t err;
|
||||
const fil_space_t* space = index->table->space;
|
||||
if (!space) {
|
||||
|
@ -618,17 +622,25 @@ when loading a table definition.
|
|||
@param[in,out] table table definition from the data dictionary
|
||||
@return error code
|
||||
@retval DB_SUCCESS if no error occurred */
|
||||
dberr_t
|
||||
btr_cur_instant_init(dict_table_t* table)
|
||||
dberr_t btr_cur_instant_init(dict_table_t *table)
|
||||
{
|
||||
mtr_t mtr;
|
||||
dict_index_t* index = dict_table_get_first_index(table);
|
||||
mtr.start();
|
||||
dberr_t err = index
|
||||
? btr_cur_instant_init_low(index, &mtr)
|
||||
: DB_CORRUPTION;
|
||||
mtr.commit();
|
||||
return(err);
|
||||
mtr_t mtr;
|
||||
dict_index_t *index= dict_table_get_first_index(table);
|
||||
mtr.start();
|
||||
dberr_t err = index ? btr_cur_instant_init_low(index, &mtr) : DB_CORRUPTION;
|
||||
mtr.commit();
|
||||
if (err == DB_SUCCESS && index->is_gen_clust())
|
||||
{
|
||||
btr_cur_t cur;
|
||||
mtr.start();
|
||||
err= cur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr);
|
||||
if (err != DB_SUCCESS);
|
||||
else if (const rec_t *rec= page_rec_get_prev(btr_cur_get_rec(&cur)))
|
||||
if (page_rec_is_user_rec(rec))
|
||||
table->row_id= mach_read_from_6(rec);
|
||||
mtr.commit();
|
||||
}
|
||||
return(err);
|
||||
}
|
||||
|
||||
/** Initialize the n_core_null_bytes on first access to a clustered
|
||||
|
|
|
@ -93,18 +93,6 @@ dict_hdr_get_new_id(
|
|||
mtr.commit();
|
||||
}
|
||||
|
||||
/** Update dict_sys.row_id in the dictionary header file page. */
|
||||
void dict_hdr_flush_row_id(row_id_t id)
|
||||
{
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
buf_block_t* d= dict_hdr_get(&mtr);
|
||||
byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame;
|
||||
if (mach_read_from_8(row_id) < id)
|
||||
mtr.write<8>(*d, row_id, id);
|
||||
mtr.commit();
|
||||
}
|
||||
|
||||
/** Create the DICT_HDR page on database initialization.
|
||||
@return error code */
|
||||
dberr_t dict_create()
|
||||
|
@ -126,10 +114,8 @@ dberr_t dict_create()
|
|||
}
|
||||
ut_a(d->page.id() == hdr_page_id);
|
||||
|
||||
/* Start counting row, table, index, and tree ids from
|
||||
/* Start counting table, index, and tree ids from
|
||||
DICT_HDR_FIRST_ID */
|
||||
mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame,
|
||||
DICT_HDR_FIRST_ID);
|
||||
mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame,
|
||||
DICT_HDR_FIRST_ID);
|
||||
mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame,
|
||||
|
@ -245,17 +231,6 @@ dberr_t dict_boot()
|
|||
|
||||
const byte* dict_hdr = &d->page.frame[DICT_HDR];
|
||||
|
||||
/* Because we only write new row ids to disk-based data structure
|
||||
(dictionary header) when it is divisible by
|
||||
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
|
||||
the latest value of the row id counter. Therefore we advance
|
||||
the counter at the database startup to avoid overlapping values.
|
||||
Note that when a user after database startup first time asks for
|
||||
a new row id, then because the counter is now divisible by
|
||||
..._MARGIN, it will immediately be updated to the disk-based
|
||||
header. */
|
||||
|
||||
dict_sys.recover_row_id(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID));
|
||||
if (uint32_t max_space_id
|
||||
= mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) {
|
||||
max_space_id--;
|
||||
|
|
|
@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table)
|
|||
ulint fold = my_crc32c(0, table->name.m_name,
|
||||
strlen(table->name.m_name));
|
||||
|
||||
table->row_id = 0;
|
||||
table->autoinc_mutex.init();
|
||||
table->lock_mutex_init();
|
||||
|
||||
|
|
|
@ -2471,9 +2471,7 @@ corrupted:
|
|||
goto corrupted;
|
||||
}
|
||||
|
||||
if (table->supports_instant()) {
|
||||
err = btr_cur_instant_init(table);
|
||||
}
|
||||
err = btr_cur_instant_init(table);
|
||||
}
|
||||
} else {
|
||||
ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX);
|
||||
|
|
|
@ -10219,6 +10219,7 @@ commit_try_rebuild(
|
|||
|
||||
/* We must be still holding a table handle. */
|
||||
DBUG_ASSERT(user_table->get_ref_count() == 1);
|
||||
rebuilt_table->row_id = uint64_t{user_table->row_id};
|
||||
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
|
||||
|
||||
switch (error) {
|
||||
|
|
|
@ -44,39 +44,6 @@ dict_hdr_get_new_id(
|
|||
(not assigned if NULL) */
|
||||
uint32_t* space_id); /*!< out: space id
|
||||
(not assigned if NULL) */
|
||||
/** Update dict_sys.row_id in the dictionary header file page. */
|
||||
void dict_hdr_flush_row_id(row_id_t id);
|
||||
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
|
||||
inline row_id_t dict_sys_t::get_new_row_id()
|
||||
{
|
||||
row_id_t id= row_id.fetch_add(1);
|
||||
if (!(id % ROW_ID_WRITE_MARGIN))
|
||||
dict_hdr_flush_row_id(id);
|
||||
return id;
|
||||
}
|
||||
|
||||
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
|
||||
inline void dict_sys_t::update_row_id(row_id_t id)
|
||||
{
|
||||
row_id_t sys_id= row_id;
|
||||
while (id >= sys_id)
|
||||
{
|
||||
if (!row_id.compare_exchange_strong(sys_id, id))
|
||||
continue;
|
||||
if (!(id % ROW_ID_WRITE_MARGIN))
|
||||
dict_hdr_flush_row_id(id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************//**
|
||||
Writes a row id to a record or other 6-byte stored form. */
|
||||
inline void dict_sys_write_row_id(byte *field, row_id_t row_id)
|
||||
{
|
||||
static_assert(DATA_ROW_ID_LEN == 6, "compatibility");
|
||||
mach_write_to_6(field, row_id);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Initializes the data dictionary memory structures when the database is
|
||||
started. This function is also called when the data dictionary is created.
|
||||
|
@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; }
|
|||
|
||||
/*-------------------------------------------------------------*/
|
||||
/* Dictionary header offsets */
|
||||
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
|
||||
//#define DICT_HDR_ROW_ID 0 /* Was: latest assigned DB_ROW_ID */
|
||||
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
|
||||
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
|
||||
#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/
|
||||
|
|
|
@ -648,7 +648,7 @@ dict_table_get_all_fts_indexes(
|
|||
/********************************************************************//**
|
||||
Gets the number of user-defined non-virtual columns in a table in the
|
||||
dictionary cache.
|
||||
@return number of user-defined (e.g., not ROW_ID) non-virtual
|
||||
@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
|
||||
columns of a table */
|
||||
UNIV_INLINE
|
||||
unsigned
|
||||
|
@ -1370,27 +1370,10 @@ private:
|
|||
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
|
||||
/** hash table of temporary table IDs */
|
||||
hash_table_t temp_id_hash;
|
||||
/** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
|
||||
(FIXME: remove this, and move to dict_table_t) */
|
||||
Atomic_relaxed<row_id_t> row_id;
|
||||
/** The synchronization interval of row_id */
|
||||
static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
|
||||
public:
|
||||
/** Diagnostic message for exceeding the lock_wait() timeout */
|
||||
static const char fatal_msg[];
|
||||
|
||||
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
|
||||
inline row_id_t get_new_row_id();
|
||||
|
||||
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
|
||||
inline void update_row_id(row_id_t id);
|
||||
|
||||
/** Recover the global DB_ROW_ID sequence on database startup */
|
||||
void recover_row_id(row_id_t id)
|
||||
{
|
||||
row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
|
||||
}
|
||||
|
||||
/** @return a new temporary table ID */
|
||||
table_id_t acquire_temporary_table_id()
|
||||
{
|
||||
|
|
|
@ -244,7 +244,7 @@ dict_table_get_next_index(
|
|||
/********************************************************************//**
|
||||
Gets the number of user-defined non-virtual columns in a table in the
|
||||
dictionary cache.
|
||||
@return number of user-defined (e.g., not ROW_ID) non-virtual
|
||||
@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
|
||||
columns of a table */
|
||||
UNIV_INLINE
|
||||
unsigned
|
||||
|
|
|
@ -2347,6 +2347,8 @@ private:
|
|||
Atomic_relaxed<pthread_t> lock_mutex_owner{0};
|
||||
#endif
|
||||
public:
|
||||
/** The next DB_ROW_ID value */
|
||||
Atomic_counter<uint64_t> row_id{0};
|
||||
/** Autoinc counter value to give to the next inserted row. */
|
||||
uint64_t autoinc;
|
||||
|
||||
|
|
|
@ -2109,8 +2109,9 @@ row_import_cleanup(
|
|||
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
|
||||
dberr_t err) /*!< in: error code */
|
||||
{
|
||||
dict_table_t* table = prebuilt->table;
|
||||
|
||||
if (err != DB_SUCCESS) {
|
||||
dict_table_t* table = prebuilt->table;
|
||||
table->file_unreadable = true;
|
||||
if (table->space) {
|
||||
fil_close_tablespace(table->space_id);
|
||||
|
@ -2141,7 +2142,25 @@ row_import_cleanup(
|
|||
|
||||
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
|
||||
|
||||
return(err);
|
||||
if (err != DB_SUCCESS
|
||||
|| !dict_table_get_first_index(table)->is_gen_clust()) {
|
||||
return err;
|
||||
}
|
||||
|
||||
btr_cur_t cur;
|
||||
mtr_t mtr;
|
||||
mtr.start();
|
||||
err = cur.open_leaf(false, dict_table_get_first_index(table),
|
||||
BTR_SEARCH_LEAF, &mtr);
|
||||
if (err != DB_SUCCESS) {
|
||||
} else if (const rec_t *rec =
|
||||
page_rec_get_prev(btr_cur_get_rec(&cur))) {
|
||||
if (page_rec_is_user_rec(rec))
|
||||
table->row_id= mach_read_from_6(rec);
|
||||
}
|
||||
mtr.commit();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
@ -2276,55 +2295,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
|
|||
return(err);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID). */
|
||||
MY_ATTRIBUTE((nonnull)) static
|
||||
void
|
||||
row_import_set_sys_max_row_id(
|
||||
/*==========================*/
|
||||
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
|
||||
handler */
|
||||
const dict_table_t* table) /*!< in: table to import */
|
||||
{
|
||||
const rec_t* rec;
|
||||
mtr_t mtr;
|
||||
btr_pcur_t pcur;
|
||||
row_id_t row_id = 0;
|
||||
dict_index_t* index;
|
||||
|
||||
index = dict_table_get_first_index(table);
|
||||
ut_ad(index->is_primary());
|
||||
ut_ad(dict_index_is_auto_gen_clust(index));
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
|
||||
|
||||
if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
|
||||
== DB_SUCCESS) {
|
||||
rec = btr_pcur_move_to_prev_on_page(&pcur);
|
||||
|
||||
if (!rec) {
|
||||
/* The table is corrupted. */
|
||||
} else if (page_rec_is_infimum(rec)) {
|
||||
/* The table is empty. */
|
||||
} else if (rec_is_metadata(rec, *index)) {
|
||||
/* The clustered index contains the metadata
|
||||
record only, that is, the table is empty. */
|
||||
} else {
|
||||
row_id = mach_read_from_6(rec);
|
||||
}
|
||||
}
|
||||
|
||||
mtr_commit(&mtr);
|
||||
|
||||
if (row_id) {
|
||||
/* Update the system row id if the imported index row id is
|
||||
greater than the max system row id. */
|
||||
dict_sys.update_row_id(row_id);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Read a string from the meta data file.
|
||||
@return DB_SUCCESS or error code. */
|
||||
|
@ -4510,13 +4480,6 @@ row_import_for_mysql(
|
|||
return row_import_error(prebuilt, err);
|
||||
}
|
||||
|
||||
/* Ensure that the next available DB_ROW_ID is not smaller than
|
||||
any DB_ROW_ID stored in the table. */
|
||||
|
||||
if (prebuilt->clust_index_was_generated) {
|
||||
row_import_set_sys_max_row_id(prebuilt, table);
|
||||
}
|
||||
|
||||
ib::info() << "Phase III - Flush changes to disk";
|
||||
|
||||
/* Ensure that all pages dirtied during the IMPORT make it to disk.
|
||||
|
|
|
@ -3525,19 +3525,6 @@ row_ins_index_entry_step(
|
|||
DBUG_RETURN(err);
|
||||
}
|
||||
|
||||
/***********************************************************//**
|
||||
Allocates a row id for row and inits the node->index field. */
|
||||
UNIV_INLINE
|
||||
void
|
||||
row_ins_alloc_row_id_step(
|
||||
/*======================*/
|
||||
ins_node_t* node) /*!< in: row insert node */
|
||||
{
|
||||
ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
|
||||
if (dict_table_get_first_index(node->table)->is_gen_clust())
|
||||
dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
|
||||
}
|
||||
|
||||
/***********************************************************//**
|
||||
Gets a row to insert from the values list. */
|
||||
UNIV_INLINE
|
||||
|
@ -3618,13 +3605,18 @@ row_ins(
|
|||
DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
|
||||
|
||||
if (node->state == INS_NODE_ALLOC_ROW_ID) {
|
||||
|
||||
row_ins_alloc_row_id_step(node);
|
||||
|
||||
node->index = dict_table_get_first_index(node->table);
|
||||
ut_ad(node->entry_list.empty() == false);
|
||||
node->entry = node->entry_list.begin();
|
||||
|
||||
if (node->index->is_gen_clust()) {
|
||||
const uint64_t db_row_id{++node->table->row_id};
|
||||
if (db_row_id >> 48) {
|
||||
DBUG_RETURN(DB_OUT_OF_FILE_SPACE);
|
||||
}
|
||||
mach_write_to_6(node->sys_buf, db_row_id);
|
||||
}
|
||||
|
||||
if (node->ins_type == INS_SEARCHED) {
|
||||
|
||||
row_ins_get_row_from_select(node);
|
||||
|
|
Loading…
Add table
Reference in a new issue