MDEV-19506 Remove the global sequence DICT_HDR_ROW_ID for DB_ROW_ID

InnoDB tables that lack a primary key (and any UNIQUE INDEX all of
whose columns are NOT NULL) will use an internally generated index,
called GEN_CLUST_INDEX(DB_ROW_ID) in the InnoDB data dictionary,
and hidden from the SQL layer.

Until now, the 48-bit (6-byte) DB_ROW_ID has been assigned from a
global sequence that is persisted in the DICT_HDR page.

There is absolutely no reason for the DB_ROW_ID to be globally
unique across all InnoDB tables.

A downgrade to earlier versions will be prevented by the file format
change related to removing the InnoDB change buffer (MDEV-29694).

DICT_HDR_ROW_ID, dict_sys_t::row_id: Remove.

dict_table_t::row_id: The per-table sequence of DB_ROW_ID.

commit_try_rebuild(): Copy dict_table_t::row_id from the old table.

btr_cur_instant_init(), row_import_cleanup(): If needed, perform
the equivalent of SELECT MAX(DB_ROW_ID) to initialize
dict_table_t::row_id.

row_ins(): If needed, obtain DB_ROW_ID from dict_table_t::row_id.
Should it exceed the maximum 48-bit value, return DB_OUT_OF_FILE_SPACE
to prevent further inserts into the table.

dict_load_table_one(): Move a condition to btr_cur_instant_init_low()
so that dict_table_t::row_id will be restored also for
ROW_FORMAT=COMPRESSED tables.

Tested by: Matthias Leich
This commit is contained in:
Marko Mäkelä 2022-12-14 14:44:28 +02:00
parent f27e9c8947
commit 944beb9e7a
11 changed files with 62 additions and 168 deletions

View file

@ -348,10 +348,14 @@ when loading a table definition.
static dberr_t btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
{
ut_ad(index->is_primary());
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
ut_ad(index->table->supports_instant());
ut_ad(index->table->is_readable());
if (!index->table->supports_instant()) {
return DB_SUCCESS;
}
ut_ad(index->n_core_null_bytes == dict_index_t::NO_CORE_NULL_BYTES);
dberr_t err;
const fil_space_t* space = index->table->space;
if (!space) {
@ -618,17 +622,25 @@ when loading a table definition.
@param[in,out] table table definition from the data dictionary
@return error code
@retval DB_SUCCESS if no error occurred */
dberr_t
btr_cur_instant_init(dict_table_t* table)
dberr_t btr_cur_instant_init(dict_table_t *table)
{
mtr_t mtr;
dict_index_t* index = dict_table_get_first_index(table);
mtr.start();
dberr_t err = index
? btr_cur_instant_init_low(index, &mtr)
: DB_CORRUPTION;
mtr.commit();
return(err);
mtr_t mtr;
dict_index_t *index= dict_table_get_first_index(table);
mtr.start();
dberr_t err = index ? btr_cur_instant_init_low(index, &mtr) : DB_CORRUPTION;
mtr.commit();
if (err == DB_SUCCESS && index->is_gen_clust())
{
btr_cur_t cur;
mtr.start();
err= cur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr);
if (err != DB_SUCCESS);
else if (const rec_t *rec= page_rec_get_prev(btr_cur_get_rec(&cur)))
if (page_rec_is_user_rec(rec))
table->row_id= mach_read_from_6(rec);
mtr.commit();
}
return(err);
}
/** Initialize the n_core_null_bytes on first access to a clustered

View file

@ -93,18 +93,6 @@ dict_hdr_get_new_id(
mtr.commit();
}
/** Update dict_sys.row_id in the dictionary header file page.
The stored 8-byte value is only ever raised here: it is overwritten
only when it is currently smaller than id.
@param id   the row ID value to store in the DICT_HDR_ROW_ID field */
void dict_hdr_flush_row_id(row_id_t id)
{
mtr_t mtr;
mtr.start();
buf_block_t* d= dict_hdr_get(&mtr);
/* Address of the DICT_HDR_ROW_ID field inside the header page frame */
byte *row_id= DICT_HDR + DICT_HDR_ROW_ID + d->page.frame;
/* Leave the page untouched if it already holds a value >= id */
if (mach_read_from_8(row_id) < id)
mtr.write<8>(*d, row_id, id);
mtr.commit();
}
/** Create the DICT_HDR page on database initialization.
@return error code */
dberr_t dict_create()
@ -126,10 +114,8 @@ dberr_t dict_create()
}
ut_a(d->page.id() == hdr_page_id);
/* Start counting row, table, index, and tree ids from
/* Start counting table, index, and tree ids from
DICT_HDR_FIRST_ID */
mtr.write<8>(*d, DICT_HDR + DICT_HDR_ROW_ID + d->page.frame,
DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_TABLE_ID + d->page.frame,
DICT_HDR_FIRST_ID);
mtr.write<8>(*d, DICT_HDR + DICT_HDR_INDEX_ID + d->page.frame,
@ -245,17 +231,6 @@ dberr_t dict_boot()
const byte* dict_hdr = &d->page.frame[DICT_HDR];
/* Because we only write new row ids to disk-based data structure
(dictionary header) when it is divisible by
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
the latest value of the row id counter. Therefore we advance
the counter at the database startup to avoid overlapping values.
Note that when a user after database startup first time asks for
a new row id, then because the counter is now divisible by
..._MARGIN, it will immediately be updated to the disk-based
header. */
dict_sys.recover_row_id(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID));
if (uint32_t max_space_id
= mach_read_from_4(dict_hdr + DICT_HDR_MAX_SPACE_ID)) {
max_space_id--;

View file

@ -1180,6 +1180,7 @@ inline void dict_sys_t::add(dict_table_t* table)
ulint fold = my_crc32c(0, table->name.m_name,
strlen(table->name.m_name));
table->row_id = 0;
table->autoinc_mutex.init();
table->lock_mutex_init();

View file

@ -2471,9 +2471,7 @@ corrupted:
goto corrupted;
}
if (table->supports_instant()) {
err = btr_cur_instant_init(table);
}
err = btr_cur_instant_init(table);
}
} else {
ut_ad(ignore_err & DICT_ERR_IGNORE_INDEX);

View file

@ -10219,6 +10219,7 @@ commit_try_rebuild(
/* We must be still holding a table handle. */
DBUG_ASSERT(user_table->get_ref_count() == 1);
rebuilt_table->row_id = uint64_t{user_table->row_id};
DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
switch (error) {

View file

@ -44,39 +44,6 @@ dict_hdr_get_new_id(
(not assigned if NULL) */
uint32_t* space_id); /*!< out: space id
(not assigned if NULL) */
/** Update dict_sys.row_id in the dictionary header file page. */
void dict_hdr_flush_row_id(row_id_t id);
/** Allocate a value from the global row_id counter by fetch_add(1).
Whenever the allocated value is a multiple of ROW_ID_WRITE_MARGIN, it is
also handed to dict_hdr_flush_row_id().
@return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
inline row_id_t dict_sys_t::get_new_row_id()
{
/* NOTE(review): presumably the pre-increment value, as with
std::atomic::fetch_add() - confirm for Atomic_relaxed */
row_id_t id= row_id.fetch_add(1);
/* Only every ROW_ID_WRITE_MARGIN-th value triggers a header update */
if (!(id % ROW_ID_WRITE_MARGIN))
dict_hdr_flush_row_id(id);
return id;
}
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE
@param id   the value that the row_id counter must reach at least */
inline void dict_sys_t::update_row_id(row_id_t id)
{
/* Snapshot of the counter; nothing is done if it is already above id */
row_id_t sys_id= row_id;
while (id >= sys_id)
{
/* On failure, re-evaluate the loop condition and possibly retry.
NOTE(review): presumably sys_id is refreshed with the observed value,
as with std::atomic::compare_exchange_strong() - confirm */
if (!row_id.compare_exchange_strong(sys_id, id))
continue;
/* The counter was set to id; pass it to dict_hdr_flush_row_id() only
when it is a multiple of ROW_ID_WRITE_MARGIN */
if (!(id % ROW_ID_WRITE_MARGIN))
dict_hdr_flush_row_id(id);
break;
}
}
/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form.
@param field    destination buffer that receives the 6-byte value
@param row_id   the row id to write */
inline void dict_sys_write_row_id(byte *field, row_id_t row_id)
{
/* The 6-byte write below relies on DATA_ROW_ID_LEN being exactly 6 */
static_assert(DATA_ROW_ID_LEN == 6, "compatibility");
mach_write_to_6(field, row_id);
}
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created.
@ -116,7 +83,7 @@ inline bool dict_is_sys_table(table_id_t id) { return id < DICT_HDR_FIRST_ID; }
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
//#define DICT_HDR_ROW_ID 0 /* Was: latest assigned DB_ROW_ID */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/

View file

@ -648,7 +648,7 @@ dict_table_get_all_fts_indexes(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
@return number of user-defined (e.g., not ROW_ID) non-virtual
@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned
@ -1370,27 +1370,10 @@ private:
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
/** hash table of temporary table IDs */
hash_table_t temp_id_hash;
/** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
(FIXME: remove this, and move to dict_table_t) */
Atomic_relaxed<row_id_t> row_id;
/** The synchronization interval of row_id */
static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
public:
/** Diagnostic message for exceeding the lock_wait() timeout */
static const char fatal_msg[];
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
inline row_id_t get_new_row_id();
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
inline void update_row_id(row_id_t id);
/** Recover the global DB_ROW_ID sequence on database startup.
The counter is set to id rounded up to a multiple of ROW_ID_WRITE_MARGIN,
plus one further ROW_ID_WRITE_MARGIN.
@param id   the value to start from (the caller passes the stored one) */
void recover_row_id(row_id_t id)
{
row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
}
/** @return a new temporary table ID */
table_id_t acquire_temporary_table_id()
{

View file

@ -244,7 +244,7 @@ dict_table_get_next_index(
/********************************************************************//**
Gets the number of user-defined non-virtual columns in a table in the
dictionary cache.
@return number of user-defined (e.g., not ROW_ID) non-virtual
@return number of user-defined (e.g., not DB_ROW_ID) non-virtual
columns of a table */
UNIV_INLINE
unsigned

View file

@ -2347,6 +2347,8 @@ private:
Atomic_relaxed<pthread_t> lock_mutex_owner{0};
#endif
public:
/** The next DB_ROW_ID value */
Atomic_counter<uint64_t> row_id{0};
/** Autoinc counter value to give to the next inserted row. */
uint64_t autoinc;

View file

@ -2109,8 +2109,9 @@ row_import_cleanup(
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
dberr_t err) /*!< in: error code */
{
dict_table_t* table = prebuilt->table;
if (err != DB_SUCCESS) {
dict_table_t* table = prebuilt->table;
table->file_unreadable = true;
if (table->space) {
fil_close_tablespace(table->space_id);
@ -2141,7 +2142,25 @@ row_import_cleanup(
DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
return(err);
if (err != DB_SUCCESS
|| !dict_table_get_first_index(table)->is_gen_clust()) {
return err;
}
btr_cur_t cur;
mtr_t mtr;
mtr.start();
err = cur.open_leaf(false, dict_table_get_first_index(table),
BTR_SEARCH_LEAF, &mtr);
if (err != DB_SUCCESS) {
} else if (const rec_t *rec =
page_rec_get_prev(btr_cur_get_rec(&cur))) {
if (page_rec_is_user_rec(rec))
table->row_id= mach_read_from_6(rec);
}
mtr.commit();
return err;
}
/*****************************************************************//**
@ -2276,55 +2295,6 @@ row_import_adjust_root_pages_of_secondary_indexes(
return(err);
}
/*****************************************************************//**
Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID).
Reads the DB_ROW_ID of one record of the first (generated clustered)
index of the table and, if it is nonzero, passes it to
dict_sys.update_row_id(). Failure to open the index is silently ignored. */
MY_ATTRIBUTE((nonnull)) static
void
row_import_set_sys_max_row_id(
/*==========================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
handler (not referenced in the body) */
const dict_table_t* table) /*!< in: table to import */
{
const rec_t* rec;
mtr_t mtr;
btr_pcur_t pcur;
/* 0 means that no usable record was found */
row_id_t row_id = 0;
dict_index_t* index;
index = dict_table_get_first_index(table);
ut_ad(index->is_primary());
ut_ad(dict_index_is_auto_gen_clust(index));
mtr_start(&mtr);
/* This mini-transaction runs with redo logging disabled */
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
/* NOTE(review): presumably first=false positions the cursor at the
end of the last leaf page - confirm against btr_pcur_t::open_leaf() */
if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
== DB_SUCCESS) {
rec = btr_pcur_move_to_prev_on_page(&pcur);
if (!rec) {
/* The table is corrupted. */
} else if (page_rec_is_infimum(rec)) {
/* The table is empty. */
} else if (rec_is_metadata(rec, *index)) {
/* The clustered index contains the metadata
record only, that is, the table is empty. */
} else {
/* The first 6 bytes of the record are read as DB_ROW_ID */
row_id = mach_read_from_6(rec);
}
}
mtr_commit(&mtr);
if (row_id) {
/* Update the system row id if the imported index row id is
greater than the max system row id. */
dict_sys.update_row_id(row_id);
}
}
/*****************************************************************//**
Read a string from the meta data file.
@return DB_SUCCESS or error code. */
@ -4510,13 +4480,6 @@ row_import_for_mysql(
return row_import_error(prebuilt, err);
}
/* Ensure that the next available DB_ROW_ID is not smaller than
any DB_ROW_ID stored in the table. */
if (prebuilt->clust_index_was_generated) {
row_import_set_sys_max_row_id(prebuilt, table);
}
ib::info() << "Phase III - Flush changes to disk";
/* Ensure that all pages dirtied during the IMPORT make it to disk.

View file

@ -3525,19 +3525,6 @@ row_ins_index_entry_step(
DBUG_RETURN(err);
}
/***********************************************************//**
Allocates a row id for the row, if the table needs one.
When the first index of node->table is a generated clustered index,
a new value from dict_sys.get_new_row_id() is written to node->sys_buf;
otherwise nothing is done. NOTE(review): node->index is not assigned
in this function body. */
UNIV_INLINE
void
row_ins_alloc_row_id_step(
/*======================*/
ins_node_t* node) /*!< in: row insert node */
{
ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
/* Only tables with a generated clustered index get a row id */
if (dict_table_get_first_index(node->table)->is_gen_clust())
dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
}
/***********************************************************//**
Gets a row to insert from the values list. */
UNIV_INLINE
@ -3618,13 +3605,18 @@ row_ins(
DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));
if (node->state == INS_NODE_ALLOC_ROW_ID) {
row_ins_alloc_row_id_step(node);
node->index = dict_table_get_first_index(node->table);
ut_ad(node->entry_list.empty() == false);
node->entry = node->entry_list.begin();
if (node->index->is_gen_clust()) {
const uint64_t db_row_id{++node->table->row_id};
if (db_row_id >> 48) {
DBUG_RETURN(DB_OUT_OF_FILE_SPACE);
}
mach_write_to_6(node->sys_buf, db_row_id);
}
if (node->ins_type == INS_SEARCHED) {
row_ins_get_row_from_select(node);