mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-30 10:26:12 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			464 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			464 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2015, 2020, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file include/row0merge.h
 | |
| Index build routines using a merge sort
 | |
| 
 | |
| Created 13/06/2005 Jan Lindstrom
 | |
| *******************************************************/
 | |
| 
 | |
| #ifndef row0merge_h
 | |
| #define row0merge_h
 | |
| 
 | |
| #include "que0types.h"
 | |
| #include "trx0types.h"
 | |
| #include "mtr0mtr.h"
 | |
| #include "rem0types.h"
 | |
| #include "rem0rec.h"
 | |
| #include "btr0types.h"
 | |
| #include "row0mysql.h"
 | |
| #include "lock0types.h"
 | |
| #include "srv0srv.h"
 | |
| #include "ut0stage.h"
 | |
| 
 | |
| /* Reserve free space from every block for key_version */
 | |
| #define ROW_MERGE_RESERVE_SIZE 4
 | |
| 
 | |
| /* Cluster index read task is mandatory */
 | |
| #define COST_READ_CLUSTERED_INDEX            1.0
 | |
| 
 | |
| /* Basic fixed cost to build all type of index */
 | |
| #define COST_BUILD_INDEX_STATIC              0.5
 | |
| /* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */
 | |
| #define COST_BUILD_INDEX_DYNAMIC             0.5
 | |
| 
 | |
| /* Sum of below two must be 1.0 */
 | |
| #define PCT_COST_MERGESORT_INDEX                 0.4
 | |
| #define PCT_COST_INSERT_INDEX                    0.6
 | |
| 
 | |
| // Forward declaration
 | |
| struct ib_sequence_t;
 | |
| 
 | |
| /** @brief Block size for I/O operations in merge sort.
 | |
| 
 | |
| The minimum is srv_page_size, or page_get_free_space_of_empty()
 | |
| rounded to a power of 2.
 | |
| 
 | |
| When not creating a PRIMARY KEY that contains column prefixes, this
 | |
| can be set as small as srv_page_size / 2. */
 | |
| typedef byte	row_merge_block_t;
 | |
| 
 | |
| /** @brief Secondary buffer for I/O operations of merge records.
 | |
| 
 | |
| This buffer is used for writing or reading a record that spans two
 | |
| row_merge_block_t.  Thus, it must be able to hold one merge record,
 | |
| whose maximum size is the same as the minimum size of
 | |
| row_merge_block_t. */
 | |
| typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];
 | |
| 
 | |
| /** @brief Merge record in row_merge_block_t.
 | |
| 
 | |
| The format is the same as a record in ROW_FORMAT=COMPACT with the
 | |
| exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
 | |
| typedef byte	mrec_t;
 | |
| 
 | |
| /** Merge record in row_merge_buf_t */
 | |
| struct mtuple_t {
 | |
| 	dfield_t*	fields;		/*!< data fields */
 | |
| };
 | |
| 
 | |
| /** Buffer for sorting in main memory. */
 | |
| struct row_merge_buf_t {
 | |
| 	mem_heap_t*	heap;		/*!< memory heap where allocated */
 | |
| 	dict_index_t*	index;		/*!< the index the tuples belong to */
 | |
| 	ulint		total_size;	/*!< total amount of data bytes */
 | |
| 	ulint		n_tuples;	/*!< number of data tuples */
 | |
| 	ulint		max_tuples;	/*!< maximum number of data tuples */
 | |
| 	mtuple_t*	tuples;		/*!< array of data tuples */
 | |
| 	mtuple_t*	tmp_tuples;	/*!< temporary copy of tuples,
 | |
| 					for sorting */
 | |
| };
 | |
| 
 | |
| /** Information about temporary files used in merge sort */
 | |
| struct merge_file_t {
 | |
| 	pfs_os_file_t	fd;		/*!< file descriptor */
 | |
| 	ulint		offset;		/*!< file offset (end of file) */
 | |
| 	ib_uint64_t	n_rec;		/*!< number of records in the file */
 | |
| };
 | |
| 
 | |
| /** Index field definition */
 | |
| struct index_field_t {
 | |
| 	ulint		col_no;		/*!< column offset */
 | |
| 	ulint		prefix_len;	/*!< column prefix length, or 0
 | |
| 					if indexing the whole column */
 | |
| 	bool		is_v_col;	/*!< whether this is a virtual column */
 | |
| };
 | |
| 
 | |
| /** Definition of an index being created */
 | |
| struct index_def_t {
 | |
| 	const char*	name;		/*!< index name */
 | |
| 	bool		rebuild;	/*!< whether the table is rebuilt */
 | |
| 	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
 | |
| 					or DICT_CLUSTERED */
 | |
| 	ulint		key_number;	/*!< MySQL key number,
 | |
| 					or ULINT_UNDEFINED if none */
 | |
| 	ulint		n_fields;	/*!< number of fields in index */
 | |
| 	index_field_t*	fields;		/*!< field definitions */
 | |
| 	st_mysql_ftparser*
 | |
| 			parser;		/*!< fulltext parser plugin */
 | |
| };
 | |
| 
 | |
| /** Structure for reporting duplicate records. */
 | |
| struct row_merge_dup_t {
 | |
| 	dict_index_t*		index;	/*!< index being sorted */
 | |
| 	struct TABLE*		table;	/*!< MySQL table object */
 | |
| 	const ulint*		col_map;/*!< mapping of column numbers
 | |
| 					in table to the rebuilt table
 | |
| 					(index->table), or NULL if not
 | |
| 					rebuilding table */
 | |
| 	ulint			n_dup;	/*!< number of duplicates */
 | |
| };
 | |
| 
 | |
| /*************************************************************//**
 | |
| Report a duplicate key. */
 | |
| void
 | |
| row_merge_dup_report(
 | |
| /*=================*/
 | |
| 	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
 | |
| 	const dfield_t*		entry)	/*!< in: duplicate index entry */
 | |
| 	MY_ATTRIBUTE((nonnull));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Sets an exclusive lock on a table, for the duration of creating indexes.
 | |
| @return error code or DB_SUCCESS */
 | |
| dberr_t
 | |
| row_merge_lock_table(
 | |
| /*=================*/
 | |
| 	trx_t*		trx,		/*!< in/out: transaction */
 | |
| 	dict_table_t*	table,		/*!< in: table to lock */
 | |
| 	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
 | |
| 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Drop indexes that were created before an error occurred.
 | |
| The data dictionary must have been locked exclusively by the caller,
 | |
| because the transaction will not be committed. */
 | |
| void
 | |
| row_merge_drop_indexes_dict(
 | |
| /*========================*/
 | |
| 	trx_t*		trx,	/*!< in/out: dictionary transaction */
 | |
| 	table_id_t	table_id)/*!< in: table identifier */
 | |
| 	MY_ATTRIBUTE((nonnull));
 | |
| 
 | |
| /** Drop indexes that were created before an error occurred.
 | |
| The data dictionary must have been locked exclusively by the caller,
 | |
| because the transaction will not be committed.
 | |
| @param trx              dictionary transaction
 | |
| @param table            table containing the indexes
 | |
| @param locked           True if table is locked,
 | |
|                         false - may need to do lazy drop
 | |
| @param alter_trx        Alter table transaction */
 | |
| void
 | |
| row_merge_drop_indexes(
 | |
|         trx_t*          trx,
 | |
|         dict_table_t*   table,
 | |
|         bool            locked,
 | |
|         const trx_t*    alter_trx=NULL);
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Drop all partially created indexes during crash recovery. */
 | |
| void
 | |
| row_merge_drop_temp_indexes(void);
 | |
| /*=============================*/
 | |
| 
 | |
| /** Create temporary merge files in the given paramater path, and if
 | |
| UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
 | |
| @param[in]	path	location for creating temporary merge files, or NULL
 | |
| @return File descriptor */
 | |
| pfs_os_file_t
 | |
| row_merge_file_create_low(
 | |
| 	const char*	path)
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| /*********************************************************************//**
 | |
| Destroy a merge file. And de-register the file from Performance Schema
 | |
| if UNIV_PFS_IO is defined. */
 | |
| void
 | |
| row_merge_file_destroy_low(
 | |
| /*=======================*/
 | |
| 	const pfs_os_file_t&	fd);	/*!< in: merge file descriptor */
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Rename an index in the dictionary that was created. The data
 | |
| dictionary must have been locked exclusively by the caller, because
 | |
| the transaction will not be committed.
 | |
| @return DB_SUCCESS if all OK */
 | |
| dberr_t
 | |
| row_merge_rename_index_to_add(
 | |
| /*==========================*/
 | |
| 	trx_t*		trx,		/*!< in/out: transaction */
 | |
| 	table_id_t	table_id,	/*!< in: table identifier */
 | |
| 	index_id_t	index_id)	/*!< in: index identifier */
 | |
| 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Rename an index in the dictionary that is to be dropped. The data
 | |
| dictionary must have been locked exclusively by the caller, because
 | |
| the transaction will not be committed.
 | |
| @return DB_SUCCESS if all OK */
 | |
| dberr_t
 | |
| row_merge_rename_index_to_drop(
 | |
| /*===========================*/
 | |
| 	trx_t*		trx,		/*!< in/out: transaction */
 | |
| 	table_id_t	table_id,	/*!< in: table identifier */
 | |
| 	index_id_t	index_id)	/*!< in: index identifier */
 | |
| 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
 | |
| 
 | |
| /** Create the index and load in to the dictionary.
 | |
| @param[in,out]	table		the index is on this table
 | |
| @param[in]	index_def	the index definition
 | |
| @param[in]	add_v		new virtual columns added along with add
 | |
| 				index call
 | |
| @return index, or NULL on error */
 | |
| dict_index_t*
 | |
| row_merge_create_index(
 | |
| 	dict_table_t*		table,
 | |
| 	const index_def_t*	index_def,
 | |
| 	const dict_add_v_col_t*	add_v)
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Check if a transaction can use an index.
 | |
| @return whether the index can be used by the transaction */
 | |
| bool
 | |
| row_merge_is_index_usable(
 | |
| /*======================*/
 | |
| 	const trx_t*		trx,	/*!< in: transaction */
 | |
| 	const dict_index_t*	index)	/*!< in: index to check */
 | |
| 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Drop a table. The caller must have ensured that the background stats
 | |
| thread is not processing the table. This can be done by calling
 | |
| dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
 | |
| before calling this function.
 | |
| @return DB_SUCCESS or error code */
 | |
| dberr_t
 | |
| row_merge_drop_table(
 | |
| /*=================*/
 | |
| 	trx_t*		trx,		/*!< in: transaction */
 | |
| 	dict_table_t*	table)		/*!< in: table instance to drop */
 | |
| 	MY_ATTRIBUTE((nonnull, warn_unused_result));
 | |
| 
 | |
| /** Build indexes on a table by reading a clustered index, creating a temporary
 | |
| file containing index entries, merge sorting these index entries and inserting
 | |
| sorted index entries to indexes.
 | |
| @param[in]	trx		transaction
 | |
| @param[in]	old_table	table where rows are read from
 | |
| @param[in]	new_table	table where indexes are created; identical to
 | |
| old_table unless creating a PRIMARY KEY
 | |
| @param[in]	online		true if creating indexes online
 | |
| @param[in]	indexes		indexes to be created
 | |
| @param[in]	key_numbers	MySQL key numbers
 | |
| @param[in]	n_indexes	size of indexes[]
 | |
| @param[in,out]	table		MySQL table, for reporting erroneous key value
 | |
| if applicable
 | |
| @param[in]	defaults	default values of added, changed columns, or NULL
 | |
| @param[in]	col_map		mapping of old column numbers to new ones, or
 | |
| NULL if old_table == new_table
 | |
| @param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
 | |
| ULINT_UNDEFINED if none is added
 | |
| @param[in,out]	sequence	autoinc sequence
 | |
| @param[in]	skip_pk_sort	whether the new PRIMARY KEY will follow
 | |
| existing order
 | |
| @param[in,out]	stage		performance schema accounting object, used by
 | |
| ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
 | |
| this function and it will be passed to other functions for further accounting.
 | |
| @param[in]	add_v		new virtual columns added along with indexes
 | |
| @param[in]	eval_table	mysql table used to evaluate virtual column
 | |
| 				value, see innobase_get_computed_value().
 | |
| @param[in]	allow_non_null	allow the conversion from null to not-null
 | |
| @return DB_SUCCESS or error code */
 | |
| dberr_t
 | |
| row_merge_build_indexes(
 | |
| 	trx_t*			trx,
 | |
| 	dict_table_t*		old_table,
 | |
| 	dict_table_t*		new_table,
 | |
| 	bool			online,
 | |
| 	dict_index_t**		indexes,
 | |
| 	const ulint*		key_numbers,
 | |
| 	ulint			n_indexes,
 | |
| 	struct TABLE*		table,
 | |
| 	const dtuple_t*		defaults,
 | |
| 	const ulint*		col_map,
 | |
| 	ulint			add_autoinc,
 | |
| 	ib_sequence_t&		sequence,
 | |
| 	bool			skip_pk_sort,
 | |
| 	ut_stage_alter_t*	stage,
 | |
| 	const dict_add_v_col_t*	add_v,
 | |
| 	struct TABLE*		eval_table,
 | |
| 	bool			allow_non_null)
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /********************************************************************//**
 | |
| Write a buffer to a block. */
 | |
| void
 | |
| row_merge_buf_write(
 | |
| /*================*/
 | |
| 	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
 | |
| 	const merge_file_t*	of,	/*!< in: output file */
 | |
| 	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
 | |
| 	MY_ATTRIBUTE((nonnull));
 | |
| 
 | |
| /********************************************************************//**
 | |
| Sort a buffer. */
 | |
| void
 | |
| row_merge_buf_sort(
 | |
| /*===============*/
 | |
| 	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
 | |
| 	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
 | |
| 					(NULL if non-unique index) */
 | |
| 	MY_ATTRIBUTE((nonnull(1)));
 | |
| 
 | |
| /********************************************************************//**
 | |
| Write a merge block to the file system.
 | |
| @return whether the request was completed successfully
 | |
| @retval	false	on error
 | |
| @retval	true	on success */
 | |
| UNIV_INTERN
 | |
| bool
 | |
| row_merge_write(
 | |
| /*============*/
 | |
| 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 | |
| 	ulint		offset,	/*!< in: offset where to write,
 | |
| 				in number of row_merge_block_t elements */
 | |
| 	const void*	buf,	/*!< in: data */
 | |
| 	void*		crypt_buf,		/*!< in: crypt buf or NULL */
 | |
| 	ulint		space)			/*!< in: space id */
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /********************************************************************//**
 | |
| Empty a sort buffer.
 | |
| @return sort buffer */
 | |
| row_merge_buf_t*
 | |
| row_merge_buf_empty(
 | |
| /*================*/
 | |
| 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
 | |
| 	MY_ATTRIBUTE((warn_unused_result, nonnull));
 | |
| 
 | |
| /** Create a merge file in the given location.
 | |
| @param[out]	merge_file	merge file structure
 | |
| @param[in]	path		location for creating temporary file, or NULL
 | |
| @return file descriptor, or -1 on failure */
 | |
| pfs_os_file_t
 | |
| row_merge_file_create(
 | |
| 	merge_file_t*	merge_file,
 | |
| 	const char*	path)
 | |
| 	MY_ATTRIBUTE((warn_unused_result, nonnull(1)));
 | |
| 
 | |
| /** Merge disk files.
 | |
| @param[in]	trx	transaction
 | |
| @param[in]	dup	descriptor of index being created
 | |
| @param[in,out]	file	file containing index entries
 | |
| @param[in,out]	block	3 buffers
 | |
| @param[in,out]	tmpfd	temporary file handle
 | |
| @param[in]      update_progress true, if we should update progress status
 | |
| @param[in]      pct_progress total progress percent until now
 | |
| @param[in]      pct_ocst current progress percent
 | |
| @param[in]      crypt_block crypt buf or NULL
 | |
| @param[in]      space    space_id
 | |
| @param[in,out]	stage	performance schema accounting object, used by
 | |
| ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
 | |
| and then stage->inc() will be called for each record processed.
 | |
| @return DB_SUCCESS or error code */
 | |
| dberr_t
 | |
| row_merge_sort(
 | |
| /*===========*/
 | |
| 	trx_t*			trx,
 | |
| 	const row_merge_dup_t*	dup,
 | |
| 	merge_file_t*		file,
 | |
| 	row_merge_block_t*	block,
 | |
| 	pfs_os_file_t*		tmpfd,
 | |
| 	const bool		update_progress,
 | |
| 	const double	pct_progress,
 | |
| 	const double	pct_cost,
 | |
| 	row_merge_block_t*	crypt_block,
 | |
| 	ulint			space,
 | |
| 	ut_stage_alter_t*	stage = NULL)
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Allocate a sort buffer.
 | |
| @return own: sort buffer */
 | |
| row_merge_buf_t*
 | |
| row_merge_buf_create(
 | |
| /*=================*/
 | |
| 	dict_index_t*	index)	/*!< in: secondary index */
 | |
| 	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Deallocate a sort buffer. */
 | |
| void
 | |
| row_merge_buf_free(
 | |
| /*===============*/
 | |
| 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
 | |
| 	MY_ATTRIBUTE((nonnull));
 | |
| 
 | |
| /*********************************************************************//**
 | |
| Destroy a merge file. */
 | |
| void
 | |
| row_merge_file_destroy(
 | |
| /*===================*/
 | |
| 	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
 | |
| 	MY_ATTRIBUTE((nonnull));
 | |
| 
 | |
| /** Read a merge block from the file system.
 | |
| @return whether the request was completed successfully */
 | |
| bool
 | |
| row_merge_read(
 | |
| /*===========*/
 | |
| 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 | |
| 	ulint			offset,	/*!< in: offset where to read
 | |
| 					in number of row_merge_block_t
 | |
| 					elements */
 | |
| 	row_merge_block_t*	buf,	/*!< out: data */
 | |
| 	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
 | |
| 	ulint			space)	   /*!< in: space id */
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /********************************************************************//**
 | |
| Read a merge record.
 | |
| @return pointer to next record, or NULL on I/O error or end of list */
 | |
| const byte*
 | |
| row_merge_read_rec(
 | |
| /*===============*/
 | |
| 	row_merge_block_t*	block,	/*!< in/out: file buffer */
 | |
| 	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
 | |
| 	const byte*		b,	/*!< in: pointer to record */
 | |
| 	const dict_index_t*	index,	/*!< in: index of the record */
 | |
| 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
 | |
| 	ulint*			foffs,	/*!< in/out: file offset */
 | |
| 	const mrec_t**		mrec,	/*!< out: pointer to merge record,
 | |
| 					or NULL on end of list
 | |
| 					(non-NULL on I/O error) */
 | |
| 	rec_offs*		offsets,/*!< out: offsets of mrec */
 | |
| 	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
 | |
| 	ulint			space)	   /*!< in: space id */
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| #endif /* row0merge.h */
 | 
