mariadb/storage/innobase/include/row0merge.h

/*****************************************************************************

Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/row0merge.h
Index build routines using a merge sort

Created 13/06/2005 Jan Lindstrom
*******************************************************/

#ifndef row0merge_h
#define row0merge_h

#include "univ.i"
#include "data0data.h"
#include "dict0types.h"
#include "trx0types.h"
#include "que0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
#include "rem0rec.h"
#include "read0types.h"
#include "btr0types.h"
#include "row0mysql.h"
#include "lock0types.h"
#include "srv0srv.h"

/** @brief Block size for I/O operations in merge sort.

The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
rounded to a power of 2.

When not creating a PRIMARY KEY that contains column prefixes, this
can be set as small as UNIV_PAGE_SIZE / 2.  See the comment above
ut_ad(data_size < sizeof(row_merge_block_t)). */
typedef byte   row_merge_block_t;

/** @brief Secondary buffer for I/O operations of merge records.

This buffer is used for writing or reading a record that spans two
row_merge_block_t.  Thus, it must be able to hold one merge record,
whose maximum size is the same as the minimum size of
row_merge_block_t. */
typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];

/** @brief Merge record in row_merge_block_t.

The format is the same as a record in ROW_FORMAT=COMPACT with the
exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte	mrec_t;

/** Buffer for sorting in main memory. */
struct row_merge_buf_struct {
	mem_heap_t*	heap;		/*!< memory heap where allocated */
	dict_index_t*	index;		/*!< the index the tuples belong to */
	ulint		total_size;	/*!< total amount of data bytes */
	ulint		n_tuples;	/*!< number of data tuples */
	ulint		max_tuples;	/*!< maximum number of data tuples */
	const dfield_t**tuples;		/*!< array of pointers to
					arrays of fields that form
					the data tuples */
	const dfield_t**tmp_tuples;	/*!< temporary copy of tuples,
					for sorting */
};

/** Buffer for sorting in main memory. */
typedef struct row_merge_buf_struct	row_merge_buf_t;

/** Information about temporary files used in merge sort */
struct merge_file_struct {
	int		fd;		/*!< file descriptor */
	ulint		offset;		/*!< file offset (end of file) */
	ib_uint64_t	n_rec;		/*!< number of records in the file */
};

/** Information about temporary files used in merge sort */
typedef struct merge_file_struct	merge_file_t;

/** Index field definition */
struct merge_index_field_struct {
	ulint		prefix_len;	/*!< column prefix length, or 0
					if indexing the whole column */
	const char*	field_name;	/*!< field name */
};

/** Index field definition */
typedef struct merge_index_field_struct	merge_index_field_t;

/** Definition of an index being created */
struct merge_index_def_struct {
	const char*		name;		/*!< index name */
	ulint			ind_type;	/*!< 0, DICT_UNIQUE,
						or DICT_CLUSTERED */
	ulint			n_fields;	/*!< number of fields
						in index */
	merge_index_field_t*	fields;		/*!< field definitions */
};

/** Definition of an index being created */
typedef struct merge_index_def_struct	merge_index_def_t;

/** Structure for reporting duplicate records. */
struct row_merge_dup_struct {
	const dict_index_t*	index;		/*!< index being sorted */
	struct TABLE*		table;		/*!< MySQL table object */
	ulint			n_dup;		/*!< number of duplicates */
};

/** Structure for reporting duplicate records. */
typedef struct row_merge_dup_struct row_merge_dup_t;

/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
@return	error code or DB_SUCCESS */
UNIV_INTERN
ulint
row_merge_lock_table(
/*=================*/
	trx_t*		trx,		/*!< in/out: transaction */
	dict_table_t*	table,		/*!< in: table to lock */
	enum lock_mode	mode);		/*!< in: LOCK_X or LOCK_S */
/*********************************************************************//**
Drop an index from the InnoDB system tables.  The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed. */
UNIV_INTERN
void
row_merge_drop_index(
/*=================*/
	dict_index_t*	index,	/*!< in: index to be removed */
	dict_table_t*	table,	/*!< in: table */
	trx_t*		trx);	/*!< in: transaction handle */
/*********************************************************************//**
Drop those indexes which were created before an error occurred when
building an index.  The data dictionary must have been locked
exclusively by the caller, because the transaction will not be
committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_table_t*	table,		/*!< in: table containing the indexes */
	dict_index_t**	index,		/*!< in: indexes to drop */
	ulint		num_created);	/*!< in: number of elements in
					index[] */
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
UNIV_INTERN
void
row_merge_drop_temp_indexes(void);
/*=============================*/
/*********************************************************************//**
Rename the tables in the data dictionary.  The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return	error code or DB_SUCCESS */
UNIV_INTERN
ulint
row_merge_rename_tables(
/*====================*/
	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
					tmp_name */
	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
					old_table->name */
	const char*	tmp_name,	/*!< in: new name for old_table */
	trx_t*		trx);		/*!< in: transaction handle */
/*********************************************************************//**
Create a temporary table for creating a primary key, using the definition
of an existing table.
@return	table, or NULL on error */
UNIV_INTERN
dict_table_t*
row_merge_create_temporary_table(
/*=============================*/
	const char*		table_name,	/*!< in: new table name */
	const merge_index_def_t*index_def,	/*!< in: the index definition
						of the primary key */
	const dict_table_t*	table,		/*!< in: old table definition */
	trx_t*			trx);		/*!< in/out: transaction
						(sets error_state) */
/*********************************************************************//**
Rename the temporary indexes in the dictionary to permanent ones.  The
data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed.
@return	DB_SUCCESS if all OK */
UNIV_INTERN
ulint
row_merge_rename_indexes(
/*=====================*/
	trx_t*		trx,		/*!< in/out: transaction */
	dict_table_t*	table);		/*!< in/out: table with new indexes */
/*********************************************************************//**
Create the index and load in to the dictionary.
@return	index, or NULL on error */
UNIV_INTERN
dict_index_t*
row_merge_create_index(
/*===================*/
	trx_t*			trx,	/*!< in/out: trx (sets error_state) */
	dict_table_t*		table,	/*!< in: the index is on this table */
	const merge_index_def_t*index_def);
					/*!< in: the index definition */
/*********************************************************************//**
Check if a transaction can use an index.
@return	TRUE if index can be used by the transaction else FALSE */
UNIV_INTERN
ibool
row_merge_is_index_usable(
/*======================*/
	const trx_t*		trx,	/*!< in: transaction */
	const dict_index_t*	index);	/*!< in: index to check */
/*********************************************************************//**
If there are views that refer to the old table name then we "attach" to
the new instance of the table else we drop it immediately.
@return	DB_SUCCESS or error code */
UNIV_INTERN
ulint
row_merge_drop_table(
/*=================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_table_t*	table);		/*!< in: table instance to drop */
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return	DB_SUCCESS or error code */
UNIV_INTERN
ulint
row_merge_build_indexes(
/*====================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_table_t*	old_table,	/*!< in: table where rows are
					read from */
	dict_table_t*	new_table,	/*!< in: table where indexes are
					created; identical to old_table
					unless creating a PRIMARY KEY */
	dict_index_t**	indexes,	/*!< in: indexes to be created */
	ulint		n_indexes,	/*!< in: size of indexes[] */
	struct TABLE*	table);		/*!< in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
/********************************************************************//**
Write a buffer to a block. */
UNIV_INTERN
void
row_merge_buf_write(
/*================*/
	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
	const merge_file_t*	of,	/*!< in: output file */
	row_merge_block_t*	block);	/*!< out: buffer for writing to file */
/********************************************************************//**
Sort a buffer. */
UNIV_INTERN
void
row_merge_buf_sort(
/*===============*/
        row_merge_buf_t*        buf,    /*!< in/out: sort buffer */
        row_merge_dup_t*        dup);	/*!< in/out: for reporting duplicates */
/********************************************************************//**
Write a merge block to the file system.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
row_merge_write(
/*============*/
	int		fd,	/*!< in: file descriptor */
	ulint		offset,	/*!< in: offset where to write,
				in number of row_merge_block_t elements */
	const void*	buf);	/*!< in: data */
/********************************************************************//**
Empty a sort buffer.
@return sort buffer */
UNIV_INTERN
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
        row_merge_buf_t*        buf);    /*!< in,own: sort buffer */
/*********************************************************************//**
Create a merge file. */
UNIV_INTERN
void
row_merge_file_create(
/*==================*/
        merge_file_t*   merge_file);     /*!< out: merge file structure */
/*********************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
row_merge_sort(
/*===========*/
	trx_t*			trx,	/*!< in: transaction */
	const dict_index_t*	index,	/*!< in: index being created */
	merge_file_t*		file,	/*!< in/out: file containing
					index entries */
	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
	int*			tmpfd,	/*!< in/out: temporary file handle */
	struct TABLE*		table);	/*!< in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
/*********************************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
UNIV_INTERN
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
        dict_index_t*   index);  /*!< in: secondary index */
/*********************************************************************//**
Deallocate a sort buffer. */
UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
	row_merge_buf_t*	buf);    /*!< in,own: sort buffer, to be freed */
/*********************************************************************//**
Destroy a merge file. */
UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
	merge_file_t*	merge_file);	/*!< out: merge file structure */
/*********************************************************************//**
Compare two merge records.
@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
UNIV_INTERN
int
row_merge_cmp(
/*==========*/
	const mrec_t*		mrec1,		/*!< in: first merge
						record to be compared */
	const mrec_t*		mrec2,		/*!< in: second merge
						record to be compared */
	const ulint*		offsets1,	/*!< in: first record offsets */
	const ulint*		offsets2,	/*!< in: second record offsets */
	const dict_index_t*	index,		/*!< in: index */
	ibool*			null_eq);	/*!< out: set to TRUE if
						found matching null values */
/********************************************************************//**
Read a merge block from the file system.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
row_merge_read(
/*===========*/
	int			fd,	/*!< in: file descriptor */
	ulint			offset,	/*!< in: offset where to read
					in number of row_merge_block_t
					elements */
	row_merge_block_t*	buf);	/*!< out: data */
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
UNIV_INTERN __attribute__((nonnull))
const byte*
row_merge_read_rec(
/*===============*/
	row_merge_block_t*	block,	/*!< in/out: file buffer */
	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
	const byte*		b,	/*!< in: pointer to record */
	const dict_index_t*	index,	/*!< in: index of the record */
	int			fd,	/*!< in: file descriptor */
	ulint*			foffs,	/*!< in/out: file offset */
	const mrec_t**		mrec,	/*!< out: pointer to merge record,
					or NULL on end of list
					(non-NULL on I/O error) */
	ulint*			offsets);/*!< out: offsets of mrec */
#endif /* row0merge.h */