mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
df563e0c03
main.derived_cond_pushdown: Move all 10.3 tests to the end, trim trailing white space, and add an "End of 10.3 tests" marker. Add --sorted_result to tests where the ordering is not deterministic. main.win_percentile: Add --sorted_result to tests where the ordering is no longer deterministic.
1514 lines
59 KiB
C++
1514 lines
59 KiB
C++
#ifndef HA_PARTITION_INCLUDED
|
|
#define HA_PARTITION_INCLUDED
|
|
|
|
/*
|
|
Copyright (c) 2005, 2012, Oracle and/or its affiliates.
|
|
Copyright (c) 2009, 2013, Monty Program Ab & SkySQL Ab.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
#include <new> /* std::nothrow */
#include "sql_partition.h" /* part_id_range, partition_element */
#include "queues.h" /* QUEUE */
|
|
|
|
#define PARTITION_BYTES_IN_POS 2
|
|
|
|
|
|
/** Struct used for partition_name_hash */
|
|
typedef struct st_part_name_def
|
|
{
|
|
uchar *partition_name;
|
|
uint length;
|
|
uint32 part_id;
|
|
my_bool is_subpart;
|
|
} PART_NAME_DEF;
|
|
|
|
/** class where to save partitions Handler_share's */
|
|
class Parts_share_refs
|
|
{
|
|
public:
|
|
uint num_parts; /**< Size of ha_share array */
|
|
Handler_share **ha_shares; /**< Storage for each part */
|
|
Parts_share_refs()
|
|
{
|
|
num_parts= 0;
|
|
ha_shares= NULL;
|
|
}
|
|
~Parts_share_refs()
|
|
{
|
|
uint i;
|
|
for (i= 0; i < num_parts; i++)
|
|
delete ha_shares[i];
|
|
delete[] ha_shares;
|
|
}
|
|
bool init(uint arg_num_parts)
|
|
{
|
|
DBUG_ASSERT(!num_parts && !ha_shares);
|
|
num_parts= arg_num_parts;
|
|
/* Allocate an array of Handler_share pointers */
|
|
ha_shares= new Handler_share *[num_parts];
|
|
if (!ha_shares)
|
|
{
|
|
num_parts= 0;
|
|
return true;
|
|
}
|
|
memset(ha_shares, 0, sizeof(Handler_share*) * num_parts);
|
|
return false;
|
|
}
|
|
};
|
|
|
|
class ha_partition;
|
|
|
|
/* Partition Full Text Search info */
|
|
struct st_partition_ft_info
|
|
{
|
|
struct _ft_vft *please;
|
|
st_partition_ft_info *next;
|
|
ha_partition *file;
|
|
FT_INFO **part_ft_info;
|
|
};
|
|
|
|
|
|
#ifdef HAVE_PSI_MUTEX_INTERFACE
|
|
extern PSI_mutex_key key_partition_auto_inc_mutex;
|
|
#endif
|
|
|
|
/**
|
|
Partition specific Handler_share.
|
|
*/
|
|
class Partition_share : public Handler_share
|
|
{
|
|
public:
|
|
bool auto_inc_initialized;
|
|
mysql_mutex_t auto_inc_mutex; /**< protecting auto_inc val */
|
|
ulonglong next_auto_inc_val; /**< first non reserved value */
|
|
/**
|
|
Hash of partition names. Initialized in the first ha_partition::open()
|
|
for the table_share. After that it is read-only, i.e. no locking required.
|
|
*/
|
|
bool partition_name_hash_initialized;
|
|
HASH partition_name_hash;
|
|
/** Storage for each partitions Handler_share */
|
|
Parts_share_refs partitions_share_refs;
|
|
Partition_share()
|
|
: auto_inc_initialized(false),
|
|
next_auto_inc_val(0),
|
|
partition_name_hash_initialized(false),
|
|
partition_names(NULL)
|
|
{
|
|
mysql_mutex_init(key_partition_auto_inc_mutex,
|
|
&auto_inc_mutex,
|
|
MY_MUTEX_INIT_FAST);
|
|
}
|
|
|
|
~Partition_share()
|
|
{
|
|
mysql_mutex_destroy(&auto_inc_mutex);
|
|
if (partition_names)
|
|
{
|
|
my_free(partition_names);
|
|
}
|
|
if (partition_name_hash_initialized)
|
|
{
|
|
my_hash_free(&partition_name_hash);
|
|
}
|
|
}
|
|
|
|
bool init(uint num_parts);
|
|
|
|
/**
|
|
Release reserved auto increment values not used.
|
|
@param thd Thread.
|
|
@param table_share Table Share
|
|
@param next_insert_id Next insert id (first non used auto inc value).
|
|
@param max_reserved End of reserved auto inc range.
|
|
*/
|
|
void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
|
|
const ulonglong next_insert_id,
|
|
const ulonglong max_reserved);
|
|
|
|
/** lock mutex protecting auto increment value next_auto_inc_val. */
|
|
inline void lock_auto_inc()
|
|
{
|
|
mysql_mutex_lock(&auto_inc_mutex);
|
|
}
|
|
/** unlock mutex protecting auto increment value next_auto_inc_val. */
|
|
inline void unlock_auto_inc()
|
|
{
|
|
mysql_mutex_unlock(&auto_inc_mutex);
|
|
}
|
|
/**
|
|
Populate partition_name_hash with partition and subpartition names
|
|
from part_info.
|
|
@param part_info Partition info containing all partitions metadata.
|
|
|
|
@return Operation status.
|
|
@retval false Success.
|
|
@retval true Failure.
|
|
*/
|
|
bool populate_partition_name_hash(partition_info *part_info);
|
|
/** Get partition name.
|
|
|
|
@param part_id Partition id (for subpartitioned table only subpartition
|
|
names will be returned.)
|
|
|
|
@return partition name or NULL if error.
|
|
*/
|
|
const char *get_partition_name(size_t part_id) const;
|
|
private:
|
|
const uchar **partition_names;
|
|
/**
|
|
Insert [sub]partition name into partition_name_hash
|
|
@param name Partition name.
|
|
@param part_id Partition id.
|
|
@param is_subpart True if subpartition else partition.
|
|
|
|
@return Operation status.
|
|
@retval false Success.
|
|
@retval true Failure.
|
|
*/
|
|
bool insert_partition_name_in_hash(const char *name,
|
|
uint part_id,
|
|
bool is_subpart);
|
|
};
|
|
|
|
typedef struct st_partition_key_multi_range
|
|
{
|
|
uint id;
|
|
uchar *key[2];
|
|
uint length[2];
|
|
KEY_MULTI_RANGE key_multi_range;
|
|
range_id_t ptr;
|
|
st_partition_key_multi_range *next;
|
|
} PARTITION_KEY_MULTI_RANGE;
|
|
|
|
|
|
typedef struct st_partition_part_key_multi_range
|
|
{
|
|
PARTITION_KEY_MULTI_RANGE *partition_key_multi_range;
|
|
st_partition_part_key_multi_range *next;
|
|
} PARTITION_PART_KEY_MULTI_RANGE;
|
|
|
|
|
|
class ha_partition;
|
|
typedef struct st_partition_part_key_multi_range_hld
|
|
{
|
|
ha_partition *partition;
|
|
uint32 part_id;
|
|
PARTITION_PART_KEY_MULTI_RANGE *partition_part_key_multi_range;
|
|
} PARTITION_PART_KEY_MULTI_RANGE_HLD;
|
|
|
|
|
|
extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
|
|
extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
|
|
|
|
class ha_partition :public handler
|
|
{
|
|
private:
|
|
/*
  Kind of index scan; the current value is kept in m_index_scan_type.
  The numeric values are explicit and the value 2 is not assigned.
*/
enum partition_index_scan_type
{
  partition_index_read=       0,
  partition_index_first=      1,
  partition_index_last=       3,
  partition_index_read_last=  4,
  partition_read_range=       5,
  partition_no_index_scan=    6,
  partition_read_multi_range= 7,
  partition_ft_read=          8
};
|
|
/* Data for the partition handler */
|
|
int m_mode; // Open mode
|
|
uint m_open_test_lock; // Open test_if_locked
|
|
uchar *m_file_buffer; // Content of the .par file
|
|
char *m_name_buffer_ptr; // Pointer to first partition name
|
|
MEM_ROOT m_mem_root;
|
|
plugin_ref *m_engine_array; // Array of types of the handlers
|
|
handler **m_file; // Array of references to handler inst.
|
|
uint m_file_tot_parts; // Debug
|
|
handler **m_new_file; // Array of references to new handlers
|
|
handler **m_reorged_file; // Reorganised partitions
|
|
handler **m_added_file; // Added parts kept for errors
|
|
LEX_CSTRING *m_connect_string;
|
|
partition_info *m_part_info; // local reference to partition
|
|
Field **m_part_field_array; // Part field array locally to save acc
|
|
uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
|
|
st_partition_ft_info *ft_first;
|
|
st_partition_ft_info *ft_current;
|
|
/*
|
|
Current index.
|
|
When used in key_rec_cmp: If clustered pk, index compare
|
|
must compare pk if given index is same for two rows.
|
|
So normally m_curr_key_info[0]= current index and m_curr_key_info[1]= NULL,
|
|
and if clustered pk, [0]= current index, [1]= pk, [2]= NULL
|
|
*/
|
|
KEY *m_curr_key_info[3]; // Current index
|
|
uchar *m_rec0; // table->record[0]
|
|
const uchar *m_err_rec; // record which gave error
|
|
QUEUE m_queue; // Prio queue used by sorted read
|
|
|
|
/*
|
|
Length of an element in m_ordered_rec_buffer. The elements are composed of
|
|
|
|
[part_no] [table->record copy] [underlying_table_rowid]
|
|
|
|
underlying_table_rowid is only stored when the table has no extended keys.
|
|
*/
|
|
size_t m_priority_queue_rec_len;
|
|
|
|
/*
|
|
If true, then sorting records by key value also sorts them by their
|
|
underlying_table_rowid.
|
|
*/
|
|
bool m_using_extended_keys;
|
|
|
|
/*
|
|
Since the partition handler is a handler on top of other handlers, it
|
|
is necessary to keep information about what the underlying handler
|
|
characteristics are. It is not possible to keep any handler instances
|
|
for this since the MySQL Server sometimes allocates handler objects
|
|
without freeing them.
|
|
*/
|
|
/*
  State of this handler object; the current value is kept in
  m_handler_status. Values are consecutive, starting at 0.
*/
enum enum_handler_status
{
  handler_not_initialized= 0,
  handler_initialized,
  handler_opened,
  handler_closed
};
|
|
enum_handler_status m_handler_status;
|
|
|
|
uint m_reorged_parts; // Number of reorganised parts
|
|
uint m_tot_parts; // Total number of partitions;
|
|
uint m_num_locks; // For engines like ha_blackhole, which needs no locks
|
|
uint m_last_part; // Last file that we update,write,read
|
|
part_id_range m_part_spec; // Which parts to scan
|
|
uint m_scan_value; // Value passed in rnd_init
|
|
// call
|
|
uint m_ref_length; // Length of position in this
|
|
// handler object
|
|
key_range m_start_key; // index read key range
|
|
enum partition_index_scan_type m_index_scan_type;// What type of index
|
|
// scan
|
|
uint m_top_entry; // Which partition is to
|
|
// deliver next result
|
|
uint m_rec_length; // Local copy of record length
|
|
|
|
bool m_ordered; // Ordered/Unordered index scan
|
|
bool m_pkey_is_clustered; // Is primary key clustered
|
|
bool m_create_handler; // Handler used to create table
|
|
bool m_is_sub_partitioned; // Is subpartitioned
|
|
bool m_ordered_scan_ongoing;
|
|
bool m_rnd_init_and_first;
|
|
bool m_ft_init_and_first;
|
|
|
|
/*
|
|
If set, this object was created with ha_partition::clone and doesn't
|
|
"own" the m_part_info structure.
|
|
*/
|
|
ha_partition *m_is_clone_of;
|
|
MEM_ROOT *m_clone_mem_root;
|
|
|
|
/*
|
|
We keep track if all underlying handlers are MyISAM since MyISAM has a
|
|
great number of extra flags not needed by other handlers.
|
|
*/
|
|
bool m_myisam; // Are all underlying handlers
|
|
// MyISAM
|
|
/*
|
|
We keep track of InnoDB handlers below since it requires proper setting
|
|
of query_id in fields at index_init and index_read calls.
|
|
*/
|
|
bool m_innodb; // Are all underlying handlers
|
|
// InnoDB
|
|
/*
|
|
When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying
|
|
handlers immediately. Instead we cache it and call the underlying
|
|
immediately before starting the scan on the partition. This is to
|
|
prevent allocating a READ CACHE for each partition in parallel when
|
|
performing a full table scan on MyISAM partitioned table.
|
|
This state is cleared by extra(HA_EXTRA_NO_CACHE).
|
|
*/
|
|
bool m_extra_cache;
|
|
uint m_extra_cache_size;
|
|
/* The same goes for HA_EXTRA_PREPARE_FOR_UPDATE */
|
|
bool m_extra_prepare_for_update;
|
|
/* Which partition has active cache */
|
|
uint m_extra_cache_part_id;
|
|
|
|
void init_handler_variables();
|
|
/*
|
|
Variables for lock structures.
|
|
*/
|
|
|
|
bool auto_increment_lock; /**< lock reading/updating auto_inc */
|
|
/**
|
|
Flag to keep the auto_increment lock throughout the statement.
|
|
This is to ensure it will work with statement based replication.
|
|
*/
|
|
bool auto_increment_safe_stmt_log_lock;
|
|
/** For optimizing ha_start_bulk_insert calls */
|
|
MY_BITMAP m_bulk_insert_started;
|
|
ha_rows m_bulk_inserted_rows;
|
|
/** used for prediction of start_bulk_insert rows */
|
|
enum_monotonicity_info m_part_func_monotonicity_info;
|
|
part_id_range m_direct_update_part_spec;
|
|
bool m_pre_calling;
|
|
bool m_pre_call_use_parallel;
|
|
/* Keep track of bulk access requests */
|
|
bool bulk_access_executing;
|
|
|
|
/** keep track of locked partitions */
|
|
MY_BITMAP m_locked_partitions;
|
|
/** Stores shared auto_increment etc. */
|
|
Partition_share *part_share;
|
|
/** Temporary storage for new partitions Handler_shares during ALTER */
|
|
List<Parts_share_refs> m_new_partitions_share_refs;
|
|
/** Sorted array of partition ids in descending order of number of rows. */
|
|
uint32 *m_part_ids_sorted_by_num_of_records;
|
|
/* Compare function for my_qsort2, for reversed order. */
|
|
static int compare_number_of_records(ha_partition *me,
|
|
const uint32 *a,
|
|
const uint32 *b);
|
|
/** keep track of partitions to call ha_reset */
|
|
MY_BITMAP m_partitions_to_reset;
|
|
/** partitions that returned HA_ERR_KEY_NOT_FOUND. */
|
|
MY_BITMAP m_key_not_found_partitions;
|
|
bool m_key_not_found;
|
|
List<String> *m_partitions_to_open;
|
|
MY_BITMAP m_opened_partitions;
|
|
/** This is one of the m_file-s that is guaranteed to be opened. */
|
|
/** It is set in open_read_partitions() */
|
|
handler *m_file_sample;
|
|
public:
|
|
/** Return m_file, the array of references to the underlying handlers. */
handler **get_child_handlers() { return m_file; }
|
|
/** Return the address of m_part_spec (which parts to scan). */
virtual part_id_range *get_part_spec() { return &m_part_spec; }
|
|
/** Return the constant NO_CURRENT_PART_ID. */
virtual uint get_no_current_part_id() { return NO_CURRENT_PART_ID; }
|
|
/** Return part_share, the Partition_share used by this handler. */
Partition_share *get_part_share()
{
  return part_share;
}
|
|
handler *clone(const char *name, MEM_ROOT *mem_root);
|
|
/**
  Remember the partition info to use and cache whether it is subpartitioned.

  @param part_info  Partition info; it is dereferenced, so it must not be NULL.
*/
virtual void set_part_info(partition_info *part_info)
{
  m_is_sub_partitioned= part_info->is_sub_partitioned();
  m_part_info= part_info;
}
|
|
|
|
virtual void return_record_by_parent();
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE create/delete handler object
|
|
-------------------------------------------------------------------------
|
|
Object create/delete methods. The normal one is called when a table object
|
|
exists. There is also a method to create the handler object with only
|
|
partition information. This is used from mysql_create_table when the
|
|
table is to be created and the engine type is deduced to be the
|
|
partition handler.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
ha_partition(handlerton *hton, TABLE_SHARE * table);
|
|
ha_partition(handlerton *hton, partition_info * part_info);
|
|
ha_partition(handlerton *hton, TABLE_SHARE *share,
|
|
partition_info *part_info_arg,
|
|
ha_partition *clone_arg,
|
|
MEM_ROOT *clone_mem_root_arg);
|
|
~ha_partition();
|
|
void ha_partition_init();
|
|
/*
|
|
A partition handler has no characteristics in itself. It only inherits
|
|
those from the underlying handlers. Here we set-up those constants to
|
|
enable later calls of the methods to retrieve constants from the under-
|
|
lying handlers. Returns false if not successful.
|
|
*/
|
|
bool initialize_partition(MEM_ROOT *mem_root);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE meta data changes
|
|
-------------------------------------------------------------------------
|
|
Meta data routines to CREATE, DROP, RENAME table and often used at
|
|
ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..).
|
|
|
|
update_table_comment is used in SHOW TABLE commands to provide a
|
|
chance for the handler to add any interesting comments to the table
|
|
comments not provided by the users comment.
|
|
|
|
create_partitioning_metadata is called before opening a new handler object
|
|
with openfrm to call create. It is used to create any local handler
|
|
object needed in opening the object in openfrm
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual int delete_table(const char *from);
|
|
virtual int rename_table(const char *from, const char *to);
|
|
virtual int create(const char *name, TABLE *form,
|
|
HA_CREATE_INFO *create_info);
|
|
virtual int create_partitioning_metadata(const char *name,
|
|
const char *old_name, int action_flag);
|
|
virtual void update_create_info(HA_CREATE_INFO *create_info);
|
|
virtual char *update_table_comment(const char *comment);
|
|
virtual int change_partitions(HA_CREATE_INFO *create_info,
|
|
const char *path,
|
|
ulonglong * const copied,
|
|
ulonglong * const deleted,
|
|
const uchar *pack_frm_data,
|
|
size_t pack_frm_len);
|
|
virtual int drop_partitions(const char *path);
|
|
virtual int rename_partitions(const char *path);
|
|
bool get_no_parts(const char *name, uint *num_parts)
|
|
{
|
|
DBUG_ENTER("ha_partition::get_no_parts");
|
|
*num_parts= m_tot_parts;
|
|
DBUG_RETURN(0);
|
|
}
|
|
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
|
|
virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
|
|
uint table_changes);
|
|
/**
  Forward update_create_info() to the handler of one partition.

  @param create_info  Create info passed on to the partition's handler.
  @param part_id      Index into m_file; no range check is done here.
*/
void update_part_create_info(HA_CREATE_INFO *create_info, uint part_id)
{
  handler *part_handler= m_file[part_id];
  part_handler->update_create_info(create_info);
}
|
|
private:
|
|
int copy_partitions(ulonglong * const copied, ulonglong * const deleted);
|
|
void cleanup_new_partition(uint part_count);
|
|
int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info,
|
|
handler *file, const char *part_name,
|
|
partition_element *p_elem,
|
|
uint disable_non_uniq_indexes);
|
|
/*
|
|
delete_table and rename_table uses very similar logic which
|
|
is packed into this routine.
|
|
*/
|
|
uint del_ren_table(const char *from, const char *to);
|
|
/*
|
|
One method to create the table_name.par file containing the names of the
|
|
underlying partitions, their engine and the number of partitions.
|
|
And one method to read it in.
|
|
*/
|
|
bool create_handler_file(const char *name);
|
|
bool setup_engine_array(MEM_ROOT *mem_root);
|
|
bool read_par_file(const char *name);
|
|
bool get_from_handler_file(const char *name, MEM_ROOT *mem_root,
|
|
bool is_clone);
|
|
bool new_handlers_from_part_info(MEM_ROOT *mem_root);
|
|
bool create_handlers(MEM_ROOT *mem_root);
|
|
void clear_handler_file();
|
|
int set_up_table_before_create(TABLE *table_arg,
|
|
const char *partition_name_with_path,
|
|
HA_CREATE_INFO *info,
|
|
partition_element *p_elem);
|
|
partition_element *find_partition_element(uint part_id);
|
|
bool insert_partition_name_in_hash(const char *name, uint part_id,
|
|
bool is_subpart);
|
|
bool populate_partition_name_hash();
|
|
Partition_share *get_share();
|
|
bool set_ha_share_ref(Handler_share **ha_share);
|
|
void fix_data_dir(char* path);
|
|
bool init_partition_bitmaps();
|
|
void free_partition_bitmaps();
|
|
|
|
public:
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE open/close object
|
|
-------------------------------------------------------------------------
|
|
Open and close handler object to ensure all underlying files and
|
|
objects allocated and deallocated for query handling is handled
|
|
properly.
|
|
-------------------------------------------------------------------------
|
|
|
|
A handler object is opened as part of its initialisation and before
|
|
being used for normal queries (though not always before meta-data changes).
|
|
If the object was opened it will also be closed before being deleted.
|
|
*/
|
|
virtual int open(const char *name, int mode, uint test_if_locked);
|
|
virtual int close(void);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE start/end statement
|
|
-------------------------------------------------------------------------
|
|
This module contains methods that are used to understand start/end of
|
|
statements, transaction boundaries, and aid for proper concurrency
|
|
control.
|
|
The partition handler need not implement abort and commit since this
|
|
will be handled by any underlying handlers implementing transactions.
|
|
There is only one call to each handler type involved per transaction
|
|
and these go directly to the handlers supporting transactions
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to,
|
|
enum thr_lock_type lock_type);
|
|
virtual int external_lock(THD * thd, int lock_type);
|
|
/** Return hton_name() of the handlerton given by partition_ht(). */
LEX_CSTRING *engine_name()
{
  return hton_name(partition_ht());
}
|
|
/*
|
|
When table is locked a statement is started by calling start_stmt
|
|
instead of external_lock
|
|
*/
|
|
virtual int start_stmt(THD * thd, thr_lock_type lock_type);
|
|
/*
|
|
Lock count is number of locked underlying handlers (I assume)
|
|
*/
|
|
virtual uint lock_count(void) const;
|
|
/*
|
|
Call to unlock rows not to be updated in transaction
|
|
*/
|
|
virtual void unlock_row();
|
|
/*
|
|
Check if semi consistent read
|
|
*/
|
|
virtual bool was_semi_consistent_read();
|
|
/*
|
|
Call to hint about semi consistent read
|
|
*/
|
|
virtual void try_semi_consistent_read(bool);
|
|
|
|
/*
|
|
NOTE: due to performance and resource issues with many partitions,
|
|
we only use the m_psi on the ha_partition handler, excluding all
|
|
partitions m_psi.
|
|
*/
|
|
#ifdef HAVE_M_PSI_PER_PARTITION
|
|
/*
|
|
Bind the table/handler thread to track table i/o.
|
|
*/
|
|
virtual void unbind_psi();
|
|
virtual void rebind_psi();
|
|
#endif
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE change record
|
|
-------------------------------------------------------------------------
|
|
This part of the handler interface is used to change the records
|
|
after INSERT, DELETE, UPDATE, REPLACE method calls but also other
|
|
special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE.
|
|
-------------------------------------------------------------------------
|
|
|
|
These methods are used for insert (write_row), update (update_row)
|
|
and delete (delete_row). All methods to change data always work on
|
|
one row at a time. update_row and delete_row also contains the old
|
|
row.
|
|
delete_all_rows will delete all rows in the table in one call as a
|
|
special optimisation for DELETE from table;
|
|
|
|
Bulk inserts are supported if all underlying handlers support it.
|
|
start_bulk_insert and end_bulk_insert is called before and after a
|
|
number of calls to write_row.
|
|
*/
|
|
virtual int write_row(uchar * buf);
|
|
virtual bool start_bulk_update();
|
|
virtual int exec_bulk_update(ha_rows *dup_key_found);
|
|
virtual int end_bulk_update();
|
|
virtual int bulk_update_row(const uchar *old_data, const uchar *new_data,
|
|
ha_rows *dup_key_found);
|
|
virtual int update_row(const uchar * old_data, const uchar * new_data);
|
|
virtual int direct_update_rows_init(List<Item> *update_fields);
|
|
virtual int pre_direct_update_rows_init(List<Item> *update_fields);
|
|
virtual int direct_update_rows(ha_rows *update_rows);
|
|
virtual int pre_direct_update_rows();
|
|
virtual bool start_bulk_delete();
|
|
virtual int end_bulk_delete();
|
|
virtual int delete_row(const uchar * buf);
|
|
virtual int direct_delete_rows_init();
|
|
virtual int pre_direct_delete_rows_init();
|
|
virtual int direct_delete_rows(ha_rows *delete_rows);
|
|
virtual int pre_direct_delete_rows();
|
|
virtual int delete_all_rows(void);
|
|
virtual int truncate();
|
|
virtual void start_bulk_insert(ha_rows rows, uint flags);
|
|
virtual int end_bulk_insert();
|
|
private:
|
|
ha_rows guess_bulk_insert_rows();
|
|
void start_part_bulk_insert(THD *thd, uint part_id);
|
|
long estimate_read_buffer_size(long original_size);
|
|
public:
|
|
|
|
/*
|
|
Method for truncating a specific partition.
|
|
(i.e. ALTER TABLE t1 TRUNCATE PARTITION p).
|
|
|
|
@remark This method is a partitioning-specific hook
|
|
and thus not a member of the general SE API.
|
|
*/
|
|
int truncate_partition(Alter_info *, bool *binlog_stmt);
|
|
|
|
virtual bool is_fatal_error(int error, uint flags)
|
|
{
|
|
if (!handler::is_fatal_error(error, flags) ||
|
|
error == HA_ERR_NO_PARTITION_FOUND ||
|
|
error == HA_ERR_NOT_IN_LOCK_PARTITIONS)
|
|
return FALSE;
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE full table scan
|
|
-------------------------------------------------------------------------
|
|
This module is used for the most basic access method for any table
|
|
handler. This is to fetch all data through a full table scan. No
|
|
indexes are needed to implement this part.
|
|
It contains one method to start the scan (rnd_init) that can also be
|
|
called multiple times (typical in a nested loop join). Then proceeding
|
|
to the next record (rnd_next) and closing the scan (rnd_end).
|
|
To remember a record for later access there is a method (position)
|
|
and there is a method used to retrieve the record based on the stored
|
|
position.
|
|
The position can be a file position, a primary key, a ROWID dependent
|
|
on the handler below.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
/*
|
|
unlike index_init(), rnd_init() can be called two times
|
|
without rnd_end() in between (it only makes sense if scan=1).
|
|
then the second call should prepare for the new table scan
|
|
(e.g if rnd_init allocates the cursor, second call should
|
|
position it to the start of the table, no need to deallocate
|
|
and allocate it again
|
|
*/
|
|
virtual int rnd_init(bool scan);
|
|
virtual int rnd_end();
|
|
virtual int rnd_next(uchar * buf);
|
|
virtual int rnd_pos(uchar * buf, uchar * pos);
|
|
virtual int rnd_pos_by_record(uchar *record);
|
|
virtual void position(const uchar * record);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE index scan
|
|
-------------------------------------------------------------------------
|
|
This part of the handler interface is used to perform access through
|
|
indexes. The interface is defined as a scan interface but the handler
|
|
can also use key lookup if the index is a unique index or a primary
|
|
key index.
|
|
Index scans are mostly useful for SELECT queries but are an important
|
|
part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
|
|
and so forth.
|
|
Naturally an index is needed for an index scan and indexes can either
|
|
be ordered or hash based. Some ordered indexes can return data in order
|
|
but not necessarily all of them.
|
|
There are many flags that define the behavior of indexes in the
|
|
various handlers. These methods are found in the optimizer module.
|
|
-------------------------------------------------------------------------
|
|
|
|
index_read is called to start a scan of an index. The find_flag defines
|
|
the semantics of the scan. These flags are defined in
|
|
include/my_base.h
|
|
index_read_idx is the same but also initializes index before calling doing
|
|
the same thing as index_read. Thus it is similar to index_init followed
|
|
by index_read. This is also how we implement it.
|
|
|
|
index_read/index_read_idx does also return the first row. Thus for
|
|
key lookups, the index_read will be the only call to the handler in
|
|
the index scan.
|
|
|
|
index_init initializes an index before using it and index_end does
|
|
any end processing needed.
|
|
*/
|
|
virtual int index_read_map(uchar * buf, const uchar * key,
|
|
key_part_map keypart_map,
|
|
enum ha_rkey_function find_flag);
|
|
virtual int index_init(uint idx, bool sorted);
|
|
virtual int index_end();
|
|
|
|
/**
|
|
@brief
|
|
Positions an index cursor to the index specified in the handle. Fetches the
|
|
row if available. If the key value is null, begin at first key of the
|
|
index.
|
|
*/
|
|
virtual int index_read_idx_map(uchar *buf, uint index, const uchar *key,
|
|
key_part_map keypart_map,
|
|
enum ha_rkey_function find_flag);
|
|
/*
|
|
These methods are used to jump to next or previous entry in the index
|
|
scan. There are also methods to jump to first and last entry.
|
|
*/
|
|
virtual int index_next(uchar * buf);
|
|
virtual int index_prev(uchar * buf);
|
|
virtual int index_first(uchar * buf);
|
|
virtual int index_last(uchar * buf);
|
|
virtual int index_next_same(uchar * buf, const uchar * key, uint keylen);
|
|
|
|
int index_read_last_map(uchar *buf,
|
|
const uchar *key,
|
|
key_part_map keypart_map);
|
|
|
|
/*
|
|
read_first_row is virtual method but is only implemented by
|
|
handler.cc, no storage engine has implemented it so neither
|
|
will the partition handler.
|
|
|
|
virtual int read_first_row(uchar *buf, uint primary_key);
|
|
*/
|
|
|
|
|
|
virtual int read_range_first(const key_range * start_key,
|
|
const key_range * end_key,
|
|
bool eq_range, bool sorted);
|
|
virtual int read_range_next();
|
|
|
|
|
|
HANDLER_BUFFER *m_mrr_buffer;
|
|
uint *m_mrr_buffer_size;
|
|
uchar *m_mrr_full_buffer;
|
|
uint m_mrr_full_buffer_size;
|
|
uint m_mrr_new_full_buffer_size;
|
|
MY_BITMAP m_mrr_used_partitions;
|
|
uint *m_stock_range_seq;
|
|
uint m_current_range_seq;
|
|
uint m_mrr_mode;
|
|
uint m_mrr_n_ranges;
|
|
range_id_t *m_range_info;
|
|
bool m_multi_range_read_first;
|
|
uint m_mrr_range_init_flags;
|
|
uint m_mrr_range_length;
|
|
PARTITION_KEY_MULTI_RANGE *m_mrr_range_first;
|
|
PARTITION_KEY_MULTI_RANGE *m_mrr_range_current;
|
|
uint *m_part_mrr_range_length;
|
|
PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_first;
|
|
PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_current;
|
|
PARTITION_PART_KEY_MULTI_RANGE_HLD *m_partition_part_key_multi_range_hld;
|
|
range_seq_t m_seq;
|
|
RANGE_SEQ_IF *m_seq_if;
|
|
RANGE_SEQ_IF m_part_seq_if;
|
|
|
|
virtual int multi_range_key_create_key(
|
|
RANGE_SEQ_IF *seq,
|
|
range_seq_t seq_it
|
|
);
|
|
virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|
void *seq_init_param,
|
|
uint n_ranges, uint *bufsz,
|
|
uint *mrr_mode,
|
|
Cost_estimate *cost);
|
|
virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
|
|
uint key_parts, uint *bufsz,
|
|
uint *mrr_mode, Cost_estimate *cost);
|
|
virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
|
|
uint n_ranges, uint mrr_mode,
|
|
HANDLER_BUFFER *buf);
|
|
virtual int multi_range_read_next(range_id_t *range_info);
|
|
virtual int multi_range_read_explain_info(uint mrr_mode, char *str,
|
|
size_t size);
|
|
/** Return m_last_part (last file that we update, write or read). */
uint last_part()
{
  return m_last_part;
}
|
|
|
|
private:
|
|
bool init_record_priority_queue();
|
|
void destroy_record_priority_queue();
|
|
int common_index_read(uchar * buf, bool have_start_key);
|
|
int common_first_last(uchar * buf);
|
|
int partition_scan_set_up(uchar * buf, bool idx_read_flag);
|
|
bool check_parallel_search();
|
|
int handle_pre_scan(bool reverse_order, bool use_parallel);
|
|
int handle_unordered_next(uchar * buf, bool next_same);
|
|
int handle_unordered_scan_next_partition(uchar * buf);
|
|
int handle_ordered_index_scan(uchar * buf, bool reverse_order);
|
|
int handle_ordered_index_scan_key_not_found();
|
|
int handle_ordered_next(uchar * buf, bool next_same);
|
|
int handle_ordered_prev(uchar * buf);
|
|
void return_top_record(uchar * buf);
|
|
public:
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE information calls
|
|
-------------------------------------------------------------------------
|
|
These calls are used to inform the handler of specifics of the ongoing
|
|
scans and other actions. Most of these are used for optimisation
|
|
purposes.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual int info(uint);
|
|
void get_dynamic_partition_info(PARTITION_STATS *stat_info,
|
|
uint part_id);
|
|
void set_partitions_to_open(List<String> *partition_names);
|
|
int change_partitions_to_open(List<String> *partition_names);
|
|
int open_read_partitions(char *name_buff, size_t name_buff_size);
|
|
virtual int extra(enum ha_extra_function operation);
|
|
virtual int extra_opt(enum ha_extra_function operation, ulong arg);
|
|
virtual int reset(void);
|
|
virtual uint count_query_cache_dependant_tables(uint8 *tables_type);
|
|
virtual my_bool
|
|
register_query_cache_dependant_tables(THD *thd,
|
|
Query_cache *cache,
|
|
Query_cache_block_table **block,
|
|
uint *n);
|
|
|
|
private:
|
|
typedef int handler_callback(handler *, void *);
|
|
|
|
my_bool reg_query_cache_dependant_table(THD *thd,
|
|
char *engine_key,
|
|
uint engine_key_len,
|
|
char *query_key, uint query_key_len,
|
|
uint8 type,
|
|
Query_cache *cache,
|
|
Query_cache_block_table
|
|
**block_table,
|
|
handler *file, uint *n);
|
|
static const uint NO_CURRENT_PART_ID= NOT_A_PARTITION_ID;
|
|
int loop_partitions(handler_callback callback, void *param);
|
|
int loop_extra_alter(enum ha_extra_function operations);
|
|
void late_extra_cache(uint partition_id);
|
|
void late_extra_no_cache(uint partition_id);
|
|
void prepare_extra_cache(uint cachesize);
|
|
/* Accessor: returns the handler pointer stored in m_file_sample. */
handler *get_open_file_sample() const
{
  return m_file_sample;
}
|
|
public:
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE optimiser support
|
|
-------------------------------------------------------------------------
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
|
|
/*
|
|
NOTE !!!!!!
|
|
-------------------------------------------------------------------------
|
|
-------------------------------------------------------------------------
|
|
One important part of the public handler interface that is not depicted in
|
|
the methods is the attribute records
|
|
|
|
which is defined in the base class. This is looked upon directly and is
|
|
set by calling info(HA_STATUS_INFO) ?
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
|
|
private:
|
|
/* Helper functions for optimizer hints. */
|
|
ha_rows min_rows_for_estimate();
|
|
uint get_biggest_used_partition(uint *part_index);
|
|
public:
|
|
|
|
/*
|
|
keys_to_use_for_scanning can probably be implemented as the
|
|
intersection of all underlying handlers if mixed handlers are used.
|
|
This method is used to derive whether an index can be used for
|
|
index-only scanning when performing an ORDER BY query.
|
|
Only called from one place in sql_select.cc
|
|
*/
|
|
virtual const key_map *keys_to_use_for_scanning();
|
|
|
|
/*
|
|
Called in test_quick_select to determine if indexes should be used.
|
|
*/
|
|
virtual double scan_time();
|
|
|
|
/*
|
|
The next method will never be called if you do not implement indexes.
|
|
*/
|
|
virtual double read_time(uint index, uint ranges, ha_rows rows);
|
|
/*
|
|
For the given range how many records are estimated to be in this range.
|
|
Used by optimiser to calculate cost of using a particular index.
|
|
*/
|
|
virtual ha_rows records_in_range(uint inx, key_range * min_key,
|
|
key_range * max_key);
|
|
|
|
/*
|
|
Upper bound of the number of records returned in a scan is the sum of all
|
|
underlying handlers.
|
|
*/
|
|
virtual ha_rows estimate_rows_upper_bound();
|
|
|
|
/*
|
|
table_cache_type is implemented by the underlying handler but all
|
|
underlying handlers must have the same implementation for it to work.
|
|
*/
|
|
virtual uint8 table_cache_type();
|
|
virtual ha_rows records();
|
|
|
|
/* Calculate hash value for PARTITION BY KEY tables. */
|
|
static uint32 calculate_key_hash_value(Field **field_array);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE print messages
|
|
-------------------------------------------------------------------------
|
|
This module contains various methods that return text messages for
|
|
table types, index type and error messages.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
/*
|
|
The name of the index type that will be used for display
|
|
Here we must ensure that all handlers use the same index type
|
|
for each index created.
|
|
*/
|
|
virtual const char *index_type(uint inx);
|
|
|
|
/* The name of the table type that will be used for display purposes */
|
|
virtual const char *table_type() const;
|
|
|
|
/* The name of the row type used for the underlying tables. */
|
|
virtual enum row_type get_row_type() const;
|
|
|
|
/*
|
|
Handler specific error messages
|
|
*/
|
|
virtual void print_error(int error, myf errflag);
|
|
virtual bool get_error_message(int error, String * buf);
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE handler characteristics
|
|
-------------------------------------------------------------------------
|
|
This module contains a number of methods defining limitations and
|
|
characteristics of the handler. The partition handler will calculate
|
|
these characteristics based on underlying handler characteristics.
|
|
-------------------------------------------------------------------------
|
|
|
|
This is a list of flags that says what the storage engine
|
|
implements. The current table flags are documented in handler.h
|
|
The partition handler will support whatever the underlying handlers
|
|
support except when specifically mentioned below about exceptions
|
|
to this rule.
|
|
NOTE: This cannot be cached since it can depend on TRANSACTION ISOLATION
|
|
LEVEL which is dynamic, see bug#39084.
|
|
|
|
HA_READ_RND_SAME:
|
|
Not currently used. (Means that the handler supports the rnd_same() call)
|
|
(MyISAM, HEAP)
|
|
|
|
HA_TABLE_SCAN_ON_INDEX:
|
|
Used to avoid scanning full tables on an index. If this flag is set then
|
|
the handler always has a primary key (hidden if not defined) and this
|
|
index is used for scanning rather than a full table scan in all
|
|
situations.
|
|
(InnoDB, Federated)
|
|
|
|
HA_REC_NOT_IN_SEQ:
|
|
This flag is set for handlers that cannot guarantee that the rows are
|
|
returned according to incremental positions (0, 1, 2, 3...).
|
|
This also means that rnd_next() should return HA_ERR_RECORD_DELETED
|
|
if it finds a deleted row.
|
|
(MyISAM (not fixed length row), HEAP, InnoDB)
|
|
|
|
HA_CAN_GEOMETRY:
|
|
Can the storage engine handle spatial data.
|
|
Used to check that no spatial attributes are declared unless
|
|
the storage engine is capable of handling it.
|
|
(MyISAM)
|
|
|
|
HA_FAST_KEY_READ:
|
|
Setting this flag indicates that the handler is equally fast in
|
|
finding a row by key as by position.
|
|
This flag is used in a very special situation in conjunction with
|
|
filesort's. For further explanation see intro to init_read_record.
|
|
(HEAP, InnoDB)
|
|
|
|
HA_NULL_IN_KEY:
|
|
Are NULL values allowed in indexes?
|
|
If this is not allowed then it is not possible to use an index on a
|
|
NULLable field.
|
|
(HEAP, MyISAM, InnoDB)
|
|
|
|
HA_DUPLICATE_POS:
|
|
Tells that the position for the conflicting duplicate key
|
|
record is stored in table->file->dupp_ref. (insert uses rnd_pos() on
|
|
this to find the duplicated row)
|
|
(MyISAM)
|
|
|
|
HA_CAN_INDEX_BLOBS:
|
|
Is the storage engine capable of defining an index of a prefix on
|
|
a BLOB attribute.
|
|
(Federated, MyISAM, InnoDB)
|
|
|
|
HA_AUTO_PART_KEY:
|
|
Auto increment fields can be part of a multi-part key. For second part
|
|
auto-increment keys, the auto_incrementing is done in handler.cc
|
|
(Federated, MyISAM)
|
|
|
|
HA_REQUIRE_PRIMARY_KEY:
|
|
Can't define a table without primary key (and cannot handle a table
|
|
with hidden primary key)
|
|
(No handler has this limitation currently)
|
|
|
|
HA_WANTS_PRIMARY_KEY:
|
|
Can't define a table without primary key except sequences
|
|
(Only InnoDB has this when using innodb_force_primary_key == ON)
|
|
|
|
HA_STATS_RECORDS_IS_EXACT:
|
|
Does the counter of records after the info call specify an exact
|
|
value or not. If it does this flag is set.
|
|
Only MyISAM and HEAP use an exact count.
|
|
|
|
HA_CAN_INSERT_DELAYED:
|
|
Can the storage engine support delayed inserts.
|
|
To start with the partition handler will not support delayed inserts.
|
|
Further investigation needed.
|
|
(HEAP, MyISAM)
|
|
|
|
HA_PRIMARY_KEY_IN_READ_INDEX:
|
|
This parameter is set when the handler will also return the primary key
|
|
when doing read-only-key on another index.
|
|
|
|
HA_NOT_DELETE_WITH_CACHE:
|
|
Seems to be an old MyISAM feature that is no longer used. No handler
|
|
has it defined but it is checked in init_read_record.
|
|
Further investigation needed.
|
|
(No handler defines it)
|
|
|
|
HA_NO_PREFIX_CHAR_KEYS:
|
|
Indexes on prefixes of character fields are not allowed.
|
|
(Federated)
|
|
|
|
HA_CAN_FULLTEXT:
|
|
Does the storage engine support fulltext indexes
|
|
The partition handler will start by not supporting fulltext indexes.
|
|
(MyISAM)
|
|
|
|
HA_CAN_SQL_HANDLER:
|
|
Can the HANDLER interface in the MySQL API be used towards this
|
|
storage engine.
|
|
(MyISAM, InnoDB)
|
|
|
|
HA_NO_AUTO_INCREMENT:
|
|
Set if the storage engine does not support auto increment fields.
|
|
(Currently not set by any handler)
|
|
|
|
HA_HAS_CHECKSUM:
|
|
Special MyISAM feature. Has special SQL support in CREATE TABLE.
|
|
No special handling needed by partition handler.
|
|
(MyISAM)
|
|
|
|
HA_FILE_BASED:
|
|
Should file names always be in lower case (used by engines
|
|
that map table names to file names).
|
|
Since partition handler has a local file this flag is set.
|
|
(Federated, MyISAM)
|
|
|
|
HA_CAN_BIT_FIELD:
|
|
Is the storage engine capable of handling bit fields?
|
|
(MyISAM)
|
|
|
|
HA_NEED_READ_RANGE_BUFFER:
|
|
Is Read Multi-Range supported => need multi read range buffer
|
|
This parameter specifies whether a buffer for read multi range
|
|
is needed by the handler. Whether the handler supports this
|
|
feature or not is dependent of whether the handler implements
|
|
read_multi_range* calls or not. The only handler currently
|
|
supporting this feature is NDB so the partition handler need
|
|
not handle this call. There are methods in handler.cc that will
|
|
transfer those calls into index_read and other calls in the
|
|
index scan module.
|
|
(No handler defines it)
|
|
|
|
HA_PRIMARY_KEY_REQUIRED_FOR_POSITION:
|
|
Does the storage engine need a PK for position?
|
|
(InnoDB)
|
|
|
|
HA_FILE_BASED is always set for partition handler since we use a
|
|
special file for handling names of partitions, engine types.
|
|
HA_REC_NOT_IN_SEQ is always set for partition handler since we cannot
|
|
guarantee that the records will be returned in sequence.
|
|
HA_DUPLICATE_POS,
|
|
HA_CAN_INSERT_DELAYED, HA_PRIMARY_KEY_REQUIRED_FOR_POSITION are disabled
|
|
until further investigated.
|
|
*/
|
|
virtual Table_flags table_flags() const;
|
|
|
|
/*
|
|
This is a bitmap of flags that says how the storage engine
|
|
implements indexes. The current index flags are documented in
|
|
handler.h. If you do not implement indexes, just return zero
|
|
here.
|
|
|
|
part is the key part to check. First key part is 0
|
|
If all_parts is set, MySQL wants to know the flags for the combined
|
|
index up to and including 'part'.
|
|
|
|
HA_READ_NEXT:
|
|
Does the index support read next, this is assumed in the server
|
|
code and never checked so all indexes must support this.
|
|
Note that the handler can be used even if it doesn't have any index.
|
|
(HEAP, MyISAM, Federated, InnoDB)
|
|
|
|
HA_READ_PREV:
|
|
Can the index be used to scan backwards.
|
|
(HEAP, MyISAM, InnoDB)
|
|
|
|
HA_READ_ORDER:
|
|
Can the index deliver its record in index order. Typically true for
|
|
all ordered indexes and not true for hash indexes.
|
|
In first step this is not true for partition handler until a merge
|
|
sort has been implemented in partition handler.
|
|
Used to set keymap part_of_sortkey
|
|
This keymap is only used to find indexes usable for resolving an ORDER BY
|
|
in the query. Thus in most cases index_read will work just fine without
|
|
order in result production. When this flag is set it is however safe to
|
|
order all output started by index_read since most engines do this. With
|
|
read_multi_range calls there is a specific flag setting order or not
|
|
order so in those cases ordering of index output can be avoided.
|
|
(InnoDB, HEAP, MyISAM)
|
|
|
|
HA_READ_RANGE:
|
|
Specify whether index can handle ranges, typically true for all
|
|
ordered indexes and not true for hash indexes.
|
|
Used by optimiser to check if ranges (as key >= 5) can be optimised
|
|
by index.
|
|
(InnoDB, MyISAM, HEAP)
|
|
|
|
HA_ONLY_WHOLE_INDEX:
|
|
Can't use part key searches. This is typically true for hash indexes
|
|
and typically not true for ordered indexes.
|
|
(Federated, HEAP)
|
|
|
|
HA_KEYREAD_ONLY:
|
|
Does the storage engine support index-only scans on this index.
|
|
Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD
|
|
Used to set key_map keys_for_keyread and to check in optimiser for
|
|
index-only scans. When doing a read under HA_EXTRA_KEYREAD the handler
|
|
only has to fill in the columns the key covers. If
|
|
HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns
|
|
must be updated in the row.
|
|
(InnoDB, MyISAM)
|
|
*/
|
|
virtual ulong index_flags(uint inx, uint part, bool all_parts) const
{
  /*
    Only the handler of the first partition is consulted. The answer is
    therefore not safe when partitions use different storage engines or
    different index types.
  */
  handler *first_part_file= m_file[0];
  return first_part_file->index_flags(inx, part, all_parts);
}
|
|
|
|
/**
|
|
wrapper function for handlerton alter_table_flags, since
|
|
the ha_partition_hton cannot know all its capabilities
|
|
*/
|
|
virtual alter_table_operations alter_table_flags(alter_table_operations flags);
|
|
/*
|
|
unireg.cc will call the following to make sure that the storage engine
|
|
can handle the data it is about to send.
|
|
|
|
The maximum supported value is the minimum of all handlers in the table
|
|
*/
|
|
uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const;
|
|
virtual uint max_supported_record_length() const;
|
|
virtual uint max_supported_keys() const;
|
|
virtual uint max_supported_key_parts() const;
|
|
virtual uint max_supported_key_length() const;
|
|
virtual uint max_supported_key_part_length() const;
|
|
virtual uint min_record_length(uint options) const;
|
|
|
|
/*
|
|
Primary key is clustered can only be true if all underlying handlers have
|
|
this feature.
|
|
*/
|
|
virtual bool primary_key_is_clustered()
|
|
{ return m_pkey_is_clustered; }
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE compare records
|
|
-------------------------------------------------------------------------
|
|
cmp_ref checks if two references are the same. For most handlers this is
|
|
a simple memcmp of the reference. However some handlers use primary key
|
|
as reference and this can be the same even if memcmp says they are
|
|
different. This is due to character sets and end spaces and so forth.
|
|
For the partition handler the reference is first two bytes providing the
|
|
partition identity of the referred record and then the reference of the
|
|
underlying handler.
|
|
Thus cmp_ref for the partition handler always returns FALSE for records
|
|
not in the same partition and uses cmp_ref on the underlying handler
|
|
to check whether the rest of the reference part is also the same.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual int cmp_ref(const uchar * ref1, const uchar * ref2);
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE auto increment
|
|
-------------------------------------------------------------------------
|
|
This module is used to handle the support of auto increments.
|
|
|
|
This variable in the handler is used as part of the handler interface
|
|
It is maintained by the parent handler object and should not be
|
|
touched by child handler objects (see handler.cc for its use).
|
|
|
|
auto_increment_column_changed
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual bool need_info_for_auto_inc();
|
|
virtual bool can_use_for_auto_inc_init();
|
|
virtual void get_auto_increment(ulonglong offset, ulonglong increment,
|
|
ulonglong nb_desired_values,
|
|
ulonglong *first_value,
|
|
ulonglong *nb_reserved_values);
|
|
virtual void release_auto_increment();
|
|
private:
|
|
virtual int reset_auto_increment(ulonglong value);
|
|
void update_next_auto_inc_val();
|
|
virtual void lock_auto_increment()
|
|
{
|
|
/* lock already taken */
|
|
if (auto_increment_safe_stmt_log_lock)
|
|
return;
|
|
if (table_share->tmp_table == NO_TMP_TABLE)
|
|
{
|
|
part_share->lock_auto_inc();
|
|
DBUG_ASSERT(!auto_increment_lock);
|
|
auto_increment_lock= TRUE;
|
|
}
|
|
}
|
|
virtual void unlock_auto_increment()
|
|
{
|
|
/*
|
|
If auto_increment_safe_stmt_log_lock is true, we have to keep the lock.
|
|
It will be set to false and thus unlocked at the end of the statement by
|
|
ha_partition::release_auto_increment.
|
|
*/
|
|
if (auto_increment_lock && !auto_increment_safe_stmt_log_lock)
|
|
{
|
|
auto_increment_lock= FALSE;
|
|
part_share->unlock_auto_inc();
|
|
}
|
|
}
|
|
virtual void set_auto_increment_if_higher(Field *field)
{
  /*
    Take the field's integer value when the field is unsigned or the value
    is positive; otherwise use 0.
  */
  ulonglong candidate= 0;
  if (((Field_num*) field)->unsigned_flag || field->val_int() > 0)
    candidate= field->val_int();

  lock_auto_increment();
  DBUG_ASSERT(part_share->auto_inc_initialized ||
              !can_use_for_auto_inc_init());
  /* The comparison must be made while the mutex is taken. */
  if (part_share->next_auto_inc_val <= candidate)
    part_share->next_auto_inc_val= candidate + 1;
  unlock_auto_increment();
}
|
|
|
|
public:
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE initialize handler for HANDLER call
|
|
-------------------------------------------------------------------------
|
|
This method is a special InnoDB method called before a HANDLER query.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual void init_table_handle_for_HANDLER();
|
|
|
|
/*
|
|
The remainder of this file defines the handler methods not implemented
|
|
by the partition handler
|
|
*/
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE foreign key support
|
|
-------------------------------------------------------------------------
|
|
The following methods are used to implement foreign keys as supported by
|
|
InnoDB. Implement this ??
|
|
get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual
|
|
description of how the CREATE TABLE part to define FOREIGN KEY's is done.
|
|
free_foreign_key_create_info is used to free the memory area that provided
|
|
this description.
|
|
can_switch_engines checks if it is ok to switch to a new engine based on
|
|
the foreign key info in the table.
|
|
-------------------------------------------------------------------------
|
|
|
|
virtual char* get_foreign_key_create_info()
|
|
virtual void free_foreign_key_create_info(char* str)
|
|
|
|
virtual int get_foreign_key_list(THD *thd,
|
|
List<FOREIGN_KEY_INFO> *f_key_list)
|
|
virtual uint referenced_by_foreign_key()
|
|
*/
|
|
virtual bool can_switch_engines();
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE fulltext index
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
void ft_close_search(FT_INFO *handler);
|
|
virtual int ft_init();
|
|
virtual int pre_ft_init();
|
|
virtual void ft_end();
|
|
virtual int pre_ft_end();
|
|
virtual FT_INFO *ft_init_ext(uint flags, uint inx, String *key);
|
|
virtual int ft_read(uchar *buf);
|
|
virtual int pre_ft_read(bool use_parallel);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE restart full table scan at position (MyISAM)
|
|
-------------------------------------------------------------------------
|
|
The following method is only used by MyISAM when used as
|
|
temporary tables in a join.
|
|
virtual int restart_rnd_next(uchar *buf, uchar *pos);
|
|
*/
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE in-place ALTER TABLE
|
|
-------------------------------------------------------------------------
|
|
These methods are in the handler interface. (used by innodb-plugin)
|
|
They are used for in-place alter table:
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual enum_alter_inplace_result
|
|
check_if_supported_inplace_alter(TABLE *altered_table,
|
|
Alter_inplace_info *ha_alter_info);
|
|
virtual bool prepare_inplace_alter_table(TABLE *altered_table,
|
|
Alter_inplace_info *ha_alter_info);
|
|
virtual bool inplace_alter_table(TABLE *altered_table,
|
|
Alter_inplace_info *ha_alter_info);
|
|
virtual bool commit_inplace_alter_table(TABLE *altered_table,
|
|
Alter_inplace_info *ha_alter_info,
|
|
bool commit);
|
|
virtual void notify_table_changed();
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE tablespace support
|
|
-------------------------------------------------------------------------
|
|
Admin of table spaces is not applicable to the partition handler (InnoDB)
|
|
This means that the following method is not implemented:
|
|
-------------------------------------------------------------------------
|
|
virtual int discard_or_import_tablespace(my_bool discard)
|
|
*/
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE admin MyISAM
|
|
-------------------------------------------------------------------------
|
|
|
|
-------------------------------------------------------------------------
|
|
OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are
|
|
mapped to a routine that handles looping over a given set of
|
|
partitions and those routines send a flag indicating to execute on
|
|
all partitions.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int check(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int repair(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual bool check_and_repair(THD *thd);
|
|
virtual bool auto_repair(int error) const;
|
|
virtual bool is_crashed() const;
|
|
virtual int check_for_upgrade(HA_CHECK_OPT *check_opt);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE condition pushdown
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual const COND *cond_push(const COND *cond);
|
|
virtual void cond_pop();
|
|
virtual void clear_top_table_fields();
|
|
virtual int info_push(uint info_type, void *info);
|
|
|
|
private:
|
|
int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, uint flags);
|
|
int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, uint part_id,
|
|
uint flag);
|
|
/**
|
|
Check if the rows are placed in the correct partition. If the given
|
|
argument is true, then move the rows to the correct partition.
|
|
*/
|
|
int check_misplaced_rows(uint read_part_id, bool repair);
|
|
void append_row_to_str(String &str);
|
|
public:
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
Admin commands not supported currently (almost purely MyISAM routines)
|
|
This means that the following methods are not implemented:
|
|
-------------------------------------------------------------------------
|
|
|
|
virtual int backup(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int restore(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int dump(THD* thd, int fd = -1);
|
|
virtual int net_read_dump(NET* net);
|
|
*/
|
|
virtual uint checksum() const;
|
|
/* Enabled keycache for performance reasons, WL#4571 */
|
|
virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt);
|
|
virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt);
|
|
virtual TABLE_LIST *get_next_global_for_child();
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE enable/disable indexes
|
|
-------------------------------------------------------------------------
|
|
Enable/Disable Indexes are only supported by HEAP and MyISAM.
|
|
-------------------------------------------------------------------------
|
|
*/
|
|
virtual int disable_indexes(uint mode);
|
|
virtual int enable_indexes(uint mode);
|
|
virtual int indexes_are_disabled(void);
|
|
|
|
/*
|
|
-------------------------------------------------------------------------
|
|
MODULE append_create_info
|
|
-------------------------------------------------------------------------
|
|
append_create_info is only used by MyISAM MERGE tables and the partition
|
|
handler will not support this handler as underlying handler.
|
|
Implement this??
|
|
-------------------------------------------------------------------------
|
|
virtual void append_create_info(String *packet)
|
|
*/
|
|
|
|
/*
|
|
the following heavily relies on the fact that all partitions
|
|
are in the same storage engine.
|
|
|
|
When this limitation is lifted, the following hack should go away,
|
|
and a proper interface for engines needs to be introduced:
|
|
|
|
a PARTITION_SHARE structure that has a pointer to the TABLE_SHARE.
|
|
is given to engines everywhere where TABLE_SHARE is used now
|
|
has members like option_struct, ha_data
|
|
perhaps TABLE needs to be split the same way too...
|
|
|
|
this can also be done before partition will support a mix of engines,
|
|
but preferably together with other incompatible API changes.
|
|
*/
|
|
virtual handlerton *partition_ht() const
{
  /*
    Return the handlerton of the first partition. The loop only carries a
    debug assertion that every other partition uses the same handlerton.
  */
  handlerton *first_ht= m_file[0]->ht;
  uint idx= 1;
  while (idx < m_tot_parts)
  {
    DBUG_ASSERT(first_ht == m_file[idx]->ht);
    idx++;
  }
  return first_ht;
}
|
|
|
|
/*
  Sum stats.records over the handlers that belong to one partition element.

  _part_elem is a partition_element passed as void *. The element covers
  num_subparts consecutive entries of m_file (one entry when the table has
  no subpartitions), starting at id * that count. info() is called on each
  handler before its stats.records is read.
*/
ha_rows part_records(void *_part_elem)
{
  partition_element *elem= static_cast<partition_element *>(_part_elem);
  DBUG_ASSERT(m_part_info);

  uint32 parts_per_elem= m_part_info->num_subparts;
  if (!parts_per_elem)
    parts_per_elem= 1;

  const uint32 first_id= elem->id * parts_per_elem;
  const uint32 end_id= first_id + parts_per_elem;
  DBUG_ASSERT(end_id <= m_tot_parts);

  ha_rows total= 0;
  for (uint32 cur_id= first_id; cur_id < end_id; ++cur_id)
  {
    handler *part_file= m_file[cur_id];
    DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), cur_id));
    part_file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN);
    total+= part_file->stats.records;
  }
  return total;
}
|
|
|
|
friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
|
|
friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
|
|
};
|
|
#endif /* HA_PARTITION_INCLUDED */
|