mariadb/sql/table.h

2075 lines
68 KiB
C
Raw Normal View History

#ifndef TABLE_INCLUDED
#define TABLE_INCLUDED
/* Copyright 2000-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
2000-07-31 21:29:14 +02:00
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
2000-07-31 21:29:14 +02:00
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
2000-07-31 21:29:14 +02:00
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "my_global.h" /* NO_EMBEDDED_ACCESS_CHECKS */
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
#include "sql_plist.h"
#include "sql_list.h" /* Sql_alloc */
#include "mdl.h"
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
#ifndef MYSQL_CLIENT
#include "hash.h" /* HASH */
#include "handler.h" /* row_type, ha_choice, handler */
#include "mysql_com.h" /* enum_field_types */
#include "thr_lock.h" /* thr_lock_type */
2000-07-31 21:29:14 +02:00
/* Structs that defines the TABLE */
class Item; /* Needed by ORDER */
class Item_subselect;
class Item_field;
2000-07-31 21:29:14 +02:00
class GRANT_TABLE;
class st_select_lex_unit;
2004-07-16 00:15:55 +02:00
class st_select_lex;
2005-07-18 13:31:02 +02:00
class partition_info;
class COND_EQUAL;
class Security_context;
struct TABLE_LIST;
class ACL_internal_schema_access;
class ACL_internal_table_access;
struct TABLE_LIST;
class Field;
/*
Used to identify NESTED_JOIN structures within a join (applicable only to
structures that have not been simplified away and embed more the one
element)
*/
typedef ulonglong nested_join_map;
#define tmp_file_prefix "#sql" /**< Prefix for tmp tables */
#define tmp_file_prefix_length 4
#define TMP_TABLE_KEY_EXTRA 8
/**
Enumerate possible types of a table from re-execution
standpoint.
TABLE_LIST class has a member of this type.
At prepared statement prepare, this member is assigned a value
as of the current state of the database. Before (re-)execution
of a prepared statement, we check that the value recorded at
prepare matches the type of the object we obtained from the
table definition cache.
@sa check_and_update_table_version()
@sa Execute_observer
@sa Prepared_statement::reprepare()
*/
enum enum_table_ref_type
{
/** Initial value set by the parser */
TABLE_REF_NULL= 0,
TABLE_REF_VIEW,
TABLE_REF_BASE_TABLE,
TABLE_REF_I_S_TABLE,
TABLE_REF_TMP_TABLE
};
/**
Opening modes for open_temporary_table and open_table_from_share
*/
enum open_table_mode
{
OTM_OPEN= 0,
OTM_CREATE= 1,
OTM_ALTER= 2
};
2000-07-31 21:29:14 +02:00
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
/*************************************************************************/
/**
Object_creation_ctx -- interface for creation context of database objects
(views, stored routines, events, triggers). Creation context -- is a set
of attributes, that should be fixed at the creation time and then be used
each time the object is parsed or executed.
*/
class Object_creation_ctx
{
public:
Object_creation_ctx *set_n_backup(THD *thd);
void restore_env(THD *thd, Object_creation_ctx *backup_ctx);
protected:
Object_creation_ctx() {}
virtual Object_creation_ctx *create_backup_ctx(THD *thd) const = 0;
virtual void change_env(THD *thd) const = 0;
public:
virtual ~Object_creation_ctx()
{ }
};
/*************************************************************************/
/**
Default_object_creation_ctx -- default implementation of
Object_creation_ctx.
*/
class Default_object_creation_ctx : public Object_creation_ctx
{
public:
CHARSET_INFO *get_client_cs()
{
return m_client_cs;
}
CHARSET_INFO *get_connection_cl()
{
return m_connection_cl;
}
protected:
Default_object_creation_ctx(THD *thd);
Default_object_creation_ctx(CHARSET_INFO *client_cs,
CHARSET_INFO *connection_cl);
protected:
virtual Object_creation_ctx *create_backup_ctx(THD *thd) const;
virtual void change_env(THD *thd) const;
protected:
/**
client_cs stores the value of character_set_client session variable.
The only character set attribute is used.
Client character set is included into query context, because we save
query in the original character set, which is client character set. So,
in order to parse the query properly we have to switch client character
set on parsing.
*/
CHARSET_INFO *m_client_cs;
/**
connection_cl stores the value of collation_connection session
variable. Both character set and collation attributes are used.
Connection collation is included into query context, becase it defines
the character set and collation of text literals in internal
representation of query (item-objects).
*/
CHARSET_INFO *m_connection_cl;
};
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
/**
View_creation_ctx -- creation context of view objects.
*/
class View_creation_ctx : public Default_object_creation_ctx,
public Sql_alloc
{
public:
static View_creation_ctx *create(THD *thd);
static View_creation_ctx *create(THD *thd,
TABLE_LIST *view);
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
private:
View_creation_ctx(THD *thd)
: Default_object_creation_ctx(thd)
{ }
};
/*************************************************************************/
2000-07-31 21:29:14 +02:00
/* Order clause list element */
typedef struct st_order {
struct st_order *next;
Item **item; /* Point at item in select fields */
2004-06-25 15:52:01 +02:00
Item *item_ptr; /* Storage for initial item */
Item **item_copy; /* For SPs; the original item ptr */
2004-08-31 10:58:45 +02:00
int counter; /* position in SELECT list, correct
only if counter_used is true*/
2000-07-31 21:29:14 +02:00
bool asc; /* true if ascending */
bool free_me; /* true if item isn't shared */
bool in_field_list; /* true if in select field list */
bool counter_used; /* parameter was counter of columns */
2000-07-31 21:29:14 +02:00
Field *field; /* If tmp-table group */
char *buff; /* If tmp-table group */
table_map used, depend_map;
2000-07-31 21:29:14 +02:00
} ORDER;
/**
State information for internal tables grants.
This structure is part of the TABLE_LIST, and is updated
during the ACL check process.
@sa GRANT_INFO
*/
struct st_grant_internal_info
{
/** True if the internal lookup by schema name was done. */
bool m_schema_lookup_done;
/** Cached internal schema access. */
const ACL_internal_schema_access *m_schema_access;
/** True if the internal lookup by table name was done. */
bool m_table_lookup_done;
/** Cached internal table access. */
const ACL_internal_table_access *m_table_access;
};
typedef struct st_grant_internal_info GRANT_INTERNAL_INFO;
/**
@brief The current state of the privilege checking process for the current
user, SQL statement and SQL object.
@details The privilege checking process is divided into phases depending on
the level of the privilege to be checked and the type of object to be
accessed. Due to the mentioned scattering of privilege checking
functionality, it is necessary to keep track of the state of the
process. This information is stored in privilege, want_privilege, and
orig_want_privilege.
A GRANT_INFO also serves as a cache of the privilege hash tables. Relevant
members are grant_table and version.
*/
2000-07-31 21:29:14 +02:00
typedef struct st_grant_info
{
/**
@brief A copy of the privilege information regarding the current host,
database, object and user.
@details The version of this copy is found in GRANT_INFO::version.
*/
2000-07-31 21:29:14 +02:00
GRANT_TABLE *grant_table;
/**
@brief Used for cache invalidation when caching privilege information.
@details The privilege information is stored on disk, with dedicated
caches residing in memory: table-level and column-level privileges,
respectively, have their own dedicated caches.
The GRANT_INFO works as a level 1 cache with this member updated to the
current value of the global variable @c grant_version (@c static variable
in sql_acl.cc). It is updated Whenever the GRANT_INFO is refreshed from
the level 2 cache. The level 2 cache is the @c column_priv_hash structure
(@c static variable in sql_acl.cc)
@see grant_version
*/
2000-07-31 21:29:14 +02:00
uint version;
/**
@brief The set of privileges that the current user has fulfilled for a
certain host, database, and object.
@details This field is continually updated throughout the access checking
process. In each step the "wanted privilege" is checked against the
fulfilled privileges. When/if the intersection of these sets is empty,
access is granted.
The set is implemented as a bitmap, with the bits defined in sql_acl.h.
*/
ulong privilege;
/**
@brief the set of privileges that the current user needs to fulfil in
order to carry out the requested operation.
*/
ulong want_privilege;
/**
Stores the requested access acl of top level tables list. Is used to
check access rights to the underlying tables of a view.
*/
ulong orig_want_privilege;
/** The grant state for internal tables. */
GRANT_INTERNAL_INFO m_internal;
2000-07-31 21:29:14 +02:00
} GRANT_INFO;
enum tmp_table_type
{
NO_TMP_TABLE, NON_TRANSACTIONAL_TMP_TABLE, TRANSACTIONAL_TMP_TABLE,
INTERNAL_TMP_TABLE, SYSTEM_TMP_TABLE
};
enum frm_type_enum
{
FRMTYPE_ERROR= 0,
FRMTYPE_TABLE,
FRMTYPE_VIEW
};
enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP };
typedef struct st_filesort_info
{
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
IO_CACHE *io_cache; /* If sorted through filesort */
uchar **sort_keys; /* Buffer for sorting keys */
uchar *buffpek; /* Buffer for buffpek structures */
uint buffpek_len; /* Max number of buffpeks in the buffer */
uchar *addon_buf; /* Pointer to a buffer if sorted with fields */
size_t addon_length; /* Length of the buffer */
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
void (*unpack)(struct st_sort_addon_field *, uchar *); /* To unpack back */
uchar *record_pointers; /* If sorted in memory */
ha_rows found_records; /* How many records in sort */
} FILESORT_INFO;
/*
Values in this enum are used to indicate how a tables TIMESTAMP field
should be treated. It can be set to the current timestamp on insert or
update or both.
WARNING: The values are used for bit operations. If you change the
enum, you must keep the bitwise relation of the values. For example:
(int) TIMESTAMP_AUTO_SET_ON_BOTH must be equal to
(int) TIMESTAMP_AUTO_SET_ON_INSERT | (int) TIMESTAMP_AUTO_SET_ON_UPDATE.
We use an enum here so that the debugger can display the value names.
*/
enum timestamp_auto_set_type
{
TIMESTAMP_NO_AUTO_SET= 0, TIMESTAMP_AUTO_SET_ON_INSERT= 1,
TIMESTAMP_AUTO_SET_ON_UPDATE= 2, TIMESTAMP_AUTO_SET_ON_BOTH= 3
};
#define clear_timestamp_auto_bits(_target_, _bits_) \
(_target_)= (enum timestamp_auto_set_type)((int)(_target_) & ~(int)(_bits_))
2000-07-31 21:29:14 +02:00
class Field_timestamp;
class Field_blob;
class Table_triggers_list;
2000-07-31 21:29:14 +02:00
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
/**
Category of table found in the table share.
*/
enum enum_table_category
{
/**
Unknown value.
*/
TABLE_UNKNOWN_CATEGORY=0,
/**
Temporary table.
The table is visible only in the session.
Therefore,
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
do not apply to this table.
2007-08-15 15:43:08 +02:00
Note that LOCK TABLE t FOR READ/WRITE
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
can be used on temporary tables.
Temporary tables are not part of the table cache.
*/
TABLE_CATEGORY_TEMPORARY=1,
/**
User table.
These tables do honor:
2007-08-15 15:43:08 +02:00
- LOCK TABLE t FOR READ/WRITE
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
User tables are cached in the table cache.
*/
TABLE_CATEGORY_USER=2,
/**
System table, maintained by the server.
These tables do honor:
2007-08-15 15:43:08 +02:00
- LOCK TABLE t FOR READ/WRITE
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
Typically, writes to system tables are performed by
the server implementation, not explicitly be a user.
System tables are cached in the table cache.
*/
TABLE_CATEGORY_SYSTEM=3,
/**
Information schema tables.
These tables are an interface provided by the system
to inspect the system metadata.
These tables do *not* honor:
2007-08-15 15:43:08 +02:00
- LOCK TABLE t FOR READ/WRITE
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
as there is no point in locking explicitly
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
an INFORMATION_SCHEMA table.
Nothing is directly written to information schema tables.
Note that this value is not used currently,
since information schema tables are not shared,
but implemented as session specific temporary tables.
*/
/*
TODO: Fixing the performance issues of I_S will lead
to I_S tables in the table cache, which should use
this table type.
*/
TABLE_CATEGORY_INFORMATION=4,
/**
Log tables.
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
These tables are an interface provided by the system
to inspect the system logs.
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
These tables do *not* honor:
2007-08-15 15:43:08 +02:00
- LOCK TABLE t FOR READ/WRITE
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
as there is no point in locking explicitly
a LOG table.
An example of LOG tables are:
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
- mysql.slow_log
- mysql.general_log,
which *are* updated even when there is either
a GLOBAL READ LOCK or a GLOBAL READ_ONLY in effect.
User queries do not write directly to these tables
(there are exceptions for log tables).
The server implementation perform writes.
Log tables are cached in the table cache.
*/
TABLE_CATEGORY_LOG=5,
/**
Performance schema tables.
These tables are an interface provided by the system
to inspect the system performance data.
These tables do *not* honor:
- LOCK TABLE t FOR READ/WRITE
- FLUSH TABLES WITH READ LOCK
- SET GLOBAL READ_ONLY = ON
as there is no point in locking explicitly
a PERFORMANCE_SCHEMA table.
An example of PERFORMANCE_SCHEMA tables are:
- performance_schema.*
which *are* updated (but not using the handler interface)
even when there is either
a GLOBAL READ LOCK or a GLOBAL READ_ONLY in effect.
User queries do not write directly to these tables
(there are exceptions for SETUP_* tables).
The server implementation perform writes.
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
Performance tables are cached in the table cache.
*/
TABLE_CATEGORY_PERFORMANCE=6
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
};
typedef enum enum_table_category TABLE_CATEGORY;
TABLE_CATEGORY get_table_category(const LEX_STRING *db,
const LEX_STRING *name);
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
struct TABLE_share;
extern ulong refresh_version;
typedef struct st_table_field_type
{
LEX_STRING name;
LEX_STRING type;
LEX_STRING cset;
} TABLE_FIELD_TYPE;
typedef struct st_table_field_def
{
uint count;
const TABLE_FIELD_TYPE *field;
} TABLE_FIELD_DEF;
class Table_check_intact
{
protected:
virtual void report_error(uint code, const char *fmt, ...)= 0;
public:
Table_check_intact() {}
virtual ~Table_check_intact() {}
/** Checks whether a table is intact. */
bool check(TABLE *table, const TABLE_FIELD_DEF *table_def);
};
/*
This structure is shared between different table objects. There is one
instance of table share per one table in the database.
*/
struct TABLE_SHARE
{
TABLE_SHARE() {} /* Remove gcc warning */
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
/** Category of this table. */
TABLE_CATEGORY table_category;
2004-06-25 15:52:01 +02:00
/* hash of field names (contains pointers to elements of field array) */
HASH name_hash; /* hash of field names */
MEM_ROOT mem_root;
2000-07-31 21:29:14 +02:00
TYPELIB keynames; /* Pointers to keynames */
TYPELIB fieldnames; /* Pointer to fieldnames */
TYPELIB *intervals; /* pointer to interval info */
2010-02-03 14:43:03 +01:00
mysql_mutex_t LOCK_ha_data; /* To protect access to ha_data */
TABLE_SHARE *next, **prev; /* Link to unused shares */
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
/*
Doubly-linked (back-linked) lists of used and unused TABLE objects
for this share.
*/
I_P_List <TABLE, TABLE_share> used_tables;
I_P_List <TABLE, TABLE_share> free_tables;
/* The following is copied to each TABLE on OPEN */
Field **field;
Field **found_next_number_field;
Field *timestamp_field; /* Used only during open */
KEY *key_info; /* data of keys in database */
uint *blob_field; /* Index to blobs in Field arrray*/
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
uchar *default_values; /* row with default values */
LEX_STRING comment; /* Comment about table */
CHARSET_INFO *table_charset; /* Default charset of string fields */
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 17:52:22 +02:00
MY_BITMAP all_set;
/*
Key which is used for looking-up table in table cache and in the list
of thread's temporary tables. Has the form of:
"database_name\0table_name\0" + optional part for temporary tables.
Note that all three 'table_cache_key', 'db' and 'table_name' members
must be set (and be non-zero) for tables in table cache. They also
should correspond to each other.
To ensure this one can use set_table_cache() methods.
*/
LEX_STRING table_cache_key;
LEX_STRING db; /* Pointer to db */
LEX_STRING table_name; /* Table name (for open) */
LEX_STRING path; /* Path to .frm file (from datadir) */
LEX_STRING normalized_path; /* unpack_filename(path) */
LEX_STRING connect_string;
/*
Set of keys in use, implemented as a Bitmap.
Excludes keys disabled by ALTER TABLE ... DISABLE KEYS.
*/
key_map keys_in_use;
key_map keys_for_keyread;
ha_rows min_rows, max_rows; /* create information */
ulong avg_row_length; /* create information */
ulong raid_chunksize;
ulong version, mysql_version;
ulong timestamp_offset; /* Set to offset+1 of record */
ulong reclength; /* Recordlength */
plugin_ref db_plugin; /* storage engine plugin */
inline handlerton *db_type() const /* table_type for handler */
{
// DBUG_ASSERT(db_plugin);
return db_plugin ? plugin_data(db_plugin, handlerton*) : NULL;
}
2000-07-31 21:29:14 +02:00
enum row_type row_type; /* How rows are stored */
enum tmp_table_type tmp_table;
enum enum_ha_unused unused1;
enum enum_ha_unused unused2;
uint ref_count; /* How many TABLE objects uses this */
uint open_count; /* Number of tables in open list */
uint blob_ptr_size; /* 4 or 8 */
uint key_block_size; /* create key_block_size, if used */
uint null_bytes, last_null_bit_pos;
uint fields; /* Number of fields */
uint rec_buff_length; /* Size of table->record[] buffer */
uint keys, key_parts;
uint max_key_length, max_unique_length, total_key_length;
uint uniques; /* Number of UNIQUE index */
uint null_fields; /* number of null fields */
uint blob_fields; /* number of blob fields */
uint timestamp_field_offset; /* Field number for timestamp field */
uint varchar_fields; /* number of varchar fields */
2000-07-31 21:29:14 +02:00
uint db_create_options; /* Create options from database */
uint db_options_in_use; /* Options in use */
uint db_record_offset; /* if HA_REC_IN_SEQ */
uint raid_type, raid_chunks;
uint rowid_field_offset; /* Field_nr +1 to rowid field */
/* Index of auto-updated TIMESTAMP field in field array */
uint primary_key;
uint next_number_index; /* autoincrement key number */
uint next_number_key_offset; /* autoinc keypart offset in a key */
uint next_number_keypart; /* autoinc keypart number in a key */
uint error, open_errno, errarg; /* error from open_table_def() */
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 17:52:22 +02:00
uint column_bitmap_size;
uchar frm_version;
bool null_field_first;
bool system; /* Set if system table (one record) */
bool crypted; /* If .frm file is crypted */
bool db_low_byte_first; /* Portable row format */
bool crashed;
bool is_view;
ulong table_map_id; /* for row-based replication */
ulonglong table_map_version;
/*
Cache for row-based replication table share checks that does not
need to be repeated. Possible values are: -1 when cache value is
not calculated yet, 0 when table *shall not* be replicated, 1 when
table *may* be replicated.
*/
int cached_row_logging_check;
#ifdef WITH_PARTITION_STORAGE_ENGINE
Bug#38804: Query deadlock causes all tables to be inaccessible. Problem was a mutex added in bug n 27405 for solving a problem with auto_increment in partitioned innodb tables. (in ha_partition::write_row over partitions file->ha_write_row) Solution is to use the patch for bug#33479, which refines the usage of mutexes for auto_increment. Backport of bug-33479 from 6.0: Bug-33479: auto_increment failures in partitioning Several problems with auto_increment in partitioning (with MyISAM, InnoDB. Locking issues, not handling multi-row INSERTs properly etc.) Changed the auto_increment handling for partitioning: Added a ha_data variable in table_share for storage engine specific data such as auto_increment value handling in partitioning, also see WL 4305 and using the ha_data->mutex to lock around read + update. The idea is this: Store the table's reserved auto_increment value in the TABLE_SHARE and use a mutex to, lock it for reading and updating it and unlocking it, in one block. Only accessing all partitions when it is not initialized. Also allow reservations of ranges, and if no one has done a reservation afterwards, lower the reservation to what was actually used after the statement is done (via release_auto_increment from WL 3146). The lock is kept from the first reservation if it is statement based replication and a multi-row INSERT statement where the number of candidate rows to insert is not known in advance (like INSERT SELECT, LOAD DATA, unlike INSERT VALUES (row1), (row2),,(rowN)). This should also lead to better concurrancy (no need to have a mutex protection around write_row in all cases) and work with any local storage engine.
2008-09-08 15:30:01 +02:00
/** @todo: Move into *ha_data for partitioning */
bool auto_partitioned;
const char *partition_info;
uint partition_info_len;
uint partition_info_buffer_size;
const char *part_state;
uint part_state_len;
handlerton *default_part_db_type;
#endif
/**
Cache the checked structure of this table.
The pointer data is used to describe the structure that
a instance of the table must have. Each element of the
array specifies a field that must exist on the table.
The pointer is cached in order to perform the check only
once -- when the table is loaded from the disk.
*/
const TABLE_FIELD_DEF *table_field_def_cache;
Bug#38804: Query deadlock causes all tables to be inaccessible. Problem was a mutex added in bug n 27405 for solving a problem with auto_increment in partitioned innodb tables. (in ha_partition::write_row over partitions file->ha_write_row) Solution is to use the patch for bug#33479, which refines the usage of mutexes for auto_increment. Backport of bug-33479 from 6.0: Bug-33479: auto_increment failures in partitioning Several problems with auto_increment in partitioning (with MyISAM, InnoDB. Locking issues, not handling multi-row INSERTs properly etc.) Changed the auto_increment handling for partitioning: Added a ha_data variable in table_share for storage engine specific data such as auto_increment value handling in partitioning, also see WL 4305 and using the ha_data->mutex to lock around read + update. The idea is this: Store the table's reserved auto_increment value in the TABLE_SHARE and use a mutex to, lock it for reading and updating it and unlocking it, in one block. Only accessing all partitions when it is not initialized. Also allow reservations of ranges, and if no one has done a reservation afterwards, lower the reservation to what was actually used after the statement is done (via release_auto_increment from WL 3146). The lock is kept from the first reservation if it is statement based replication and a multi-row INSERT statement where the number of candidate rows to insert is not known in advance (like INSERT SELECT, LOAD DATA, unlike INSERT VALUES (row1), (row2),,(rowN)). This should also lead to better concurrancy (no need to have a mutex protection around write_row in all cases) and work with any local storage engine.
2008-09-08 15:30:01 +02:00
/** place to store storage engine specific data */
void *ha_data;
void (*ha_data_destroy)(void *); /* An optional destructor for ha_data */
Bug#38804: Query deadlock causes all tables to be inaccessible. Problem was a mutex added in bug n 27405 for solving a problem with auto_increment in partitioned innodb tables. (in ha_partition::write_row over partitions file->ha_write_row) Solution is to use the patch for bug#33479, which refines the usage of mutexes for auto_increment. Backport of bug-33479 from 6.0: Bug-33479: auto_increment failures in partitioning Several problems with auto_increment in partitioning (with MyISAM, InnoDB. Locking issues, not handling multi-row INSERTs properly etc.) Changed the auto_increment handling for partitioning: Added a ha_data variable in table_share for storage engine specific data such as auto_increment value handling in partitioning, also see WL 4305 and using the ha_data->mutex to lock around read + update. The idea is this: Store the table's reserved auto_increment value in the TABLE_SHARE and use a mutex to, lock it for reading and updating it and unlocking it, in one block. Only accessing all partitions when it is not initialized. Also allow reservations of ranges, and if no one has done a reservation afterwards, lower the reservation to what was actually used after the statement is done (via release_auto_increment from WL 3146). The lock is kept from the first reservation if it is statement based replication and a multi-row INSERT statement where the number of candidate rows to insert is not known in advance (like INSERT SELECT, LOAD DATA, unlike INSERT VALUES (row1), (row2),,(rowN)). This should also lead to better concurrancy (no need to have a mutex protection around write_row in all cases) and work with any local storage engine.
2008-09-08 15:30:01 +02:00
/** Instrumentation for this table share. */
PSI_table_share *m_psi;
/*
Set share's table cache key and update its db and table name appropriately.
SYNOPSIS
set_table_cache_key()
key_buff Buffer with already built table cache key to be
referenced from share.
key_length Key length.
NOTES
Since 'key_buff' buffer will be referenced from share it should has same
life-time as share itself.
This method automatically ensures that TABLE_SHARE::table_name/db have
appropriate values by using table cache key as their source.
*/
void set_table_cache_key(char *key_buff, uint key_length)
{
table_cache_key.str= key_buff;
table_cache_key.length= key_length;
/*
Let us use the fact that the key is "db/0/table_name/0" + optional
part for temporary tables.
*/
db.str= table_cache_key.str;
db.length= strlen(db.str);
table_name.str= db.str + db.length + 1;
table_name.length= strlen(table_name.str);
}
/*
Set share's table cache key and update its db and table name appropriately.
SYNOPSIS
set_table_cache_key()
key_buff Buffer to be used as storage for table cache key
(should be at least key_length bytes).
key Value for table cache key.
key_length Key length.
NOTE
Since 'key_buff' buffer will be used as storage for table cache key
it should has same life-time as share itself.
*/
void set_table_cache_key(char *key_buff, const char *key, uint key_length)
{
memcpy(key_buff, key, key_length);
set_table_cache_key(key_buff, key_length);
}
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
inline bool honor_global_locks()
{
return ((table_category == TABLE_CATEGORY_USER)
|| (table_category == TABLE_CATEGORY_SYSTEM));
}
inline bool require_write_privileges()
{
return (table_category == TABLE_CATEGORY_LOG);
WL#3984 (Revise locking of mysql.general_log and mysql.slow_log) Bug#25422 (Hang with log tables) Bug 17876 (Truncating mysql.slow_log in a SP after using cursor locks the thread) Bug 23044 (Warnings on flush of a log table) Bug 29129 (Resetting general_log while the GLOBAL READ LOCK is set causes a deadlock) Prior to this fix, the server would hang when performing concurrent ALTER TABLE or TRUNCATE TABLE statements against the LOG TABLES, which are mysql.general_log and mysql.slow_log. The root cause traces to the following code: in sql_base.cc, open_table() if (table->in_use != thd) { /* wait_for_condition will unlock LOCK_open for us */ wait_for_condition(thd, &LOCK_open, &COND_refresh); } The problem with this code is that the current implementation of the LOGGER creates 'fake' THD objects, like - Log_to_csv_event_handler::general_log_thd - Log_to_csv_event_handler::slow_log_thd which are not associated to a real thread running in the server, so that waiting for these non-existing threads to release table locks cause the dead lock. In general, the design of Log_to_csv_event_handler does not fit into the general architecture of the server, so that the concept of general_log_thd and slow_log_thd has to be abandoned: - this implementation does not work with table locking - it will not work with commands like SHOW PROCESSLIST - having the log tables always opened does not integrate well with DDL operations / FLUSH TABLES / SET GLOBAL READ_ONLY With this patch, the fundamental design of the LOGGER has been changed to: - always open and close a log table when writing a log - remove totally the usage of fake THD objects - clarify how locking of log tables is implemented in general. See WL#3984 for details related to the new locking design. Additional changes (misc bugs exposed and fixed): 1) mysqldump which would ignore some tables in dump_all_tables_in_db(), but forget to ignore the same in dump_all_views_in_db(). 2) mysqldump would also issue an empty "LOCK TABLE" command when all the tables to lock are to be ignored (numrows == 0), instead of not issuing the query. 3) Internal errors handlers could intercept errors but not warnings (see sql_error.cc). 4) Implementing a nested call to open tables, for the performance schema tables, exposed an existing bug in remove_table_from_cache(), which would perform: in_use->some_tables_deleted=1; against another thread, without any consideration about thread locking. This call inside remove_table_from_cache() was not required anyway, since calling mysql_lock_abort() takes care of aborting -- cleanly -- threads that might hold a lock on a table. This line (in_use->some_tables_deleted=1) has been removed.
2007-07-27 08:31:06 +02:00
}
Bug#26379 - Combination of FLUSH TABLE and REPAIR TABLE corrupts a MERGE table Bug 26867 - LOCK TABLES + REPAIR + merge table result in memory/cpu hogging Bug 26377 - Deadlock with MERGE and FLUSH TABLE Bug 25038 - Waiting TRUNCATE Bug 25700 - merge base tables get corrupted by optimize/analyze/repair table Bug 30275 - Merge tables: flush tables or unlock tables causes server to crash Bug 19627 - temporary merge table locking Bug 27660 - Falcon: merge table possible Bug 30273 - merge tables: Can't lock file (errno: 155) The problems were: Bug 26379 - Combination of FLUSH TABLE and REPAIR TABLE corrupts a MERGE table 1. A thread trying to lock a MERGE table performs busy waiting while REPAIR TABLE or a similar table administration task is ongoing on one or more of its MyISAM tables. 2. A thread trying to lock a MERGE table performs busy waiting until all threads that did REPAIR TABLE or similar table administration tasks on one or more of its MyISAM tables in LOCK TABLES segments do UNLOCK TABLES. The difference against problem #1 is that the busy waiting takes place *after* the administration task. It is terminated by UNLOCK TABLES only. 3. Two FLUSH TABLES within a LOCK TABLES segment can invalidate the lock. This does *not* require a MERGE table. The first FLUSH TABLES can be replaced by any statement that requires other threads to reopen the table. In 5.0 and 5.1 a single FLUSH TABLES can provoke the problem. Bug 26867 - LOCK TABLES + REPAIR + merge table result in memory/cpu hogging Trying DML on a MERGE table, which has a child locked and repaired by another thread, made an infinite loop in the server. Bug 26377 - Deadlock with MERGE and FLUSH TABLE Locking a MERGE table and its children in parent-child order and flushing the child deadlocked the server. Bug 25038 - Waiting TRUNCATE Truncating a MERGE child, while the MERGE table was in use, let the truncate fail instead of waiting for the table to become free. Bug 25700 - merge base tables get corrupted by optimize/analyze/repair table Repairing a child of an open MERGE table corrupted the child. It was necessary to FLUSH the child first. Bug 30275 - Merge tables: flush tables or unlock tables causes server to crash Flushing and optimizing locked MERGE children crashed the server. Bug 19627 - temporary merge table locking Use of a temporary MERGE table with non-temporary children could corrupt the children. Temporary tables are never locked. So we do now prohibit non-temporary chidlren of a temporary MERGE table. Bug 27660 - Falcon: merge table possible It was possible to create a MERGE table with non-MyISAM children. Bug 30273 - merge tables: Can't lock file (errno: 155) This was a Windows-only bug. Table administration statements sometimes failed with "Can't lock file (errno: 155)". These bugs are fixed by a new implementation of MERGE table open. When opening a MERGE table in open_tables() we do now add the child tables to the list of tables to be opened by open_tables() (the "query_list"). The children are not opened in the handler at this stage. After opening the parent, open_tables() opens each child from the now extended query_list. When the last child is opened, we remove the children from the query_list again and attach the children to the parent. This behaves similar to the old open. However it does not open the MyISAM tables directly, but grabs them from the already open children. When closing a MERGE table in close_thread_table() we detach the children only. Closing of the children is done implicitly because they are in thd->open_tables. For more detail see the comment at the top of ha_myisammrg.cc. Changed from open_ltable() to open_and_lock_tables() in all places that can be relevant for MERGE tables. The latter can handle tables added to the list on the fly. When open_ltable() was used in a loop over a list of tables, the list must be temporarily terminated after every table for open_and_lock_tables(). table_list->required_type is set to FRMTYPE_TABLE to avoid open of special tables. Handling of derived tables is suppressed. These details are handled by the new function open_n_lock_single_table(), which has nearly the same signature as open_ltable() and can replace it in most cases. In reopen_tables() some of the tables open by a thread can be closed and reopened. When a MERGE child is affected, the parent must be closed and reopened too. Closing of the parent is forced before the first child is closed. Reopen happens in the order of thd->open_tables. MERGE parents do not attach their children automatically at open. This is done after all tables are reopened. So all children are open when attaching them. Special lock handling like mysql_lock_abort() or mysql_lock_remove() needs to be suppressed for MERGE children or forwarded to the parent. This depends on the situation. In loops over all open tables one suppresses child lock handling. When a single table is touched, forwarding is done. Behavioral changes: =================== This patch changes the behavior of temporary MERGE tables. Temporary MERGE must have temporary children. The old behavior was wrong. A temporary table is not locked. Hence even non-temporary children were not locked. See Bug 19627 - temporary merge table locking. You cannot change the union list of a non-temporary MERGE table when LOCK TABLES is in effect. The following does *not* work: CREATE TABLE m1 ... ENGINE=MRG_MYISAM ...; LOCK TABLES t1 WRITE, t2 WRITE, m1 WRITE; ALTER TABLE m1 ... UNION=(t1,t2) ...; However, you can do this with a temporary MERGE table. You cannot create a MERGE table with CREATE ... SELECT, neither as a temporary MERGE table, nor as a non-temporary MERGE table. CREATE TABLE m1 ... ENGINE=MRG_MYISAM ... SELECT ...; Gives error message: table is not BASE TABLE.
2007-11-15 20:25:43 +01:00
inline ulong get_table_def_version()
{
return table_map_id;
}
/*
Must all TABLEs be reopened?
*/
inline bool needs_reopen()
{
return version != refresh_version;
}
/**
Convert unrelated members of TABLE_SHARE to one enum
representing its type.
@todo perhaps we need to have a member instead of a function.
*/
enum enum_table_ref_type get_table_ref_type() const
{
if (is_view)
return TABLE_REF_VIEW;
switch (tmp_table) {
case NO_TMP_TABLE:
return TABLE_REF_BASE_TABLE;
case SYSTEM_TMP_TABLE:
return TABLE_REF_I_S_TABLE;
default:
return TABLE_REF_TMP_TABLE;
}
}
/**
Return a table metadata version.
* for base tables, we return table_map_id.
It is assigned from a global counter incremented for each
new table loaded into the table definition cache (TDC).
* for temporary tables it's table_map_id again. But for
temporary tables table_map_id is assigned from
thd->query_id. The latter is assigned from a thread local
counter incremented for every new SQL statement. Since
temporary tables are thread-local, each temporary table
gets a unique id.
* for everything else (views, information schema tables),
the version id is zero.
This choice of version id is a large compromise
to have a working prepared statement validation in 5.1. In
future version ids will be persistent, as described in WL#4180.
Let's try to explain why and how this limited solution allows
to validate prepared statements.
Firstly, sets (in mathematical sense) of version numbers
never intersect for different table types. Therefore,
version id of a temporary table is never compared with
a version id of a view, and vice versa.
Secondly, for base tables, we know that each DDL flushes the
respective share from the TDC. This ensures that whenever
a table is altered or dropped and recreated, it gets a new
version id.
Unfortunately, since elements of the TDC are also flushed on
LRU basis, this choice of version ids leads to false positives.
E.g. when the TDC size is too small, we may have a SELECT
* FROM INFORMATION_SCHEMA.TABLES flush all its elements, which
in turn will lead to a validation error and a subsequent
reprepare of all prepared statements. This is
considered acceptable, since as long as prepared statements are
automatically reprepared, spurious invalidation is only
a performance hit. Besides, no better simple solution exists.
For temporary tables, using thd->query_id ensures that if
a temporary table was altered or recreated, a new version id is
assigned. This suits validation needs very well and will perhaps
never change.
Metadata of information schema tables never changes.
Thus we can safely assume 0 for a good enough version id.
Views are a special and tricky case. A view is always inlined
into the parse tree of a prepared statement at prepare.
Thus, when we execute a prepared statement, the parse tree
will not get modified even if the view is replaced with another
view. Therefore, we can safely choose 0 for version id of
views and effectively never invalidate a prepared statement
when a view definition is altered. Note, that this leads to
wrong binary log in statement-based replication, since we log
prepared statement execution in form Query_log_events
containing conventional statements. But since there is no
metadata locking for views, the very same problem exists for
conventional statements alone, as reported in Bug#25144. The only
difference between prepared and conventional execution is,
effectively, that for prepared statements the race condition
window is much wider.
In 6.0 we plan to support view metadata locking (WL#3726) and
extend table definition cache to cache views (WL#4298).
When this is done, views will be handled in the same fashion
as the base tables.
Finally, by taking into account table type, we always
track that a change has taken place when a view is replaced
with a base table, a base table is replaced with a temporary
table and so on.
@sa TABLE_LIST::is_table_ref_id_equal()
*/
ulong get_table_ref_version() const
{
return (tmp_table == SYSTEM_TMP_TABLE || is_view) ? 0 : table_map_id;
}
};
/* Information for one open table */
enum index_hint_type
{
INDEX_HINT_IGNORE,
INDEX_HINT_USE,
INDEX_HINT_FORCE
};
struct TABLE
{
TABLE() {} /* Remove gcc warning */
TABLE_SHARE *s;
handler *file;
TABLE *next, *prev;
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
private:
/**
Links for the lists of used/unused TABLE objects for this share.
Declared as private to avoid direct manipulation with those objects.
One should use methods of I_P_List template instead.
*/
TABLE *share_next, **share_prev;
friend struct TABLE_share;
public:
THD *in_use; /* Which thread uses this */
Field **field; /* Pointer to fields */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
uchar *record[2]; /* Pointer to records */
uchar *write_row_record; /* Used as optimisation in
THD::write_row */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
uchar *insert_values; /* used by INSERT ... UPDATE */
/*
Map of keys that can be used to retrieve all data from this table
needed by the query without reading the row.
*/
key_map covering_keys;
key_map quick_keys, merge_keys;
/*
A set of keys that can be used in the query that references this
table.
All indexes disabled on the table's TABLE_SHARE (see TABLE::s) will be
subtracted from this set upon instantiation. Thus for any TABLE t it holds
that t.keys_in_use_for_query is a subset of t.s.keys_in_use. Generally we
must not introduce any new keys here (see setup_tables).
The set is implemented as a bitmap.
*/
key_map keys_in_use_for_query;
/* Map of keys that can be used to calculate GROUP BY without sorting */
key_map keys_in_use_for_group_by;
/* Map of keys that can be used to calculate ORDER BY without sorting */
key_map keys_in_use_for_order_by;
KEY *key_info; /* data of keys in database */
Field *next_number_field; /* Set if next_number is activated */
Field *found_next_number_field; /* Set on open */
Field_timestamp *timestamp_field;
/* Table's triggers, 0 if there are no of them */
Table_triggers_list *triggers;
TABLE_LIST *pos_in_table_list;/* Element referring to this table */
/* Position in thd->locked_table_list under LOCK TABLES */
TABLE_LIST *pos_in_locked_tables;
ORDER *group;
const char *alias; /* alias or table name */
uchar *null_flags;
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 17:52:22 +02:00
my_bitmap_map *bitmap_init_value;
MY_BITMAP def_read_set, def_write_set, tmp_set; /* containers */
MY_BITMAP *read_set, *write_set; /* Active column sets */
/*
The ID of the query that opened and is using this table. Has different
meanings depending on the table type.
Temporary tables:
table->query_id is set to thd->query_id for the duration of a statement
and is reset to 0 once it is closed by the same statement. A non-zero
table->query_id means that a statement is using the table even if it's
not the current statement (table is in use by some outer statement).
Non-temporary tables:
Under pre-locked or LOCK TABLES mode: query_id is set to thd->query_id
for the duration of a statement and is reset to 0 once it is closed by
the same statement. A non-zero query_id is used to control which tables
in the list of pre-opened and locked tables are actually being used.
*/
query_id_t query_id;
/*
For each key that has quick_keys.is_set(key) == TRUE: estimate of #records
and max #key parts that range access would use.
*/
ha_rows quick_rows[MAX_KEY];
/* Bitmaps of key parts that =const for the entire join. */
key_part_map const_key_parts[MAX_KEY];
uint quick_key_parts[MAX_KEY];
uint quick_n_ranges[MAX_KEY];
2004-06-25 15:52:01 +02:00
/*
Estimate of number of records that satisfy SARGable part of the table
condition, or table->file->records if no SARGable condition could be
constructed.
This value is used by join optimizer as an estimate of number of records
that will pass the table condition (condition that depends on fields of
this table and constants)
*/
ha_rows quick_condition_rows;
/*
If this table has TIMESTAMP field with auto-set property (pointed by
timestamp_field member) then this variable indicates during which
operations (insert only/on update/in both cases) we should set this
field to current timestamp. If there are no such field in this table
or we should not automatically set its value during execution of current
statement then the variable contains TIMESTAMP_NO_AUTO_SET (i.e. 0).
Value of this variable is set for each statement in open_table() and
if needed cleared later in statement processing code (see mysql_update()
as example).
2004-06-25 15:52:01 +02:00
*/
timestamp_auto_set_type timestamp_field_type;
table_map map; /* ID bit of table (1,2,4,8,16...) */
uint lock_position; /* Position in MYSQL_LOCK.table */
uint lock_data_start; /* Start pos. in MYSQL_LOCK.locks */
uint lock_count; /* Number of locks */
uint tablenr,used_fields;
uint temp_pool_slot; /* Used by intern temp tables */
uint status; /* What's in record[0] */
uint db_stat; /* mode of file as in handler.h */
/* number of select if it is derived table */
uint derived_select_number;
int current_lock; /* Type of lock on table */
2000-07-31 21:29:14 +02:00
my_bool copy_blobs; /* copy_blobs when storing */
/*
0 or JOIN_TYPE_{LEFT|RIGHT}. Currently this is only compared to 0.
If maybe_null !=0, this table is inner w.r.t. some outer join operation,
and null_row may be true.
*/
uint maybe_null;
/*
If true, the current table row is considered to have all columns set to
NULL, including columns declared as "not null" (see maybe_null).
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
*/
my_bool null_row;
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
/*
TODO: Each of the following flags take up 8 bits. They can just as easily
be put into one single unsigned long and instead of taking up 18
bytes, it would take up 4.
*/
my_bool force_index;
/**
Flag set when the statement contains FORCE INDEX FOR ORDER BY
See TABLE_LIST::process_index_hints().
*/
my_bool force_index_order;
/**
Flag set when the statement contains FORCE INDEX FOR GROUP BY
See TABLE_LIST::process_index_hints().
*/
my_bool force_index_group;
my_bool distinct,const_table,no_rows;
/**
If set, the optimizer has found that row retrieval should access index
tree only.
*/
my_bool key_read;
my_bool no_keyread;
my_bool locked_by_logger;
my_bool no_replicate;
2000-08-29 11:31:01 +02:00
my_bool locked_by_name;
2002-03-01 17:57:08 +01:00
my_bool fulltext_searched;
my_bool no_cache;
/* To signal that the table is associated with a HANDLER statement */
my_bool open_by_handler;
/*
To indicate that a non-null value of the auto_increment field
was provided by the user or retrieved from the current record.
Used only in the MODE_NO_AUTO_VALUE_ON_ZERO mode.
*/
2004-06-25 15:52:01 +02:00
my_bool auto_increment_field_not_null;
my_bool insert_or_update; /* Can be used by the handler */
2004-12-06 18:18:35 +01:00
my_bool alias_name_used; /* true if table_name is alias */
2005-07-18 13:31:02 +02:00
my_bool get_fields_in_item_tree; /* Signal to fix_field */
my_bool m_needs_reopen;
2000-07-31 21:29:14 +02:00
REGINFO reginfo; /* field connections */
MEM_ROOT mem_root;
GRANT_INFO grant;
FILESORT_INFO sort;
#ifdef WITH_PARTITION_STORAGE_ENGINE
partition_info *part_info; /* Partition related information */
bool no_partitions_used; /* If true, all partitions have been pruned away */
#endif
MDL_ticket *mdl_ticket;
bool fill_item_list(List<Item> *item_list) const;
void reset_item_list(List<Item> *item_list) const;
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 17:52:22 +02:00
void clear_column_bitmaps(void);
void prepare_for_position(void);
void mark_columns_used_by_index_no_reset(uint index, MY_BITMAP *map);
void mark_columns_used_by_index(uint index);
void restore_column_maps_after_mark_index();
void mark_auto_increment_column(void);
void mark_columns_needed_for_update(void);
void mark_columns_needed_for_delete(void);
void mark_columns_needed_for_insert(void);
inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
MY_BITMAP *write_set_arg)
{
read_set= read_set_arg;
write_set= write_set_arg;
if (file)
file->column_bitmaps_signal();
}
inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
MY_BITMAP *write_set_arg)
{
read_set= read_set_arg;
write_set= write_set_arg;
}
inline void use_all_columns()
{
column_bitmaps_set(&s->all_set, &s->all_set);
}
inline void default_column_bitmaps()
{
read_set= &def_read_set;
write_set= &def_write_set;
}
Fix for: Bug #20662 "Infinite loop in CREATE TABLE IF NOT EXISTS ... SELECT with locked tables" Bug #20903 "Crash when using CREATE TABLE .. SELECT and triggers" Bug #24738 "CREATE TABLE ... SELECT is not isolated properly" Bug #24508 "Inconsistent results of CREATE TABLE ... SELECT when temporary table exists" Deadlock occured when one tried to execute CREATE TABLE IF NOT EXISTS ... SELECT statement under LOCK TABLES which held read lock on target table. Attempt to execute the same statement for already existing target table with triggers caused server crashes. Also concurrent execution of CREATE TABLE ... SELECT statement and other statements involving target table suffered from various races (some of which might've led to deadlocks). Finally, attempt to execute CREATE TABLE ... SELECT in case when a temporary table with same name was already present led to the insertion of data into this temporary table and creation of empty non-temporary table. All above problems stemmed from the old implementation of CREATE TABLE ... SELECT in which we created, opened and locked target table without any special protection in a separate step and not with the rest of tables used by this statement. This underminded deadlock-avoidance approach used in server and created window for races. It also excluded target table from prelocking causing problems with trigger execution. The patch solves these problems by implementing new approach to handling of CREATE TABLE ... SELECT for base tables. We try to open and lock table to be created at the same time as the rest of tables used by this statement. If such table does not exist at this moment we create and place in the table cache special placeholder for it which prevents its creation or any other usage by other threads. We still use old approach for creation of temporary tables. Note that we have separate fix for 5.0 since there we use slightly different less intrusive approach.
2007-05-11 19:51:03 +02:00
/*
Is this instance of the table should be reopen?
Fix for: Bug #20662 "Infinite loop in CREATE TABLE IF NOT EXISTS ... SELECT with locked tables" Bug #20903 "Crash when using CREATE TABLE .. SELECT and triggers" Bug #24738 "CREATE TABLE ... SELECT is not isolated properly" Bug #24508 "Inconsistent results of CREATE TABLE ... SELECT when temporary table exists" Deadlock occured when one tried to execute CREATE TABLE IF NOT EXISTS ... SELECT statement under LOCK TABLES which held read lock on target table. Attempt to execute the same statement for already existing target table with triggers caused server crashes. Also concurrent execution of CREATE TABLE ... SELECT statement and other statements involving target table suffered from various races (some of which might've led to deadlocks). Finally, attempt to execute CREATE TABLE ... SELECT in case when a temporary table with same name was already present led to the insertion of data into this temporary table and creation of empty non-temporary table. All above problems stemmed from the old implementation of CREATE TABLE ... SELECT in which we created, opened and locked target table without any special protection in a separate step and not with the rest of tables used by this statement. This underminded deadlock-avoidance approach used in server and created window for races. It also excluded target table from prelocking causing problems with trigger execution. The patch solves these problems by implementing new approach to handling of CREATE TABLE ... SELECT for base tables. We try to open and lock table to be created at the same time as the rest of tables used by this statement. If such table does not exist at this moment we create and place in the table cache special placeholder for it which prevents its creation or any other usage by other threads. We still use old approach for creation of temporary tables. Note that we have separate fix for 5.0 since there we use slightly different less intrusive approach.
2007-05-11 19:51:03 +02:00
*/
inline bool needs_reopen()
{ return !db_stat || m_needs_reopen; }
inline void set_keyread(bool flag)
{
DBUG_ASSERT(file);
if (flag && !key_read)
{
key_read= 1;
file->extra(HA_EXTRA_KEYREAD);
}
else if (!flag && key_read)
{
key_read= 0;
file->extra(HA_EXTRA_NO_KEYREAD);
}
}
2000-07-31 21:29:14 +02:00
};
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
/**
Helper class which specifies which members of TABLE are used for
participation in the list of used/unused TABLE objects for the share.
*/
struct TABLE_share
{
static inline TABLE **next_ptr(TABLE *l)
{
return &l->share_next;
}
static inline TABLE ***prev_ptr(TABLE *l)
{
return &l->share_prev;
}
};
enum enum_schema_table_state
{
NOT_PROCESSED= 0,
PROCESSED_BY_CREATE_SORT_INDEX,
PROCESSED_BY_JOIN_EXEC
};
2000-07-31 21:29:14 +02:00
typedef struct st_foreign_key_info
{
LEX_STRING *forein_id;
LEX_STRING *referenced_db;
LEX_STRING *referenced_table;
LEX_STRING *update_method;
LEX_STRING *delete_method;
LEX_STRING *referenced_key_name;
List<LEX_STRING> foreign_fields;
List<LEX_STRING> referenced_fields;
} FOREIGN_KEY_INFO;
#define MY_I_S_MAYBE_NULL 1
#define MY_I_S_UNSIGNED 2
#define SKIP_OPEN_TABLE 0 // do not open table
#define OPEN_FRM_ONLY 1 // open FRM file only
#define OPEN_FULL_TABLE 2 // open FRM,MYD, MYI files
typedef struct st_field_info
{
/**
This is used as column name.
*/
const char* field_name;
/**
For string-type columns, this is the maximum number of
characters. Otherwise, it is the 'display-length' for the column.
*/
uint field_length;
/**
This denotes data type for the column. For the most part, there seems to
be one entry in the enum for each SQL data type, although there seem to
be a number of additional entries in the enum.
*/
enum enum_field_types field_type;
int value;
/**
This is used to set column attributes. By default, columns are @c NOT
@c NULL and @c SIGNED, and you can deviate from the default
by setting the appopriate flags. You can use either one of the flags
@c MY_I_S_MAYBE_NULL and @cMY_I_S_UNSIGNED or
combine them using the bitwise or operator @c |. Both flags are
defined in table.h.
*/
uint field_flags; // Field atributes(maybe_null, signed, unsigned etc.)
const char* old_name;
/**
This should be one of @c SKIP_OPEN_TABLE,
@c OPEN_FRM_ONLY or @c OPEN_FULL_TABLE.
*/
uint open_method;
} ST_FIELD_INFO;
struct TABLE_LIST;
typedef class Item COND;
typedef struct st_schema_table
{
const char* table_name;
ST_FIELD_INFO *fields_info;
/* Create information_schema table */
TABLE *(*create_table) (THD *thd, TABLE_LIST *table_list);
/* Fill table with data */
int (*fill_table) (THD *thd, TABLE_LIST *tables, COND *cond);
/* Handle fileds for old SHOW */
int (*old_format) (THD *thd, struct st_schema_table *schema_table);
int (*process_table) (THD *thd, TABLE_LIST *tables, TABLE *table,
bool res, LEX_STRING *db_name, LEX_STRING *table_name);
int idx_field1, idx_field2;
bool hidden;
uint i_s_requested_object; /* the object we need to open(TABLE | VIEW) */
} ST_SCHEMA_TABLE;
#define JOIN_TYPE_LEFT 1
#define JOIN_TYPE_RIGHT 2
#define VIEW_ALGORITHM_UNDEFINED 0
#define VIEW_ALGORITHM_TMPTABLE 1
#define VIEW_ALGORITHM_MERGE 2
2004-07-16 00:15:55 +02:00
#define VIEW_SUID_INVOKER 0
#define VIEW_SUID_DEFINER 1
#define VIEW_SUID_DEFAULT 2
/* view WITH CHECK OPTION parameter options */
2004-09-03 14:18:40 +02:00
#define VIEW_CHECK_NONE 0
#define VIEW_CHECK_LOCAL 1
#define VIEW_CHECK_CASCADED 2
/* result of view WITH CHECK OPTION parameter check */
#define VIEW_CHECK_OK 0
#define VIEW_CHECK_ERROR 1
#define VIEW_CHECK_SKIP 2
/** The threshold size a blob field buffer before it is freed */
#define MAX_TDC_BLOB_SIZE 65536
class select_union;
class TMP_TABLE_PARAM;
Item *create_view_field(THD *thd, TABLE_LIST *view, Item **field_ref,
const char *name);
struct Field_translator
{
Item *item;
const char *name;
};
2004-07-16 00:15:55 +02:00
/*
Column reference of a NATURAL/USING join. Since column references in
joins can be both from views and stored tables, may point to either a
Field (for tables), or a Field_translator (for views).
*/
class Natural_join_column: public Sql_alloc
{
public:
Field_translator *view_field; /* Column reference of merge view. */
Item_field *table_field; /* Column reference of table or temp view. */
TABLE_LIST *table_ref; /* Original base table/view reference. */
/*
True if a common join column of two NATURAL/USING join operands. Notice
that when we have a hierarchy of nested NATURAL/USING joins, a column can
be common at some level of nesting but it may not be common at higher
levels of nesting. Thus this flag may change depending on at which level
we are looking at some column.
*/
bool is_common;
public:
Natural_join_column(Field_translator *field_param, TABLE_LIST *tab);
Natural_join_column(Item_field *field_param, TABLE_LIST *tab);
const char *name();
Item *create_item(THD *thd);
Field *field();
const char *table_name();
const char *db_name();
GRANT_INFO *grant();
};
/**
Type of table which can be open for an element of table list.
*/
enum enum_open_type
{
OT_TEMPORARY_OR_BASE= 0, OT_TEMPORARY_ONLY, OT_BASE_ONLY
};
/*
Table reference in the FROM clause.
These table references can be of several types that correspond to
different SQL elements. Below we list all types of TABLE_LISTs with
the necessary conditions to determine when a TABLE_LIST instance
belongs to a certain type.
1) table (TABLE_LIST::view == NULL)
- base table
(TABLE_LIST::derived == NULL)
- subquery - TABLE_LIST::table is a temp table
(TABLE_LIST::derived != NULL)
- information schema table
(TABLE_LIST::schema_table != NULL)
NOTICE: for schema tables TABLE_LIST::field_translation may be != NULL
2) view (TABLE_LIST::view != NULL)
- merge (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_MERGE)
also (TABLE_LIST::field_translation != NULL)
- tmptable (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_TMPTABLE)
also (TABLE_LIST::field_translation == NULL)
3) nested table reference (TABLE_LIST::nested_join != NULL)
- table sequence - e.g. (t1, t2, t3)
TODO: how to distinguish from a JOIN?
- general JOIN
TODO: how to distinguish from a table sequence?
- NATURAL JOIN
(TABLE_LIST::natural_join != NULL)
- JOIN ... USING
(TABLE_LIST::join_using_fields != NULL)
*/
struct LEX;
2007-07-23 18:09:48 +02:00
class Index_hint;
struct TABLE_LIST
{
TABLE_LIST() {} /* Remove gcc warning */
/**
Prepare TABLE_LIST that consists of one table instance to use in
simple_open_and_lock_tables
*/
inline void init_one_table(const char *db_name_arg,
size_t db_length_arg,
const char *table_name_arg,
size_t table_name_length_arg,
const char *alias_arg,
enum thr_lock_type lock_type_arg)
{
bzero((char*) this, sizeof(*this));
db= (char*) db_name_arg;
db_length= db_length_arg;
table_name= (char*) table_name_arg;
table_name_length= table_name_length_arg;
alias= (char*) alias_arg;
lock_type= lock_type_arg;
Implement new type-of-operation-aware metadata locks. Add a wait-for graph based deadlock detector to the MDL subsystem. Fixes bug #46272 "MySQL 5.4.4, new MDL: unnecessary deadlock" and bug #37346 "innodb does not detect deadlock between update and alter table". The first bug manifested itself as an unwarranted abort of a transaction with ER_LOCK_DEADLOCK error by a concurrent ALTER statement, when this transaction tried to repeat use of a table, which it has already used in a similar fashion before ALTER started. The second bug showed up as a deadlock between table-level locks and InnoDB row locks, which was "detected" only after innodb_lock_wait_timeout timeout. A transaction would start using the table and modify a few rows. Then ALTER TABLE would come in, and start copying rows into a temporary table. Eventually it would stumble on the modified records and get blocked on a row lock. The first transaction would try to do more updates, and get blocked on thr_lock.c lock. This situation of circular wait would only get resolved by a timeout. Both these bugs stemmed from inadequate solutions to the problem of deadlocks occurring between different locking subsystems. In the first case we tried to avoid deadlocks between metadata locking and table-level locking subsystems, when upgrading shared metadata lock to exclusive one. Transactions holding the shared lock on the table and waiting for some table-level lock used to be aborted too aggressively. We also allowed ALTER TABLE to start in presence of transactions that modify the subject table. ALTER TABLE acquires TL_WRITE_ALLOW_READ lock at start, and that block all writes against the table (naturally, we don't want any writes to be lost when switching the old and the new table). TL_WRITE_ALLOW_READ lock, in turn, would block the started transaction on thr_lock.c lock, should they do more updates. This, again, lead to the need to abort such transactions. The second bug occurred simply because we didn't have any mechanism to detect deadlocks between the table-level locks in thr_lock.c and row-level locks in InnoDB, other than innodb_lock_wait_timeout. This patch solves both these problems by moving lock conflicts which are causing these deadlocks into the metadata locking subsystem, thus making it possible to avoid or detect such deadlocks inside MDL. To do this we introduce new type-of-operation-aware metadata locks, which allow MDL subsystem to know not only the fact that transaction has used or is going to use some object but also what kind of operation it has carried out or going to carry out on the object. This, along with the addition of a special kind of upgradable metadata lock, allows ALTER TABLE to wait until all transactions which has updated the table to go away. This solves the second issue. Another special type of upgradable metadata lock is acquired by LOCK TABLE WRITE. This second lock type allows to solve the first issue, since abortion of table-level locks in event of DDL under LOCK TABLES becomes also unnecessary. Below follows the list of incompatible changes introduced by this patch: - From now on, ALTER TABLE and CREATE/DROP TRIGGER SQL (i.e. those statements that acquire TL_WRITE_ALLOW_READ lock) wait for all transactions which has *updated* the table to complete. - From now on, LOCK TABLES ... WRITE, REPAIR/OPTIMIZE TABLE (i.e. all statements which acquire TL_WRITE table-level lock) wait for all transaction which *updated or read* from the table to complete. As a consequence, innodb_table_locks=0 option no longer applies to LOCK TABLES ... WRITE. - DROP DATABASE, DROP TABLE, RENAME TABLE no longer abort statements or transactions which use tables being dropped or renamed, and instead wait for these transactions to complete. - Since LOCK TABLES WRITE now takes a special metadata lock, not compatible with with reads or writes against the subject table and transaction-wide, thr_lock.c deadlock avoidance algorithm that used to ensure absence of deadlocks between LOCK TABLES WRITE and other statements is no longer sufficient, even for MyISAM. The wait-for graph based deadlock detector of MDL subsystem may sometimes be necessary and is involved. This may lead to ER_LOCK_DEADLOCK error produced for multi-statement transactions even if these only use MyISAM: session 1: session 2: begin; update t1 ... lock table t2 write, t1 write; -- gets a lock on t2, blocks on t1 update t2 ... (ER_LOCK_DEADLOCK) - Finally, support of LOW_PRIORITY option for LOCK TABLES ... WRITE was abandoned. LOCK TABLE ... LOW_PRIORITY WRITE from now on has the same priority as the usual LOCK TABLE ... WRITE. SELECT HIGH PRIORITY no longer trumps LOCK TABLE ... WRITE in the wait queue. - We do not take upgradable metadata locks on implicitly locked tables. So if one has, say, a view v1 that uses table t1, and issues: LOCK TABLE v1 WRITE; FLUSH TABLE t1; -- (or just 'FLUSH TABLES'), an error is produced. In order to be able to perform DDL on a table under LOCK TABLES, the table must be locked explicitly in the LOCK TABLES list.
2010-02-01 12:43:06 +01:00
mdl_request.init(MDL_key::TABLE, db, table_name,
(lock_type >= TL_WRITE_ALLOW_WRITE) ?
MDL_SHARED_WRITE : MDL_SHARED_READ);
}
/*
List of tables local to a subquery (used by SQL_LIST). Considers
views as leaves (unlike 'next_leaf' below). Created at parse time
in st_select_lex::add_table_to_list() -> table_list.link_in_list().
*/
TABLE_LIST *next_local;
2004-07-16 00:15:55 +02:00
/* link in a global list of all queries tables */
TABLE_LIST *next_global, **prev_global;
char *db, *alias, *table_name, *schema_table_name;
char *option; /* Used by cache index */
Item *on_expr; /* Used with outer join */
/*
The structure of ON expression presented in the member above
can be changed during certain optimizations. This member
contains a snapshot of AND-OR structure of the ON expression
made after permanent transformations of the parse tree, and is
used to restore ON clause before every reexecution of a prepared
statement or stored procedure.
*/
Item *prep_on_expr;
COND_EQUAL *cond_equal; /* Used with outer join */
/*
During parsing - left operand of NATURAL/USING join where 'this' is
the right operand. After parsing (this->natural_join == this) iff
'this' represents a NATURAL or USING join operation. Thus after
parsing 'this' is a NATURAL/USING join iff (natural_join != NULL).
*/
TABLE_LIST *natural_join;
/*
True if 'this' represents a nested join that is a NATURAL JOIN.
For one of the operands of 'this', the member 'natural_join' points
to the other operand of 'this'.
*/
bool is_natural_join;
/* Field names in a USING clause for JOIN ... USING. */
List<String> *join_using_fields;
/*
Explicitly store the result columns of either a NATURAL/USING join or
an operand of such a join.
*/
List<Natural_join_column> *join_columns;
/* TRUE if join_columns contains all columns of this table reference. */
bool is_join_columns_complete;
/*
List of nodes in a nested join tree, that should be considered as
leaves with respect to name resolution. The leaves are: views,
top-most nodes representing NATURAL/USING joins, subqueries, and
base tables. All of these TABLE_LIST instances contain a
materialized list of columns. The list is local to a subquery.
*/
TABLE_LIST *next_name_resolution_table;
/* Index names in a "... JOIN ... USE/IGNORE INDEX ..." clause. */
2007-07-23 18:09:48 +02:00
List<Index_hint> *index_hints;
TABLE *table; /* opened table */
uint table_id; /* table id (from binlog) for opened table */
/*
select_result for derived table to pass it from table creation to table
filling procedure
*/
select_union *derived_result;
2004-07-16 00:15:55 +02:00
/*
Reference from aux_tables to local list entry of main select of
multi-delete statement:
delete t1 from t2,t1 where t1.a<'B' and t2.b=t1.b;
here it will be reference of first occurrence of t1 to second (as you
can see this lists can't be merged)
*/
TABLE_LIST *correspondent_table;
/**
@brief Normally, this field is non-null for anonymous derived tables only.
@details This field is set to non-null for
- Anonymous derived tables, In this case it points to the SELECT_LEX_UNIT
representing the derived table. E.g. for a query
@verbatim SELECT * FROM (SELECT a FROM t1) b @endverbatim
For the @c TABLE_LIST representing the derived table @c b, @c derived
points to the SELECT_LEX_UNIT representing the result of the query within
parenteses.
- Views. This is set for views with @verbatim ALGORITHM = TEMPTABLE
@endverbatim by mysql_make_view().
@note Inside views, a subquery in the @c FROM clause is not allowed.
@note Do not use this field to separate views/base tables/anonymous
derived tables. Use TABLE_LIST::is_anonymous_derived_table().
*/
2004-07-16 00:15:55 +02:00
st_select_lex_unit *derived; /* SELECT_LEX_UNIT of derived table */
ST_SCHEMA_TABLE *schema_table; /* Information_schema table */
st_select_lex *schema_select_lex;
/*
True when the view field translation table is used to convert
schema table fields for backwards compatibility with SHOW command.
*/
bool schema_table_reformed;
TMP_TABLE_PARAM *schema_table_param;
2004-07-16 00:15:55 +02:00
/* link to select_lex where this table was used */
st_select_lex *select_lex;
LEX *view; /* link on VIEW lex for merging */
Field_translator *field_translation; /* array of VIEW fields */
/* pointer to element after last one in translation table above */
Field_translator *field_translation_end;
/*
List (based on next_local) of underlying tables of this view. I.e. it
does not include the tables of subqueries used in the view. Is set only
for merged views.
*/
TABLE_LIST *merge_underlying_list;
/*
- 0 for base tables
- in case of the view it is the list of all (not only underlying
tables but also used in subquery ones) tables of the view.
*/
List<TABLE_LIST> *view_tables;
/* most upper view this table belongs to */
TABLE_LIST *belong_to_view;
/*
The view directly referencing this table
(non-zero only for merged underlying tables of a view).
*/
TABLE_LIST *referencing_view;
Bug#26379 - Combination of FLUSH TABLE and REPAIR TABLE corrupts a MERGE table Bug 26867 - LOCK TABLES + REPAIR + merge table result in memory/cpu hogging Bug 26377 - Deadlock with MERGE and FLUSH TABLE Bug 25038 - Waiting TRUNCATE Bug 25700 - merge base tables get corrupted by optimize/analyze/repair table Bug 30275 - Merge tables: flush tables or unlock tables causes server to crash Bug 19627 - temporary merge table locking Bug 27660 - Falcon: merge table possible Bug 30273 - merge tables: Can't lock file (errno: 155) The problems were: Bug 26379 - Combination of FLUSH TABLE and REPAIR TABLE corrupts a MERGE table 1. A thread trying to lock a MERGE table performs busy waiting while REPAIR TABLE or a similar table administration task is ongoing on one or more of its MyISAM tables. 2. A thread trying to lock a MERGE table performs busy waiting until all threads that did REPAIR TABLE or similar table administration tasks on one or more of its MyISAM tables in LOCK TABLES segments do UNLOCK TABLES. The difference against problem #1 is that the busy waiting takes place *after* the administration task. It is terminated by UNLOCK TABLES only. 3. Two FLUSH TABLES within a LOCK TABLES segment can invalidate the lock. This does *not* require a MERGE table. The first FLUSH TABLES can be replaced by any statement that requires other threads to reopen the table. In 5.0 and 5.1 a single FLUSH TABLES can provoke the problem. Bug 26867 - LOCK TABLES + REPAIR + merge table result in memory/cpu hogging Trying DML on a MERGE table, which has a child locked and repaired by another thread, made an infinite loop in the server. Bug 26377 - Deadlock with MERGE and FLUSH TABLE Locking a MERGE table and its children in parent-child order and flushing the child deadlocked the server. Bug 25038 - Waiting TRUNCATE Truncating a MERGE child, while the MERGE table was in use, let the truncate fail instead of waiting for the table to become free. Bug 25700 - merge base tables get corrupted by optimize/analyze/repair table Repairing a child of an open MERGE table corrupted the child. It was necessary to FLUSH the child first. Bug 30275 - Merge tables: flush tables or unlock tables causes server to crash Flushing and optimizing locked MERGE children crashed the server. Bug 19627 - temporary merge table locking Use of a temporary MERGE table with non-temporary children could corrupt the children. Temporary tables are never locked. So we do now prohibit non-temporary chidlren of a temporary MERGE table. Bug 27660 - Falcon: merge table possible It was possible to create a MERGE table with non-MyISAM children. Bug 30273 - merge tables: Can't lock file (errno: 155) This was a Windows-only bug. Table administration statements sometimes failed with "Can't lock file (errno: 155)". These bugs are fixed by a new implementation of MERGE table open. When opening a MERGE table in open_tables() we do now add the child tables to the list of tables to be opened by open_tables() (the "query_list"). The children are not opened in the handler at this stage. After opening the parent, open_tables() opens each child from the now extended query_list. When the last child is opened, we remove the children from the query_list again and attach the children to the parent. This behaves similar to the old open. However it does not open the MyISAM tables directly, but grabs them from the already open children. When closing a MERGE table in close_thread_table() we detach the children only. Closing of the children is done implicitly because they are in thd->open_tables. For more detail see the comment at the top of ha_myisammrg.cc. Changed from open_ltable() to open_and_lock_tables() in all places that can be relevant for MERGE tables. The latter can handle tables added to the list on the fly. When open_ltable() was used in a loop over a list of tables, the list must be temporarily terminated after every table for open_and_lock_tables(). table_list->required_type is set to FRMTYPE_TABLE to avoid open of special tables. Handling of derived tables is suppressed. These details are handled by the new function open_n_lock_single_table(), which has nearly the same signature as open_ltable() and can replace it in most cases. In reopen_tables() some of the tables open by a thread can be closed and reopened. When a MERGE child is affected, the parent must be closed and reopened too. Closing of the parent is forced before the first child is closed. Reopen happens in the order of thd->open_tables. MERGE parents do not attach their children automatically at open. This is done after all tables are reopened. So all children are open when attaching them. Special lock handling like mysql_lock_abort() or mysql_lock_remove() needs to be suppressed for MERGE children or forwarded to the parent. This depends on the situation. In loops over all open tables one suppresses child lock handling. When a single table is touched, forwarding is done. Behavioral changes: =================== This patch changes the behavior of temporary MERGE tables. Temporary MERGE must have temporary children. The old behavior was wrong. A temporary table is not locked. Hence even non-temporary children were not locked. See Bug 19627 - temporary merge table locking. You cannot change the union list of a non-temporary MERGE table when LOCK TABLES is in effect. The following does *not* work: CREATE TABLE m1 ... ENGINE=MRG_MYISAM ...; LOCK TABLES t1 WRITE, t2 WRITE, m1 WRITE; ALTER TABLE m1 ... UNION=(t1,t2) ...; However, you can do this with a temporary MERGE table. You cannot create a MERGE table with CREATE ... SELECT, neither as a temporary MERGE table, nor as a non-temporary MERGE table. CREATE TABLE m1 ... ENGINE=MRG_MYISAM ... SELECT ...; Gives error message: table is not BASE TABLE.
2007-11-15 20:25:43 +01:00
/* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */
TABLE_LIST *parent_l;
/*
2005-10-29 11:11:34 +02:00
Security context (non-zero only for tables which belong
to view with SQL SECURITY DEFINER)
*/
Security_context *security_ctx;
/*
2005-10-29 11:11:34 +02:00
This view security context (non-zero only for views with
SQL SECURITY DEFINER)
*/
Security_context *view_sctx;
/*
List of all base tables local to a subquery including all view
tables. Unlike 'next_local', this in this list views are *not*
leaves. Created in setup_tables() -> make_leaves_list().
*/
bool allowed_show;
TABLE_LIST *next_leaf;
2004-07-16 00:15:55 +02:00
Item *where; /* VIEW WHERE clause condition */
2004-09-03 14:18:40 +02:00
Item *check_option; /* WITH CHECK OPTION condition */
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
LEX_STRING select_stmt; /* text of (CREATE/SELECT) statement */
LEX_STRING md5; /* md5 of query text */
2004-07-16 00:15:55 +02:00
LEX_STRING source; /* source of CREATE VIEW */
LEX_STRING view_db; /* saved view database */
LEX_STRING view_name; /* saved view name */
2004-07-16 00:15:55 +02:00
LEX_STRING timestamp; /* GMT time stamp of last operation */
st_lex_user definer; /* definer of view */
2004-07-16 00:15:55 +02:00
ulonglong file_version; /* version of file's field set */
2004-09-03 20:38:01 +02:00
ulonglong updatable_view; /* VIEW can be updated */
/**
@brief The declared algorithm, if this is a view.
@details One of
- VIEW_ALGORITHM_UNDEFINED
- VIEW_ALGORITHM_TMPTABLE
- VIEW_ALGORITHM_MERGE
@to do Replace with an enum
*/
ulonglong algorithm;
ulonglong view_suid; /* view is suid (TRUE dy default) */
2004-09-03 14:18:40 +02:00
ulonglong with_check; /* WITH CHECK OPTION */
/*
effective value of WITH CHECK OPTION (differ for temporary table
algorithm)
*/
uint8 effective_with_check;
/**
@brief The view algorithm that is actually used, if this is a view.
@details One of
- VIEW_ALGORITHM_UNDEFINED
- VIEW_ALGORITHM_TMPTABLE
- VIEW_ALGORITHM_MERGE
@to do Replace with an enum
*/
uint8 effective_algorithm;
2004-07-16 00:15:55 +02:00
GRANT_INFO grant;
sql/ha_innodb.cc enabled query cache for ndb modified engine interface somewhat sql/ha_innodb.h enabled query cache for ndb modified engine interface somewhat sql/ha_ndbcluster.cc enabled query cache for ndb modified engine interface somewhat ndb will only allow caching and retrieval if running autocommit - return false, but do not invalidate commit count is used as engine data, i.e. - store commit count before store of cache - allow retrieval if commit count has not changed on a table - invalidate if commit count has changed sql/ha_ndbcluster.h enabled query cache for ndb modified engine interface somewhat sql/handler.cc enabled query cache for ndb modified engine interface somewhat sql/handler.h enabled query cache for ndb modified engine interface somewhat new virtual handler method cached_table_registration called on each table before alowing store in query cache - return TRUE - ok to cache, FALSE - not allowed to cache, invalidate queries if engine_data below has changed - sets ulonglong (engine_data) that is stored in query cache for each table - sets callback to be called for each table before usage of cached query, callback = 0 -> no check later sql/mysql_priv.h enabled query cache for ndb modified engine interface somewhat callcack prototype for callback to engine before query cache retrieval sql/sql_cache.cc enabled query cache for ndb modified engine interface somewhat if callback is set on table in cache, do callback to check if allowed to use cache if not allowed to use cache, check if engine_data has changed, if so, invalidate all queries with that table + changes to store and pass callback and engine_data around sql/sql_cache.h enabled query cache for ndb modified engine interface somewhat changes to store callback and engine_data sql/table.h enabled query cache for ndb modified engine interface somewhat changes to store callback and engine_data
2004-11-24 12:56:51 +01:00
/* data need by some engines in query cache*/
ulonglong engine_data;
/* call back function for asking handler about caching in query cache */
qc_engine_callback callback_func;
thr_lock_type lock_type;
uint outer_join; /* Which join type */
2004-06-25 15:52:01 +02:00
uint shared; /* Used in multi-upd */
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
size_t db_length;
size_t table_name_length;
bool updatable; /* VIEW/TABLE can be updated now */
bool straight; /* optimize with prev table */
bool updating; /* for replicate-do/ignore table */
bool force_index; /* prefer index over table scan */
bool ignore_leaves; /* preload only non-leaf nodes */
table_map dep_tables; /* tables the table depends on */
table_map on_expr_dep_tables; /* tables on expression depends on */
struct st_nested_join *nested_join; /* if the element is a nested join */
TABLE_LIST *embedding; /* nested join containing the table */
List<TABLE_LIST> *join_list;/* join list the table belongs to */
2004-06-25 15:52:01 +02:00
bool cacheable_table; /* stop PS caching */
/* used in multi-upd/views privilege check */
2004-07-16 00:15:55 +02:00
bool table_in_first_from_clause;
/**
Specifies which kind of table should be open for this element
of table list.
*/
enum enum_open_type open_type;
/* TRUE if this merged view contain auto_increment field */
2004-07-16 00:15:55 +02:00
bool contain_auto_increment;
bool multitable_view; /* TRUE iff this is multitable view */
bool compact_view_format; /* Use compact format for SHOW CREATE VIEW */
/* view where processed */
bool where_processed;
/* TRUE <=> VIEW CHECK OPTION expression has been processed */
bool check_option_processed;
/* FRMTYPE_ERROR if any type is acceptable */
enum frm_type_enum required_type;
handlerton *db_type; /* table_type for handler */
2004-07-16 00:15:55 +02:00
char timestamp_buffer[20]; /* buffer for timestamp (19+1) */
/*
This TABLE_LIST object is just placeholder for prelocking, it will be
used for implicit LOCK TABLES only and won't be used in real statement.
*/
bool prelocking_placeholder;
/**
Indicates that if TABLE_LIST object corresponds to the table/view
Backport of revno ## 2617.31.1, 2617.31.3, 2617.31.4, 2617.31.5, 2617.31.12, 2617.31.15, 2617.31.15, 2617.31.16, 2617.43.1 - initial changeset that introduced the fix for Bug#989 and follow up fixes for all test suite failures introduced in the initial changeset. ------------------------------------------------------------ revno: 2617.31.1 committer: Davi Arnaut <Davi.Arnaut@Sun.COM> branch nick: 4284-6.0 timestamp: Fri 2009-03-06 19:17:00 -0300 message: Bug#989: If DROP TABLE while there's an active transaction, wrong binlog order WL#4284: Transactional DDL locking Currently the MySQL server does not keep metadata locks on schema objects for the duration of a transaction, thus failing to guarantee the integrity of the schema objects being used during the transaction and to protect then from concurrent DDL operations. This also poses a problem for replication as a DDL operation might be replicated even thought there are active transactions using the object being modified. The solution is to defer the release of metadata locks until a active transaction is either committed or rolled back. This prevents other statements from modifying the table for the entire duration of the transaction. This provides commitment ordering for guaranteeing serializability across multiple transactions. - Incompatible change: If MySQL's metadata locking system encounters a lock conflict, the usual schema is to use the try and back-off technique to avoid deadlocks -- this schema consists in releasing all locks and trying to acquire them all in one go. But in a transactional context this algorithm can't be utilized as its not possible to release locks acquired during the course of the transaction without breaking the transaction commitments. To avoid deadlocks in this case, the ER_LOCK_DEADLOCK will be returned if a lock conflict is encountered during a transaction. Let's consider an example: A transaction has two statements that modify table t1, then table t2, and then commits. The first statement of the transaction will acquire a shared metadata lock on table t1, and it will be kept utill COMMIT to ensure serializability. At the moment when the second statement attempts to acquire a shared metadata lock on t2, a concurrent ALTER or DROP statement might have locked t2 exclusively. The prescription of the current locking protocol is that the acquirer of the shared lock backs off -- gives up all his current locks and retries. This implies that the entire multi-statement transaction has to be rolled back. - Incompatible change: FLUSH commands such as FLUSH PRIVILEGES and FLUSH TABLES WITH READ LOCK won't cause locked tables to be implicitly unlocked anymore.
2009-12-05 00:02:48 +01:00
which requires special handling.
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
*/
enum
{
Backport of revno ## 2617.31.1, 2617.31.3, 2617.31.4, 2617.31.5, 2617.31.12, 2617.31.15, 2617.31.15, 2617.31.16, 2617.43.1 - initial changeset that introduced the fix for Bug#989 and follow up fixes for all test suite failures introduced in the initial changeset. ------------------------------------------------------------ revno: 2617.31.1 committer: Davi Arnaut <Davi.Arnaut@Sun.COM> branch nick: 4284-6.0 timestamp: Fri 2009-03-06 19:17:00 -0300 message: Bug#989: If DROP TABLE while there's an active transaction, wrong binlog order WL#4284: Transactional DDL locking Currently the MySQL server does not keep metadata locks on schema objects for the duration of a transaction, thus failing to guarantee the integrity of the schema objects being used during the transaction and to protect then from concurrent DDL operations. This also poses a problem for replication as a DDL operation might be replicated even thought there are active transactions using the object being modified. The solution is to defer the release of metadata locks until a active transaction is either committed or rolled back. This prevents other statements from modifying the table for the entire duration of the transaction. This provides commitment ordering for guaranteeing serializability across multiple transactions. - Incompatible change: If MySQL's metadata locking system encounters a lock conflict, the usual schema is to use the try and back-off technique to avoid deadlocks -- this schema consists in releasing all locks and trying to acquire them all in one go. But in a transactional context this algorithm can't be utilized as its not possible to release locks acquired during the course of the transaction without breaking the transaction commitments. To avoid deadlocks in this case, the ER_LOCK_DEADLOCK will be returned if a lock conflict is encountered during a transaction. Let's consider an example: A transaction has two statements that modify table t1, then table t2, and then commits. The first statement of the transaction will acquire a shared metadata lock on table t1, and it will be kept utill COMMIT to ensure serializability. At the moment when the second statement attempts to acquire a shared metadata lock on t2, a concurrent ALTER or DROP statement might have locked t2 exclusively. The prescription of the current locking protocol is that the acquirer of the shared lock backs off -- gives up all his current locks and retries. This implies that the entire multi-statement transaction has to be rolled back. - Incompatible change: FLUSH commands such as FLUSH PRIVILEGES and FLUSH TABLES WITH READ LOCK won't cause locked tables to be implicitly unlocked anymore.
2009-12-05 00:02:48 +01:00
/* Normal open. */
OPEN_NORMAL= 0,
/* Associate a table share only if the the table exists. */
OPEN_IF_EXISTS,
/* Don't associate a table share. */
OPEN_STUB
} open_strategy;
/**
Indicates the locking strategy for the object being opened.
Backport of revno ## 2617.31.1, 2617.31.3, 2617.31.4, 2617.31.5, 2617.31.12, 2617.31.15, 2617.31.15, 2617.31.16, 2617.43.1 - initial changeset that introduced the fix for Bug#989 and follow up fixes for all test suite failures introduced in the initial changeset. ------------------------------------------------------------ revno: 2617.31.1 committer: Davi Arnaut <Davi.Arnaut@Sun.COM> branch nick: 4284-6.0 timestamp: Fri 2009-03-06 19:17:00 -0300 message: Bug#989: If DROP TABLE while there's an active transaction, wrong binlog order WL#4284: Transactional DDL locking Currently the MySQL server does not keep metadata locks on schema objects for the duration of a transaction, thus failing to guarantee the integrity of the schema objects being used during the transaction and to protect then from concurrent DDL operations. This also poses a problem for replication as a DDL operation might be replicated even thought there are active transactions using the object being modified. The solution is to defer the release of metadata locks until a active transaction is either committed or rolled back. This prevents other statements from modifying the table for the entire duration of the transaction. This provides commitment ordering for guaranteeing serializability across multiple transactions. - Incompatible change: If MySQL's metadata locking system encounters a lock conflict, the usual schema is to use the try and back-off technique to avoid deadlocks -- this schema consists in releasing all locks and trying to acquire them all in one go. But in a transactional context this algorithm can't be utilized as its not possible to release locks acquired during the course of the transaction without breaking the transaction commitments. To avoid deadlocks in this case, the ER_LOCK_DEADLOCK will be returned if a lock conflict is encountered during a transaction. Let's consider an example: A transaction has two statements that modify table t1, then table t2, and then commits. The first statement of the transaction will acquire a shared metadata lock on table t1, and it will be kept utill COMMIT to ensure serializability. At the moment when the second statement attempts to acquire a shared metadata lock on t2, a concurrent ALTER or DROP statement might have locked t2 exclusively. The prescription of the current locking protocol is that the acquirer of the shared lock backs off -- gives up all his current locks and retries. This implies that the entire multi-statement transaction has to be rolled back. - Incompatible change: FLUSH commands such as FLUSH PRIVILEGES and FLUSH TABLES WITH READ LOCK won't cause locked tables to be implicitly unlocked anymore.
2009-12-05 00:02:48 +01:00
*/
enum
{
/*
Take metadata lock specified by 'mdl_request' member before
the object is opened. Do nothing after that.
*/
OTLS_NONE= 0,
/*
Take (exclusive) metadata lock specified by 'mdl_request' member
before object is opened. If opening is successful, downgrade to
a shared lock.
*/
OTLS_DOWNGRADE_IF_EXISTS
Backport of revno ## 2617.31.1, 2617.31.3, 2617.31.4, 2617.31.5, 2617.31.12, 2617.31.15, 2617.31.15, 2617.31.16, 2617.43.1 - initial changeset that introduced the fix for Bug#989 and follow up fixes for all test suite failures introduced in the initial changeset. ------------------------------------------------------------ revno: 2617.31.1 committer: Davi Arnaut <Davi.Arnaut@Sun.COM> branch nick: 4284-6.0 timestamp: Fri 2009-03-06 19:17:00 -0300 message: Bug#989: If DROP TABLE while there's an active transaction, wrong binlog order WL#4284: Transactional DDL locking Currently the MySQL server does not keep metadata locks on schema objects for the duration of a transaction, thus failing to guarantee the integrity of the schema objects being used during the transaction and to protect then from concurrent DDL operations. This also poses a problem for replication as a DDL operation might be replicated even thought there are active transactions using the object being modified. The solution is to defer the release of metadata locks until a active transaction is either committed or rolled back. This prevents other statements from modifying the table for the entire duration of the transaction. This provides commitment ordering for guaranteeing serializability across multiple transactions. - Incompatible change: If MySQL's metadata locking system encounters a lock conflict, the usual schema is to use the try and back-off technique to avoid deadlocks -- this schema consists in releasing all locks and trying to acquire them all in one go. But in a transactional context this algorithm can't be utilized as its not possible to release locks acquired during the course of the transaction without breaking the transaction commitments. To avoid deadlocks in this case, the ER_LOCK_DEADLOCK will be returned if a lock conflict is encountered during a transaction. Let's consider an example: A transaction has two statements that modify table t1, then table t2, and then commits. The first statement of the transaction will acquire a shared metadata lock on table t1, and it will be kept utill COMMIT to ensure serializability. At the moment when the second statement attempts to acquire a shared metadata lock on t2, a concurrent ALTER or DROP statement might have locked t2 exclusively. The prescription of the current locking protocol is that the acquirer of the shared lock backs off -- gives up all his current locks and retries. This implies that the entire multi-statement transaction has to be rolled back. - Incompatible change: FLUSH commands such as FLUSH PRIVILEGES and FLUSH TABLES WITH READ LOCK won't cause locked tables to be implicitly unlocked anymore.
2009-12-05 00:02:48 +01:00
} lock_strategy;
/* For transactional locking. */
int lock_timeout; /* NOWAIT or WAIT [X] */
bool lock_transactional; /* If transactional lock requested. */
bool internal_tmp_table;
Backport of Bug#27525 to mysql-next-mr ------------------------------------------------------------ revno: 2572.2.1 revision-id: sp1r-davi@mysql.com/endora.local-20080227225948-16317 parent: sp1r-anozdrin/alik@quad.-20080226165712-10409 committer: davi@mysql.com/endora.local timestamp: Wed 2008-02-27 19:59:48 -0300 message: Bug#27525 table not found when using multi-table-deletes with aliases over several databas Bug#30234 Unexpected behavior using DELETE with AS and USING The multi-delete statement has a documented limitation that cross-database multiple-table deletes using aliases are not supported because it fails to find the tables by alias if it belongs to a different database. The problem is that when building the list of tables to delete from, if a database name is not specified (maybe an alias) it defaults to the name of the current selected database, making impossible to to properly resolve tables by alias later. Another problem is a inconsistency of the multiple table delete syntax that permits ambiguities in a delete statement (aliases that refer to multiple different tables or vice-versa). The first step for a solution and proper implementation of the cross-databse multiple table delete is to get rid of any ambiguities in a multiple table statement. Currently, the parser is accepting multiple table delete statements that have no obvious meaning, such as: DELETE a1 FROM db1.t1 AS a1, db2.t2 AS a1; DELETE a1 AS a1 FROM db1.t1 AS a1, db2.t2 AS a1; The solution is to resolve the left part of a delete statement using the right part, if the a table on right has an alias, it must be referenced in the left using the given alias. Also, each table on the left side must match unambiguously only one table in the right side.
2009-11-10 19:48:46 +01:00
/** TRUE if an alias for this table was specified in the SQL. */
bool is_alias;
/** TRUE if the table is referred to in the statement using a fully
qualified name (<db_name>.<table_name>).
*/
bool is_fqtn;
Patch for the following bugs: - BUG#11986: Stored routines and triggers can fail if the code has a non-ascii symbol - BUG#16291: mysqldump corrupts string-constants with non-ascii-chars - BUG#19443: INFORMATION_SCHEMA does not support charsets properly - BUG#21249: Character set of SP-var can be ignored - BUG#25212: Character set of string constant is ignored (stored routines) - BUG#25221: Character set of string constant is ignored (triggers) There were a few general problems that caused these bugs: 1. Character set information of the original (definition) query for views, triggers, stored routines and events was lost. 2. mysqldump output query in client character set, which can be inappropriate to encode definition-query. 3. INFORMATION_SCHEMA used strings with mixed encodings to display object definition; 1. No query-definition-character set. In order to compile query into execution code, some extra data (such as environment variables or the database character set) is used. The problem here was that this context was not preserved. So, on the next load it can differ from the original one, thus the result will be different. The context contains the following data: - client character set; - connection collation (character set and collation); - collation of the owner database; The fix is to store this context and use it each time we parse (compile) and execute the object (stored routine, trigger, ...). 2. Wrong mysqldump-output. The original query can contain several encodings (by means of character set introducers). The problem here was that we tried to convert original query to the mysqldump-client character set. Moreover, we stored queries in different character sets for different objects (views, for one, used UTF8, triggers used original character set). The solution is - to store definition queries in the original character set; - to change SHOW CREATE statement to output definition query in the binary character set (i.e. without any conversion); - introduce SHOW CREATE TRIGGER statement; - to dump special statements to switch the context to the original one before dumping and restore it afterwards. Note, in order to preserve the database collation at the creation time, additional ALTER DATABASE might be used (to temporary switch the database collation back to the original value). In this case, ALTER DATABASE privilege will be required. This is a backward-incompatible change. 3. INFORMATION_SCHEMA showed non-UTF8 strings The fix is to generate UTF8-query during the parsing, store it in the object and show it in the INFORMATION_SCHEMA. Basically, the idea is to create a copy of the original query convert it to UTF8. Character set introducers are removed and all text literals are converted to UTF8. This UTF8 query is intended to provide user-readable output. It must not be used to recreate the object. Specialized SHOW CREATE statements should be used for this. The reason for this limitation is the following: the original query can contain symbols from several character sets (by means of character set introducers). Example: - original query: CREATE VIEW v1 AS SELECT _cp1251 'Hello' AS c1; - UTF8 query (for INFORMATION_SCHEMA): CREATE VIEW v1 AS SELECT 'Hello' AS c1;
2007-06-28 19:34:54 +02:00
/* View creation context. */
View_creation_ctx *view_creation_ctx;
/*
Attributes to save/load view creation context in/from frm-file.
Ther are required only to be able to use existing parser to load
view-definition file. As soon as the parser parsed the file, view
creation context is initialized and the attributes become redundant.
These attributes MUST NOT be used for any purposes but the parsing.
*/
LEX_STRING view_client_cs_name;
LEX_STRING view_connection_cl_name;
/*
View definition (SELECT-statement) in the UTF-form.
*/
LEX_STRING view_body_utf8;
/* End of view definition context. */
/**
Indicates what triggers we need to pre-load for this TABLE_LIST
when opening an associated TABLE. This is filled after
the parsed tree is created.
*/
uint8 trg_event_map;
/* TRUE <=> this table is a const one and was optimized away. */
bool optimized_away;
uint i_s_requested_object;
bool has_db_lookup_value;
bool has_table_lookup_value;
uint table_open_method;
enum enum_schema_table_state schema_table_state;
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
MDL_request mdl_request;
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
2004-07-16 00:15:55 +02:00
void calc_md5(char *buffer);
void set_underlying_merge();
int view_check_option(THD *thd, bool ignore_failure);
bool setup_underlying(THD *thd);
2004-11-08 00:54:23 +01:00
void cleanup_items();
Fix for: Bug #20662 "Infinite loop in CREATE TABLE IF NOT EXISTS ... SELECT with locked tables" Bug #20903 "Crash when using CREATE TABLE .. SELECT and triggers" Bug #24738 "CREATE TABLE ... SELECT is not isolated properly" Bug #24508 "Inconsistent results of CREATE TABLE ... SELECT when temporary table exists" Deadlock occured when one tried to execute CREATE TABLE IF NOT EXISTS ... SELECT statement under LOCK TABLES which held read lock on target table. Attempt to execute the same statement for already existing target table with triggers caused server crashes. Also concurrent execution of CREATE TABLE ... SELECT statement and other statements involving target table suffered from various races (some of which might've led to deadlocks). Finally, attempt to execute CREATE TABLE ... SELECT in case when a temporary table with same name was already present led to the insertion of data into this temporary table and creation of empty non-temporary table. All above problems stemmed from the old implementation of CREATE TABLE ... SELECT in which we created, opened and locked target table without any special protection in a separate step and not with the rest of tables used by this statement. This underminded deadlock-avoidance approach used in server and created window for races. It also excluded target table from prelocking causing problems with trigger execution. The patch solves these problems by implementing new approach to handling of CREATE TABLE ... SELECT for base tables. We try to open and lock table to be created at the same time as the rest of tables used by this statement. If such table does not exist at this moment we create and place in the table cache special placeholder for it which prevents its creation or any other usage by other threads. We still use old approach for creation of temporary tables. Note that we have separate fix for 5.0 since there we use slightly different less intrusive approach.
2007-05-11 19:51:03 +02:00
bool placeholder()
{
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
return derived || view || schema_table || !table;
Fix for: Bug #20662 "Infinite loop in CREATE TABLE IF NOT EXISTS ... SELECT with locked tables" Bug #20903 "Crash when using CREATE TABLE .. SELECT and triggers" Bug #24738 "CREATE TABLE ... SELECT is not isolated properly" Bug #24508 "Inconsistent results of CREATE TABLE ... SELECT when temporary table exists" Deadlock occured when one tried to execute CREATE TABLE IF NOT EXISTS ... SELECT statement under LOCK TABLES which held read lock on target table. Attempt to execute the same statement for already existing target table with triggers caused server crashes. Also concurrent execution of CREATE TABLE ... SELECT statement and other statements involving target table suffered from various races (some of which might've led to deadlocks). Finally, attempt to execute CREATE TABLE ... SELECT in case when a temporary table with same name was already present led to the insertion of data into this temporary table and creation of empty non-temporary table. All above problems stemmed from the old implementation of CREATE TABLE ... SELECT in which we created, opened and locked target table without any special protection in a separate step and not with the rest of tables used by this statement. This underminded deadlock-avoidance approach used in server and created window for races. It also excluded target table from prelocking causing problems with trigger execution. The patch solves these problems by implementing new approach to handling of CREATE TABLE ... SELECT for base tables. We try to open and lock table to be created at the same time as the rest of tables used by this statement. If such table does not exist at this moment we create and place in the table cache special placeholder for it which prevents its creation or any other usage by other threads. We still use old approach for creation of temporary tables. Note that we have separate fix for 5.0 since there we use slightly different less intrusive approach.
2007-05-11 19:51:03 +02:00
}
void print(THD *thd, String *str, enum_query_type query_type);
bool check_single_table(TABLE_LIST **table, table_map map,
TABLE_LIST *view);
bool set_insert_values(MEM_ROOT *mem_root);
void hide_view_error(THD *thd);
TABLE_LIST *find_underlying_table(TABLE *table);
TABLE_LIST *first_leaf_for_name_resolution();
TABLE_LIST *last_leaf_for_name_resolution();
bool is_leaf_for_name_resolution();
inline TABLE_LIST *top_table()
{ return belong_to_view ? belong_to_view : this; }
inline bool prepare_check_option(THD *thd)
{
bool res= FALSE;
if (effective_with_check)
res= prep_check_option(thd, effective_with_check);
return res;
}
inline bool prepare_where(THD *thd, Item **conds,
bool no_where_clause)
{
if (effective_algorithm == VIEW_ALGORITHM_MERGE)
return prep_where(thd, conds, no_where_clause);
return FALSE;
}
void register_want_access(ulong want_access);
bool prepare_security(THD *thd);
#ifndef NO_EMBEDDED_ACCESS_CHECKS
Security_context *find_view_security_context(THD *thd);
bool prepare_view_securety_context(THD *thd);
#endif
/*
Cleanup for re-execution in a prepared statement or a stored
procedure.
*/
void reinit_before_use(THD *thd);
Item_subselect *containing_subselect();
/*
Compiles the tagged hints list and fills up TABLE::keys_in_use_for_query,
TABLE::keys_in_use_for_group_by, TABLE::keys_in_use_for_order_by,
TABLE::force_index and TABLE::covering_keys.
*/
bool process_index_hints(TABLE *table);
/**
Compare the version of metadata from the previous execution
(if any) with values obtained from the current table
definition cache element.
@sa check_and_update_table_version()
*/
inline
bool is_table_ref_id_equal(TABLE_SHARE *s) const
{
return (m_table_ref_type == s->get_table_ref_type() &&
m_table_ref_version == s->get_table_ref_version());
}
/**
Record the value of metadata version of the corresponding
table definition cache element in this parse tree node.
@sa check_and_update_table_version()
*/
inline
void set_table_ref_id(TABLE_SHARE *s)
Backport of revid 2617.69.21, 2617.69.22, 2617.29.23: ---------------------------------------------------------- revno: 2617.69.21 committer: Konstantin Osipov <kostja@sun.com> branch nick: 5.4-4284-1-assert timestamp: Thu 2009-08-13 20:13:55 +0400 message: A fix and a test case for Bug#46610 "MySQL 5.4.4: MyISAM MRG engine crash on auto-repair of child". Also fixes Bug#42862 "Crash on failed attempt to open a children of a merge table". MERGE engine needs to extend the global table list with TABLE_LIST elements for child tables, so that they are opened and locked. Previously these table list elements were allocated in memory of ha_myisammrg object (MERGE engine handler). That would lead to access to freed memory in recover_from_failed_open_table_attempt(), which would try to recover a MERGE table child (MyISAM table) and use for that TABLE_LIST of that child. But by the time recover_from_failed_open_table_attempt() is invoked, ha_myisammrg object that owns this TABLE_LIST may be destroyed, and thus TABLE_LIST memory freed. The fix is to ensure that TABLE_LIST elements that are added to the global table list (lex->query_tables) are always allocated in thd->mem_root, which is not destroyed until end of execution. If previously TABLE_LIST elements were allocated at ha_myisammrg::open() (i.e. when the TABLE object was created and added to the table cache), now they are allocated in ha_myisammrg::add_chidlren_list() (i.e. right after "open" of the merge parent in open_tables()). We still create a list of children names at ha_myisammrg::open() to use as a basis for creation of TABLE_LISTs, that allows to avoid reading the merge handler data file on every execution.
2009-12-08 14:57:25 +01:00
{ set_table_ref_id(s->get_table_ref_type(), s->get_table_ref_version()); }
inline
void set_table_ref_id(enum_table_ref_type table_ref_type_arg,
ulong table_ref_version_arg)
{
Backport of revid 2617.69.21, 2617.69.22, 2617.29.23: ---------------------------------------------------------- revno: 2617.69.21 committer: Konstantin Osipov <kostja@sun.com> branch nick: 5.4-4284-1-assert timestamp: Thu 2009-08-13 20:13:55 +0400 message: A fix and a test case for Bug#46610 "MySQL 5.4.4: MyISAM MRG engine crash on auto-repair of child". Also fixes Bug#42862 "Crash on failed attempt to open a children of a merge table". MERGE engine needs to extend the global table list with TABLE_LIST elements for child tables, so that they are opened and locked. Previously these table list elements were allocated in memory of ha_myisammrg object (MERGE engine handler). That would lead to access to freed memory in recover_from_failed_open_table_attempt(), which would try to recover a MERGE table child (MyISAM table) and use for that TABLE_LIST of that child. But by the time recover_from_failed_open_table_attempt() is invoked, ha_myisammrg object that owns this TABLE_LIST may be destroyed, and thus TABLE_LIST memory freed. The fix is to ensure that TABLE_LIST elements that are added to the global table list (lex->query_tables) are always allocated in thd->mem_root, which is not destroyed until end of execution. If previously TABLE_LIST elements were allocated at ha_myisammrg::open() (i.e. when the TABLE object was created and added to the table cache), now they are allocated in ha_myisammrg::add_chidlren_list() (i.e. right after "open" of the merge parent in open_tables()). We still create a list of children names at ha_myisammrg::open() to use as a basis for creation of TABLE_LISTs, that allows to avoid reading the merge handler data file on every execution.
2009-12-08 14:57:25 +01:00
m_table_ref_type= table_ref_type_arg;
m_table_ref_version= table_ref_version_arg;
}
/**
@brief True if this TABLE_LIST represents an anonymous derived table,
i.e. the result of a subquery.
*/
bool is_anonymous_derived_table() const { return derived && !view; }
/**
@brief Returns the name of the database that the referenced table belongs
to.
*/
char *get_db_name() { return view != NULL ? view_db.str : db; }
/**
@brief Returns the name of the table that this TABLE_LIST represents.
@details The unqualified table name or view name for a table or view,
respectively.
*/
char *get_table_name() { return view != NULL ? view_name.str : table_name; }
private:
bool prep_check_option(THD *thd, uint8 check_opt_type);
bool prep_where(THD *thd, Item **conds, bool no_where_clause);
/** See comments for set_metadata_id() */
enum enum_table_ref_type m_table_ref_type;
/** See comments for set_metadata_id() */
ulong m_table_ref_version;
};
2004-07-16 00:15:55 +02:00
class Item;
/*
Iterator over the fields of a generic table reference.
*/
2004-07-16 00:15:55 +02:00
class Field_iterator: public Sql_alloc
{
public:
Field_iterator() {} /* Remove gcc warning */
2004-07-16 00:15:55 +02:00
virtual ~Field_iterator() {}
virtual void set(TABLE_LIST *)= 0;
virtual void next()= 0;
virtual bool end_of_fields()= 0; /* Return 1 at end of list */
2004-07-16 00:15:55 +02:00
virtual const char *name()= 0;
virtual Item *create_item(THD *)= 0;
2004-07-16 00:15:55 +02:00
virtual Field *field()= 0;
};
/*
Iterator over the fields of a base table, view with temporary
table, or subquery.
*/
2004-07-16 00:15:55 +02:00
class Field_iterator_table: public Field_iterator
{
Field **ptr;
public:
Field_iterator_table() :ptr(0) {}
void set(TABLE_LIST *table) { ptr= table->table->field; }
void set_table(TABLE *table) { ptr= table->field; }
void next() { ptr++; }
bool end_of_fields() { return *ptr == 0; }
2004-07-16 00:15:55 +02:00
const char *name();
Item *create_item(THD *thd);
2004-07-16 00:15:55 +02:00
Field *field() { return *ptr; }
};
/* Iterator over the fields of a merge view. */
2004-07-16 00:15:55 +02:00
class Field_iterator_view: public Field_iterator
{
Field_translator *ptr, *array_end;
TABLE_LIST *view;
2004-07-16 00:15:55 +02:00
public:
Field_iterator_view() :ptr(0), array_end(0) {}
void set(TABLE_LIST *table);
void next() { ptr++; }
bool end_of_fields() { return ptr == array_end; }
2004-07-16 00:15:55 +02:00
const char *name();
Item *create_item(THD *thd);
2005-01-05 15:48:23 +01:00
Item **item_ptr() {return &ptr->item; }
2004-07-16 00:15:55 +02:00
Field *field() { return 0; }
inline Item *item() { return ptr->item; }
Field_translator *field_translator() { return ptr; }
};
/*
Field_iterator interface to the list of materialized fields of a
NATURAL/USING join.
*/
class Field_iterator_natural_join: public Field_iterator
{
List_iterator_fast<Natural_join_column> column_ref_it;
Natural_join_column *cur_column_ref;
public:
Field_iterator_natural_join() :cur_column_ref(NULL) {}
~Field_iterator_natural_join() {}
void set(TABLE_LIST *table);
void next();
bool end_of_fields() { return !cur_column_ref; }
const char *name() { return cur_column_ref->name(); }
Item *create_item(THD *thd) { return cur_column_ref->create_item(thd); }
Field *field() { return cur_column_ref->field(); }
Natural_join_column *column_ref() { return cur_column_ref; }
};
/*
Generic iterator over the fields of an arbitrary table reference.
DESCRIPTION
This class unifies the various ways of iterating over the columns
of a table reference depending on the type of SQL entity it
represents. If such an entity represents a nested table reference,
this iterator encapsulates the iteration over the columns of the
members of the table reference.
IMPLEMENTATION
The implementation assumes that all underlying NATURAL/USING table
references already contain their result columns and are linked into
the list TABLE_LIST::next_name_resolution_table.
*/
class Field_iterator_table_ref: public Field_iterator
{
TABLE_LIST *table_ref, *first_leaf, *last_leaf;
Field_iterator_table table_field_it;
Field_iterator_view view_field_it;
Field_iterator_natural_join natural_join_it;
Field_iterator *field_it;
void set_field_iterator();
public:
Field_iterator_table_ref() :field_it(NULL) {}
void set(TABLE_LIST *table);
void next();
bool end_of_fields()
{ return (table_ref == last_leaf && field_it->end_of_fields()); }
const char *name() { return field_it->name(); }
const char *get_table_name();
const char *get_db_name();
GRANT_INFO *grant();
Item *create_item(THD *thd) { return field_it->create_item(thd); }
Field *field() { return field_it->field(); }
Natural_join_column *get_or_create_column_ref(THD *thd, TABLE_LIST *parent_table_ref);
Natural_join_column *get_natural_column_ref();
2004-07-16 00:15:55 +02:00
};
typedef struct st_nested_join
{
List<TABLE_LIST> join_list; /* list of elements in the nested join */
table_map used_tables; /* bitmap of tables in the nested join */
table_map not_null_tables; /* tables that rejects nulls */
struct st_join_table *first_nested;/* the first nested table in the plan */
/*
Used to count tables in the nested join in 2 isolated places:
1. In make_outerjoin_info().
2. check_interleaving_with_nj/restore_prev_nj_state (these are called
by the join optimizer.
Before each use the counters are zeroed by reset_nj_counters.
*/
uint counter;
nested_join_map nj_map; /* Bit used to identify this nested join*/
} NESTED_JOIN;
typedef struct st_changed_table_list
{
struct st_changed_table_list *next;
char *key;
uint32 key_length;
} CHANGED_TABLE_LIST;
typedef struct st_open_table_list{
struct st_open_table_list *next;
char *db,*table;
uint32 in_use,locked;
} OPEN_TABLE_LIST;
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. - handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. (This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. - The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. (The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. - Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 17:52:22 +02:00
static inline my_bitmap_map *tmp_use_all_columns(TABLE *table,
MY_BITMAP *bitmap)
{
my_bitmap_map *old= bitmap->bitmap;
bitmap->bitmap= table->s->all_set.bitmap;
return old;
}
static inline void tmp_restore_column_map(MY_BITMAP *bitmap,
my_bitmap_map *old)
{
bitmap->bitmap= old;
}
/* The following is only needed for debugging */
static inline my_bitmap_map *dbug_tmp_use_all_columns(TABLE *table,
MY_BITMAP *bitmap)
{
#ifndef DBUG_OFF
return tmp_use_all_columns(table, bitmap);
#else
return 0;
#endif
}
static inline void dbug_tmp_restore_column_map(MY_BITMAP *bitmap,
my_bitmap_map *old)
{
#ifndef DBUG_OFF
tmp_restore_column_map(bitmap, old);
#endif
}
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
/*
Variant of the above : handle both read and write sets.
Provide for the possiblity of the read set being the same as the write set
*/
static inline void dbug_tmp_use_all_columns(TABLE *table,
my_bitmap_map **save,
MY_BITMAP *read_set,
MY_BITMAP *write_set)
{
#ifndef DBUG_OFF
save[0]= read_set->bitmap;
save[1]= write_set->bitmap;
(void) tmp_use_all_columns(table, read_set);
(void) tmp_use_all_columns(table, write_set);
#endif
}
static inline void dbug_tmp_restore_column_maps(MY_BITMAP *read_set,
MY_BITMAP *write_set,
my_bitmap_map **old)
{
#ifndef DBUG_OFF
tmp_restore_column_map(read_set, old[0]);
tmp_restore_column_map(write_set, old[1]);
#endif
}
WL#3817: Simplify string / memory area types and make things more consistent (first part) The following type conversions was done: - Changed byte to uchar - Changed gptr to uchar* - Change my_string to char * - Change my_size_t to size_t - Change size_s to size_t Removed declaration of byte, gptr, my_string, my_size_t and size_s. Following function parameter changes was done: - All string functions in mysys/strings was changed to use size_t instead of uint for string lengths. - All read()/write() functions changed to use size_t (including vio). - All protocoll functions changed to use size_t instead of uint - Functions that used a pointer to a string length was changed to use size_t* - Changed malloc(), free() and related functions from using gptr to use void * as this requires fewer casts in the code and is more in line with how the standard functions work. - Added extra length argument to dirname_part() to return the length of the created string. - Changed (at least) following functions to take uchar* as argument: - db_dump() - my_net_write() - net_write_command() - net_store_data() - DBUG_DUMP() - decimal2bin() & bin2decimal() - Changed my_compress() and my_uncompress() to use size_t. Changed one argument to my_uncompress() from a pointer to a value as we only return one value (makes function easier to use). - Changed type of 'pack_data' argument to packfrm() to avoid casts. - Changed in readfrm() and writefrom(), ha_discover and handler::discover() the type for argument 'frmdata' to uchar** to avoid casts. - Changed most Field functions to use uchar* instead of char* (reduced a lot of casts). - Changed field->val_xxx(xxx, new_ptr) to take const pointers. Other changes: - Removed a lot of not needed casts - Added a few new cast required by other changes - Added some cast to my_multi_malloc() arguments for safety (as string lengths needs to be uint, not size_t). - Fixed all calls to hash-get-key functions to use size_t*. (Needed to be done explicitely as this conflict was often hided by casting the function to hash_get_key). - Changed some buffers to memory regions to uchar* to avoid casts. - Changed some string lengths from uint to size_t. - Changed field->ptr to be uchar* instead of char*. This allowed us to get rid of a lot of casts. - Some changes from true -> TRUE, false -> FALSE, unsigned char -> uchar - Include zlib.h in some files as we needed declaration of crc32() - Changed MY_FILE_ERROR to be (size_t) -1. - Changed many variables to hold the result of my_read() / my_write() to be size_t. This was needed to properly detect errors (which are returned as (size_t) -1). - Removed some very old VMS code - Changed packfrm()/unpackfrm() to not be depending on uint size (portability fix) - Removed windows specific code to restore cursor position as this causes slowdown on windows and we should not mix read() and pread() calls anyway as this is not thread safe. Updated function comment to reflect this. Changed function that depended on original behavior of my_pwrite() to itself restore the cursor position (one such case). - Added some missing checking of return value of malloc(). - Changed definition of MOD_PAD_CHAR_TO_FULL_LENGTH to avoid 'long' overflow. - Changed type of table_def::m_size from my_size_t to ulong to reflect that m_size is the number of elements in the array, not a string/memory length. - Moved THD::max_row_length() to table.cc (as it's not depending on THD). Inlined max_row_length_blob() into this function. - More function comments - Fixed some compiler warnings when compiled without partitions. - Removed setting of LEX_STRING() arguments in declaration (portability fix). - Some trivial indentation/variable name changes. - Some trivial code simplifications: - Replaced some calls to alloc_root + memcpy to use strmake_root()/strdup_root(). - Changed some calls from memdup() to strmake() (Safety fix) - Simpler loops in client-simple.c
2007-05-10 11:59:39 +02:00
size_t max_row_length(TABLE *table, const uchar *data);
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
void init_mdl_requests(TABLE_LIST *table_list);
Initial import of WL#3726 "DDL locking for all metadata objects". Backport of: ------------------------------------------------------------ revno: 2630.4.1 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Fri 2008-05-23 17:54:03 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ------------------------------------------------------------ This is the first patch in series. It transforms the metadata locking subsystem to use a dedicated module (mdl.h,cc). No significant changes in the locking protocol. The import passes the test suite with the exception of deprecated/removed 6.0 features, and MERGE tables. The latter are subject to a fix by WL#4144. Unfortunately, the original changeset comments got lost in a merge, thus this import has its own (largely insufficient) comments. This patch fixes Bug#25144 "replication / binlog with view breaks". Warning: this patch introduces an incompatible change: Under LOCK TABLES, it's no longer possible to FLUSH a table that was not locked for WRITE. Under LOCK TABLES, it's no longer possible to DROP a table or VIEW that was not locked for WRITE. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.2 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:03:45 +0400 message: WL#3726 "DDL locking for all metadata objects". After review fixes in progress. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.3 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 14:08:51 +0400 message: WL#3726 "DDL locking for all metadata objects" Fixed failing Windows builds by adding mdl.cc to the lists of files needed to build server/libmysqld on Windows. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.4 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sat 2008-05-24 21:57:58 +0400 message: WL#3726 "DDL locking for all metadata objects". Fix for assert failures in kill.test which occured when one tried to kill ALTER TABLE statement on merge table while it was waiting in wait_while_table_is_used() for other connections to close this table. These assert failures stemmed from the fact that cleanup code in this case assumed that temporary table representing new version of table was open with adding to THD::temporary_tables list while code which were opening this temporary table wasn't always fulfilling this. This patch changes code that opens new version of table to always do this linking in. It also streamlines cleanup process for cases when error occurs while we have new version of table open. ****** WL#3726 "DDL locking for all metadata objects" Add libmysqld/mdl.cc to .bzrignore. ****** Backport of: ------------------------------------------------------------ revno: 2630.4.6 committer: Dmitry Lenev <dlenev@mysql.com> branch nick: mysql-6.0-3726-w timestamp: Sun 2008-05-25 00:33:22 +0400 message: WL#3726 "DDL locking for all metadata objects". Addition to the fix of assert failures in kill.test caused by changes for this worklog. Make sure we close the new table only once.
2009-11-30 16:55:03 +01:00
int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
uint db_stat, uint prgflag, uint ha_open_flags,
TABLE *outparam, bool is_create_table);
TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key,
uint key_length);
void init_tmp_table_share(THD *thd, TABLE_SHARE *share, const char *key,
uint key_length,
const char *table_name, const char *path);
void free_table_share(TABLE_SHARE *share);
int open_table_def(THD *thd, TABLE_SHARE *share, uint db_flags);
void open_table_error(TABLE_SHARE *share, int error, int db_errno, int errarg);
void update_create_info_from_table(HA_CREATE_INFO *info, TABLE *form);
bool check_and_convert_db_name(LEX_STRING *db, bool preserve_lettercase);
bool check_db_name(LEX_STRING *db);
bool check_column_name(const char *name);
bool check_table_name(const char *name, uint length);
int rename_file_ext(const char * from,const char * to,const char * ext);
char *get_field(MEM_ROOT *mem, Field *field);
bool get_field(MEM_ROOT *mem, Field *field, class String *res);
int closefrm(TABLE *table, bool free_share);
int read_string(File file, uchar* *to, size_t length);
void free_blobs(TABLE *table);
void free_field_buffers_larger_than(TABLE *table, uint32 size);
int set_zone(int nr,int min_zone,int max_zone);
ulong get_form_pos(File file, uchar *head, TYPELIB *save_names);
ulong make_new_entry(File file,uchar *fileinfo,TYPELIB *formnames,
const char *newname);
ulong next_io_size(ulong pos);
void append_unescaped(String *res, const char *pos, uint length);
File create_frm(THD *thd, const char *name, const char *db,
const char *table, uint reclength, uchar *fileinfo,
HA_CREATE_INFO *create_info, uint keys, KEY *key_info);
char *fn_rext(char *name);
/* performance schema */
extern LEX_STRING PERFORMANCE_SCHEMA_DB_NAME;
extern LEX_STRING GENERAL_LOG_NAME;
extern LEX_STRING SLOW_LOG_NAME;
/* information schema */
extern LEX_STRING INFORMATION_SCHEMA_NAME;
extern LEX_STRING MYSQL_SCHEMA_NAME;
inline bool is_infoschema_db(const char *name, size_t len)
{
return (INFORMATION_SCHEMA_NAME.length == len &&
!my_strcasecmp(system_charset_info,
INFORMATION_SCHEMA_NAME.str, name));
}
inline bool is_infoschema_db(const char *name)
{
return !my_strcasecmp(system_charset_info,
INFORMATION_SCHEMA_NAME.str, name);
}
TYPELIB *typelib(MEM_ROOT *mem_root, List<String> &strings);
/**
return true if the table was created explicitly.
*/
inline bool is_user_table(TABLE * table)
{
const char *name= table->s->table_name.str;
return strncmp(name, tmp_file_prefix, tmp_file_prefix_length);
}
inline void mark_as_null_row(TABLE *table)
{
table->null_row=1;
table->status|=STATUS_NULL_ROW;
bfill(table->null_flags,table->s->null_bytes,255);
}
#endif /* MYSQL_CLIENT */
#endif /* TABLE_INCLUDED */