mariadb/sql/mysqld.h

1017 lines
41 KiB
C
Raw Normal View History

2016-04-20 15:25:55 +02:00
/* Copyright (c) 2006, 2016, Oracle and/or its affiliates.
Copyright (c) 2010, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifndef MYSQLD_INCLUDED
#define MYSQLD_INCLUDED
#include "sql_basic_types.h" /* query_id_t */
#include "sql_mode.h" /* Sql_mode_dependency */
#include "sql_plugin.h"
MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp() This patch also fixes: MDEV-33050 Build-in schemas like oracle_schema are accent insensitive MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0 MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0 MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0 MDEV-33088 Cannot create triggers in the database `MYSQL` MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0 MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0 MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0 MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS MDEV-33120 System log table names are case insensitive with lower-cast-table-names=0 - Removing the virtual function strnncoll() from MY_COLLATION_HANDLER - Adding a wrapper function CHARSET_INFO::streq(), to compare two strings for equality. For now it calls strnncoll() internally. In the future it will turn into a virtual function. - Adding new accent sensitive case insensitive collations: - utf8mb4_general1400_as_ci - utf8mb3_general1400_as_ci They implement accent sensitive case insensitive comparison. The weight of a character is equal to the code point of its upper case variant. These collations use Unicode-14.0.0 casefolding data. The result of my_charset_utf8mb3_general1400_as_ci.strcoll() is very close to the former my_charset_utf8mb3_general_ci.strcasecmp() There is only a difference in a couple dozen rare characters, because: - the switch from "tolower" to "toupper" comparison, to make utf8mb3_general1400_as_ci closer to utf8mb3_general_ci - the switch from Unicode-3.0.0 to Unicode-14.0.0 This difference should be tolarable. See the list of affected characters in the MDEV description. Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters! Unlike utf8mb4_general_ci, it does not treat all BMP characters as equal. - Adding classes representing names of the file based database objects: Lex_ident_db Lex_ident_table Lex_ident_trigger Their comparison collation depends on the underlying file system case sensitivity and on --lower-case-table-names and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci. - Adding classes representing names of other database objects, whose names have case insensitive comparison style, using my_charset_utf8mb3_general1400_as_ci: Lex_ident_column Lex_ident_sys_var Lex_ident_user_var Lex_ident_sp_var Lex_ident_ps Lex_ident_i_s_table Lex_ident_window Lex_ident_func Lex_ident_partition Lex_ident_with_element Lex_ident_rpl_filter Lex_ident_master_info Lex_ident_host Lex_ident_locale Lex_ident_plugin Lex_ident_engine Lex_ident_server Lex_ident_savepoint Lex_ident_charset engine_option_value::Name - All the mentioned Lex_ident_xxx classes implement a method streq(): if (ident1.streq(ident2)) do_equal(); This method works as a wrapper for CHARSET_INFO::streq(). - Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name" in class members and in function/method parameters. - Replacing all calls like system_charset_info->coll->strcasecmp(ident1, ident2) to ident1.streq(ident2) - Taking advantage of the c++11 user defined literal operator for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h) data types. Use example: const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column; is now a shorter version of: const Lex_ident_column primary_key_name= Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
2023-04-26 13:27:01 +02:00
#include "lex_ident.h"
#include "sql_bitmap.h" /* Bitmap */
#include "my_decimal.h" /* my_decimal */
#include "mysql_com.h" /* SERVER_VERSION_LENGTH */
#include "my_counter.h"
#include "mysql/psi/mysql_file.h" /* MYSQL_FILE */
#include "mysql/psi/mysql_socket.h" /* MYSQL_SOCKET */
#include "sql_list.h" /* I_List */
#include "sql_cmd.h"
2013-03-25 23:03:13 +01:00
#include <my_rnd.h>
#include "my_pthread.h"
#include "my_rdtsc.h"
class THD;
class CONNECT;
struct handlerton;
class Time_zone;
struct scheduler_functions;
typedef struct st_mysql_show_var SHOW_VAR;
/* Bits from testflag */
#define TEST_PRINT_CACHED_TABLES 1U
#define TEST_NO_KEY_GROUP 2U
#define TEST_MIT_THREAD 4U
#define TEST_BLOCKING 8U
#define TEST_KEEP_TMP_TABLES 16U
#define TEST_READCHECK 64U /**< Force use of readcheck */
#define TEST_NO_EXTRA 128U
#define TEST_CORE_ON_SIGNAL 256U /**< Give core if signal */
#define TEST_SIGINT 1024U /**< Allow sigint on threads */
#define TEST_SYNCHRONIZATION 2048U /**< get server to do sleep in
some places */
/* Keep things compatible */
#define OPT_DEFAULT SHOW_OPT_DEFAULT
#define OPT_SESSION SHOW_OPT_SESSION
#define OPT_GLOBAL SHOW_OPT_GLOBAL
extern MYSQL_PLUGIN_IMPORT MY_TIMER_INFO sys_timer_info;
/*
Values for --slave-parallel-mode
Must match order in slave_parallel_mode_typelib in sys_vars.cc.
*/
enum enum_slave_parallel_mode {
SLAVE_PARALLEL_NONE,
SLAVE_PARALLEL_MINIMAL,
SLAVE_PARALLEL_CONSERVATIVE,
SLAVE_PARALLEL_OPTIMISTIC,
SLAVE_PARALLEL_AGGRESSIVE
};
/* Function prototypes */
void kill_mysql(THD *thd);
void close_connection(THD *thd, uint sql_errno= 0);
void handle_connection_in_main_thread(CONNECT *thd);
void create_thread_to_handle_connection(CONNECT *connect);
void unlink_thd(THD *thd);
void refresh_status(THD *thd);
bool is_secure_file_path(char *path);
extern void init_net_server_extension(THD *thd);
extern void handle_accepted_socket(MYSQL_SOCKET new_sock, MYSQL_SOCKET sock);
extern void create_new_thread(CONNECT *connect);
extern void ssl_acceptor_stats_update(int sslaccept_ret);
extern int reinit_ssl();
extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info;
MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp() This patch also fixes: MDEV-33050 Build-in schemas like oracle_schema are accent insensitive MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0 MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0 MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0 MDEV-33088 Cannot create triggers in the database `MYSQL` MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0 MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0 MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0 MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS MDEV-33120 System log table names are case insensitive with lower-cast-table-names=0 - Removing the virtual function strnncoll() from MY_COLLATION_HANDLER - Adding a wrapper function CHARSET_INFO::streq(), to compare two strings for equality. For now it calls strnncoll() internally. In the future it will turn into a virtual function. - Adding new accent sensitive case insensitive collations: - utf8mb4_general1400_as_ci - utf8mb3_general1400_as_ci They implement accent sensitive case insensitive comparison. The weight of a character is equal to the code point of its upper case variant. These collations use Unicode-14.0.0 casefolding data. The result of my_charset_utf8mb3_general1400_as_ci.strcoll() is very close to the former my_charset_utf8mb3_general_ci.strcasecmp() There is only a difference in a couple dozen rare characters, because: - the switch from "tolower" to "toupper" comparison, to make utf8mb3_general1400_as_ci closer to utf8mb3_general_ci - the switch from Unicode-3.0.0 to Unicode-14.0.0 This difference should be tolarable. See the list of affected characters in the MDEV description. Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters! Unlike utf8mb4_general_ci, it does not treat all BMP characters as equal. - Adding classes representing names of the file based database objects: Lex_ident_db Lex_ident_table Lex_ident_trigger Their comparison collation depends on the underlying file system case sensitivity and on --lower-case-table-names and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci. - Adding classes representing names of other database objects, whose names have case insensitive comparison style, using my_charset_utf8mb3_general1400_as_ci: Lex_ident_column Lex_ident_sys_var Lex_ident_user_var Lex_ident_sp_var Lex_ident_ps Lex_ident_i_s_table Lex_ident_window Lex_ident_func Lex_ident_partition Lex_ident_with_element Lex_ident_rpl_filter Lex_ident_master_info Lex_ident_host Lex_ident_locale Lex_ident_plugin Lex_ident_engine Lex_ident_server Lex_ident_savepoint Lex_ident_charset engine_option_value::Name - All the mentioned Lex_ident_xxx classes implement a method streq(): if (ident1.streq(ident2)) do_equal(); This method works as a wrapper for CHARSET_INFO::streq(). - Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name" in class members and in function/method parameters. - Replacing all calls like system_charset_info->coll->strcasecmp(ident1, ident2) to ident1.streq(ident2) - Taking advantage of the c++11 user defined literal operator for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h) data types. Use example: const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column; is now a shorter version of: const Lex_ident_column primary_key_name= Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
2023-04-26 13:27:01 +02:00
extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info_for_i_s;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *files_charset_info ;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *national_charset_info;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *table_alias_charset;
/**
Character set of the buildin error messages loaded from errmsg.sys.
*/
extern CHARSET_INFO *error_message_charset_info;
extern CHARSET_INFO *character_set_filesystem;
void temp_pool_clear_bit(uint bit);
uint temp_pool_set_next();
extern bool opt_large_files;
extern bool opt_update_log, opt_bin_log, opt_error_log, opt_bin_log_compress;
extern uint opt_bin_log_compress_min_len;
extern my_bool opt_log, opt_bootstrap;
extern my_bool opt_backup_history_log;
extern my_bool opt_backup_progress_log;
[MDEV-10570] Add Flashback support ==== Description ==== Flashback can rollback the instances/databases/tables to an old snapshot. It's implement on Server-Level by full image format binary logs (--binlog-row-image=FULL), so it supports all engines. Currently, it’s a feature inside mysqlbinlog tool (with --flashback arguments). Because the flashback binlog events will store in the memory, you should check if there is enough memory in your machine. ==== New Arguments to mysqlbinlog ==== --flashback (-B) It will let mysqlbinlog to work on FLASHBACK mode. ==== New Arguments to mysqld ==== --flashback Setup the server to use flashback. This enables binary log in row mode and will enable extra logging for DDL's needed by flashback feature ==== Example ==== I have a table "t" in database "test", we can compare the output with "--flashback" and without. #client/mysqlbinlog /data/mysqldata_10.0/binlog/mysql-bin.000001 -vv -d test -T t --start-datetime="2013-03-27 14:54:00" > /tmp/1.sql #client/mysqlbinlog /data/mysqldata_10.0/binlog/mysql-bin.000001 -vv -d test -T t --start-datetime="2013-03-27 14:54:00" -B > /tmp/2.sql Then, importing the output flashback file (/tmp/2.log), it can flashback your database/table to the special time (--start-datetime). And if you know the exact postion, "--start-postion" is also works, mysqlbinlog will output the flashback logs that can flashback to "--start-postion" position. ==== Implement ==== 1. As we know, if binlog_format is ROW (binlog-row-image=FULL in 10.1 and later), all columns value are store in the row event, so we can get the data before mis-operation. 2. Just do following things: 2.1 Change Event Type, INSERT->DELETE, DELETE->INSERT. For example: INSERT INTO t VALUES (...) ---> DELETE FROM t WHERE ... DELETE FROM t ... ---> INSERT INTO t VALUES (...) 2.2 For Update_Event, swapping the SET part and WHERE part. For example: UPDATE t SET cols1 = vals1 WHERE cols2 = vals2 ---> UPDATE t SET cols2 = vals2 WHERE cols1 = vals1 2.3 For Multi-Rows Event, reverse the rows sequence, from the last row to the first row. For example: DELETE FROM t WHERE id=1; DELETE FROM t WHERE id=2; ...; DELETE FROM t WHERE id=n; ---> DELETE FROM t WHERE id=n; ...; DELETE FROM t WHERE id=2; DELETE FROM t WHERE id=1; 2.4 Output those events from the last one to the first one which mis-operation happened. For example:
2017-01-20 14:33:28 +01:00
extern my_bool opt_support_flashback;
extern ulonglong log_output_options;
extern ulong log_backup_output_options;
extern bool opt_disable_networking, opt_skip_show_db;
extern bool opt_skip_name_resolve;
extern bool opt_ignore_builtin_innodb;
extern my_bool opt_character_set_client_handshake;
extern my_bool debug_assert_on_not_freed_memory;
MDEV-22214 mariadbd.exe calls function mysqld.exe, and crashes Stop linking plugins to the server executable on Windows. Instead, extract whole server functionality into a large DLL, called server.dll. Link both plugins, and small server "stub" exe to it. This eliminates plugin dependency on the name of the server executable. It also reduces the size of the packages (since tiny mysqld.exe and mariadbd.exe are now both linked to one big DLL) Also, simplify the functionality of exporing all symbols from selected static libraries. Rely on WINDOWS_EXPORT_ALL_SYMBOLS, rather than old self-backed solution. fix compile error replace GetProcAddress(GetModuleHandle(NULL), "variable_name") for server exported data with actual variable names. Runtime loading was never required,was error prone , since symbols could be missing at runtime, and now it actually failed, because we do not export symbols from executable anymore, but from a shared library This did require a MYSQL_PLUGIN_IMPORT decoration for the plugin, but made the code more straightforward, and avoids missing symbols at runtime (as mentioned before). The audit plugin is still doing some dynamic loading, as it aims to work cross-version. Now it won't work cross-version on Windows, as it already uses some symbols that are *not* dynamically loaded, e.g fn_format and those symbols now exported from server.dll , when earlier they were exported by mysqld.exe Windows, fixes for storage engine plugin loading after various rebranding stuff Create server.dll containing functionality of the whole server make mariadbd.exe/mysqld.exe a stub that is only calling mysqld_main() fix build
2020-04-10 14:09:18 +02:00
extern MYSQL_PLUGIN_IMPORT bool volatile abort_loop;
extern my_bool opt_safe_user_create;
extern my_bool opt_safe_show_db, opt_local_infile, opt_myisam_use_mmap;
extern my_bool opt_slave_compressed_protocol, use_temp_pool;
Replication changes for CREATE OR REPLACE TABLE - CREATE TABLE is by default executed on the slave as CREATE OR REPLACE - DROP TABLE is by default executed on the slave as DROP TABLE IF NOT EXISTS This means that a slave will by default continue even if we try to create a table that existed on the slave (the table will be deleted and re-created) or if we try to drop a table that didn't exist on the slave. This should be safe as instead of having the slave stop because of an inconsistency between master and slave, it will fix the inconsistency. Those that would prefer to get a stopped slave instead for the above cases can set slave_ddl_exec_mode to STRICT. - Ensure that a CREATE OR REPLACE TABLE which dropped a table is replicated - DROP TABLE that generated an error on master is handled as an identical DROP TABLE on the slave (IF NOT EXISTS is not added in this case) - Added slave_ddl_exec_mode variable to decide how DDL's are replicated New logic for handling BEGIN GTID ... COMMIT from the binary log: - When we find a BEGIN GTID, we start a transaction and set OPTION_GTID_BEGIN - When we find COMMIT, we reset OPTION_GTID_BEGIN and execute the normal COMMIT code. - While OPTION_GTID_BEGIN is set: - We don't generate implict commits before or after statements - All tables are regarded as transactional tables in the binary log (to ensure things are executed exactly as on the master) - We reset OPTION_GTID_BEGIN also on rollback This will help ensuring that we don't get any sporadic commits (and thus new GTID's) on the slave and will help keep the GTID's between master and slave in sync. mysql-test/extra/rpl_tests/rpl_log.test: Added testing of mode slave_ddl_exec_mode=STRICT mysql-test/r/mysqld--help.result: New help messages mysql-test/suite/rpl/r/create_or_replace_mix.result: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/r/create_or_replace_row.result: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/r/create_or_replace_statement.result: Testing replication of create or replace mysql-test/suite/rpl/r/rpl_gtid_startpos.result: Test must be run in slave_ddl_exec_mode=STRICT as part of the test depends on that DROP TABLE should fail on slave. mysql-test/suite/rpl/r/rpl_row_log.result: Updated result mysql-test/suite/rpl/r/rpl_row_log_innodb.result: Updated result mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result: Updated result mysql-test/suite/rpl/r/rpl_stm_log.result: Updated result mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result: Updated result mysql-test/suite/rpl/r/rpl_temp_table_mix_row.result: Updated result mysql-test/suite/rpl/t/create_or_replace.inc: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_mix.cnf: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_mix.test: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_row.cnf: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_row.test: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_statement.cnf: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/create_or_replace_statement.test: Testing of CREATE OR REPLACE TABLE with replication mysql-test/suite/rpl/t/rpl_gtid_startpos.test: Test must be run in slave_ddl_exec_mode=STRICT as part of the test depends on that DROP TABLE should fail on slave. mysql-test/suite/rpl/t/rpl_stm_log.test: Removed some lines mysql-test/suite/sys_vars/r/slave_ddl_exec_mode_basic.result: Testing of slave_ddl_exec_mode mysql-test/suite/sys_vars/t/slave_ddl_exec_mode_basic.test: Testing of slave_ddl_exec_mode sql/handler.cc: Regard all tables as transactional in commit if OPTION_GTID_BEGIN is set. This is to ensure that statments are not commited too early if non transactional tables are used. sql/log.cc: Regard all tables as transactional in commit if OPTION_GTID_BEGIN is set. Also treat 'direct' log events as transactional (to get them logged as they where on the master) sql/log_event.cc: Ensure that the new error from DROP TABLE when trying to drop a view is treated same as the old one. Store error code that slave expects in THD. Set OPTION_GTID_BEGIN if we find a BEGIN. Reset OPTION_GTID_BEGIN if we find a COMMIT. sql/mysqld.cc: Added slave_ddl_exec_mode_options sql/mysqld.h: Added slave_ddl_exec_mode_options sql/rpl_gtid.cc: Reset OPTION_GTID_BEGIN if we record a gtid (safety) sql/sql_class.cc: Regard all tables as transactional in commit if OPTION_GTID_BEGIN is set. sql/sql_class.h: Added to THD: log_current_statement and slave_expected_error sql/sql_insert.cc: Ensure that CREATE OR REPLACE is logged if table was deleted. Don't do implicit commit for CREATE if we are under OPTION_GTID_BEGIN sql/sql_parse.cc: Change CREATE TABLE -> CREATE OR REPLACE TABLE for slaves Change DROP TABLE -> DROP TABLE IF EXISTS for slaves CREATE TABLE doesn't force implicit commit in case of OPTION_GTID_BEGIN Don't do commits before or after any statement if OPTION_GTID_BEGIN was set. sql/sql_priv.h: Added OPTION_GTID_BEGIN sql/sql_show.cc: Enhanced store_create_info() to also be able to handle CREATE OR REPLACE sql/sql_show.h: Updated prototype sql/sql_table.cc: Ensure that CREATE OR REPLACE is logged if table was deleted. sql/sys_vars.cc: Added slave_ddl_exec_mode sql/transaction.cc: Added warning if we got a GTID under OPTION_GTID_BEGIN
2014-02-05 18:01:59 +01:00
extern ulong slave_exec_mode_options, slave_ddl_exec_mode_options;
Changed SHOW_FUNC variabels that don't return SHOW_ARRAY to SHOW_SIMPLE_FUNC. This allows us to avoid calculating variables (including those involving mutex) that doesn't match the given wildcard in SHOW STATUS LIKE '...' Removed all references to active_mi that could cause problems for multi-source replication. Added START|STOP ALL SLAVES Added SHOW ALL SLAVES STATUS include/mysql/plugin.h: Added SHOW_SIMPLE_FUNC include/mysql/plugin_audit.h.pp: Updated .pp file include/mysql/plugin_auth.h.pp: Updated .pp file include/mysql/plugin_ftparser.h.pp: Updated .pp file mysql-test/suite/multi_source/info_logs.result: New columns in SHOW ALL SLAVES STATUS mysql-test/suite/multi_source/info_logs.test: Test new syntax mysql-test/suite/multi_source/simple.result: New columns in SHOW ALL SLAVES STATUS mysql-test/suite/multi_source/simple.test: test new syntax mysql-test/suite/multi_source/syntax.result: Updated result mysql-test/suite/multi_source/syntax.test: Test new syntax mysql-test/suite/rpl/r/rpl_skip_replication.result: Updated result plugin/semisync/semisync_master_plugin.cc: SHOW_FUNC -> SHOW_SIMPLE_FUNC sql/item_create.cc: Simplify code sql/lex.h: Added SLAVES keyword sql/log.cc: Constant -> define sql/log_event.cc: Added comment sql/mysqld.cc: SHOW_FUNC -> SHOW_SIMPLE_FUNC Made slave_retried_trans, slave_received_heartbeats and heartbeat_period multi-source safe Clear variable denied_connections and slave_retried_transactions on startup sql/mysqld.h: Added slave_retried_transactions sql/rpl_mi.cc: create_signed_file_name -> create_logfile_name_with_suffix Added start_all_slaves() and stop_all_slaves() sql/rpl_mi.h: Added prototypes sql/rpl_rli.cc: create_signed_file_name -> create_logfile_name_with_suffix added executed_entries sql/rpl_rli.h: Added executed_entries sql/share/errmsg-utf8.txt: More and better error messages sql/slave.cc: Added more fields to SHOW ALL SLAVES STATUS Added slave_retried_transactions Changed constants -> defines sql/sql_class.h: Added comment sql/sql_insert.cc: active_mi.rli -> thd->rli_slave sql/sql_lex.h: Added SQLCOM_SLAVE_ALL_START & SQLCOM_SLAVE_ALL_STOP sql/sql_load.cc: active_mi.rli -> thd->rli_slave sql/sql_parse.cc: Added START|STOP ALL SLAVES sql/sql_prepare.cc: Added SQLCOM_SLAVE_ALL_START & SQLCOM_SLAVE_ALL_STOP sql/sql_reload.cc: Made REFRESH RELAY LOG multi-source safe sql/sql_repl.cc: create_signed_file_name -> create_logfile_name_with_suffix Don't send my_ok() from start_slave() or stop_slave() so that we can call it for all master connections sql/sql_show.cc: Compare wild cards early for all variables sql/sql_yacc.yy: Added START|STOP ALL SLAVES Added SHOW ALL SLAVES STATUS sql/sys_vars.cc: Made replicate_events_marked_for_skip,slave_net_timeout and rpl_filter multi-source safe. sql/sys_vars.h: Simplify Sys_var_rpl_filter
2012-10-03 00:44:54 +02:00
extern ulong slave_retried_transactions;
extern ulong transactions_multi_engine;
extern ulong rpl_transactions_multi_engine;
extern ulong transactions_gtid_foreign_engine;
extern ulong slave_run_triggers_for_rbr;
extern ulonglong slave_type_conversions_options;
extern my_bool read_only, opt_readonly;
extern MYSQL_PLUGIN_IMPORT my_bool lower_case_file_system;
extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
extern my_bool opt_secure_auth;
extern my_bool opt_require_secure_transport;
extern const char *current_dbug_option;
extern char* opt_secure_file_priv;
extern char* opt_secure_backup_file_priv;
extern size_t opt_secure_backup_file_priv_len;
extern my_bool sp_automatic_privileges, opt_noacl;
extern ulong use_stat_tables;
extern my_bool opt_old_style_user_limits, trust_function_creators;
extern uint opt_crash_binlog_innodb;
extern const char *shared_memory_base_name;
MDEV-22214 mariadbd.exe calls function mysqld.exe, and crashes Stop linking plugins to the server executable on Windows. Instead, extract whole server functionality into a large DLL, called server.dll. Link both plugins, and small server "stub" exe to it. This eliminates plugin dependency on the name of the server executable. It also reduces the size of the packages (since tiny mysqld.exe and mariadbd.exe are now both linked to one big DLL) Also, simplify the functionality of exporing all symbols from selected static libraries. Rely on WINDOWS_EXPORT_ALL_SYMBOLS, rather than old self-backed solution. fix compile error replace GetProcAddress(GetModuleHandle(NULL), "variable_name") for server exported data with actual variable names. Runtime loading was never required,was error prone , since symbols could be missing at runtime, and now it actually failed, because we do not export symbols from executable anymore, but from a shared library This did require a MYSQL_PLUGIN_IMPORT decoration for the plugin, but made the code more straightforward, and avoids missing symbols at runtime (as mentioned before). The audit plugin is still doing some dynamic loading, as it aims to work cross-version. Now it won't work cross-version on Windows, as it already uses some symbols that are *not* dynamically loaded, e.g fn_format and those symbols now exported from server.dll , when earlier they were exported by mysqld.exe Windows, fixes for storage engine plugin loading after various rebranding stuff Create server.dll containing functionality of the whole server make mariadbd.exe/mysqld.exe a stub that is only calling mysqld_main() fix build
2020-04-10 14:09:18 +02:00
extern MYSQL_PLUGIN_IMPORT char *mysqld_unix_port;
extern my_bool opt_enable_shared_memory;
extern ulong opt_replicate_events_marked_for_skip;
extern char *default_tz_name;
extern Time_zone *default_tz;
extern char *my_bind_addr_str;
extern char *default_storage_engine, *default_tmp_storage_engine;
extern char *enforced_storage_engine;
extern char *gtid_pos_auto_engines;
extern plugin_ref *opt_gtid_pos_auto_plugins;
extern bool opt_endinfo, using_udf_functions;
extern my_bool locked_in_memory;
extern bool opt_using_transactions;
extern ulong current_pid;
extern double expire_logs_days;
extern ulong binlog_expire_logs_seconds;
MDEV-31404 Implement binlog_space_limit binlog_space_limit is a variable in Percona server used to limit the total size of all binary logs. This implementation is based on code from Percona server 5.7. In MariaDB we decided to call the variable max-binlog-total-size to be similar to max-binlog-size. This makes it easier to find in the output from 'mariadbd --help --verbose'). MariaDB will also support binlog_space_limit for compatibility with Percona. Some internal notes to explain implementation notes: - When running MariaDB does not delete binary logs that are either used by slaves or have active xid that are not yet committed. Some implementation notes: - max-binlog-total-size is by default 0 (no limit). - max-binlog-total-size can be changed without server restart. - Binlog file sizes are checked on startup, or if max-binlog-total-size is set to a value > 0, not for every log write. The total size of all binary logs is cached and dynamically updated when updating the binary log on binary log rotation. - max-binlog-total-size is checked against existing log files during serverstart, binlog rotation, FLUSH LOGS, when writing to binary log or when max-binlog-total-size changes value. - Option --slave-connections-needed-for-purge with 1 as default added. This allows one to ensure that we do not delete binary logs if there is less than 'slave-connections-needed-for-purge' connected. Without this option max-binlog-total-size would potentially delete binlogs needed by slaves on server startup or when a slave disconnects as there are then no connected slaves to protect active binlogs. - PURGE BINARY LOGS TO ... will be executed as if slave-connectitons-needed-for-purge would be zero. In other words it will do the purge even if there is no slaves connected. If there are connected slaves working on the logs, these will be protected. - If binary log is on and max-binlog-total_size <> 0 then the status variable 'Binlog_disk_use' shows the current size of all old binary logs + the state of the current one. - Removed test of strcmp(log_file_name, log_info.log_file_name) in purge_logs_before_date() as this is tested in can_purge_logs() - To avoid expensive calls of log_in_use() we cache the result for the last log that is in use by a slave. Future calls to can_purge_logs() for this binary log will be quickly detected and false will be returned until a slave starts working on a new log. - Note that after a binary log rotation caused by max_binlog_size, the last log will not be purged directly as it is still in use internally. The next binary log write will purge binlogs if needed. Reviewer:Kristian Nielsen <knielsen@knielsen-hq.org>
2023-12-03 20:42:44 +01:00
extern ulonglong binlog_space_limit;
extern my_bool relay_log_recovery;
extern uint sync_binlog_period, sync_relaylog_period,
sync_relayloginfo_period, sync_masterinfo_period;
extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
extern ulong tc_log_page_waits;
extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
extern my_bool relay_log_recovery;
extern uint select_errors,ha_open_options;
extern ulonglong test_flags;
MDEV-22214 mariadbd.exe calls function mysqld.exe, and crashes Stop linking plugins to the server executable on Windows. Instead, extract whole server functionality into a large DLL, called server.dll. Link both plugins, and small server "stub" exe to it. This eliminates plugin dependency on the name of the server executable. It also reduces the size of the packages (since tiny mysqld.exe and mariadbd.exe are now both linked to one big DLL) Also, simplify the functionality of exporing all symbols from selected static libraries. Rely on WINDOWS_EXPORT_ALL_SYMBOLS, rather than old self-backed solution. fix compile error replace GetProcAddress(GetModuleHandle(NULL), "variable_name") for server exported data with actual variable names. Runtime loading was never required,was error prone , since symbols could be missing at runtime, and now it actually failed, because we do not export symbols from executable anymore, but from a shared library This did require a MYSQL_PLUGIN_IMPORT decoration for the plugin, but made the code more straightforward, and avoids missing symbols at runtime (as mentioned before). The audit plugin is still doing some dynamic loading, as it aims to work cross-version. Now it won't work cross-version on Windows, as it already uses some symbols that are *not* dynamically loaded, e.g fn_format and those symbols now exported from server.dll , when earlier they were exported by mysqld.exe Windows, fixes for storage engine plugin loading after various rebranding stuff Create server.dll containing functionality of the whole server make mariadbd.exe/mysqld.exe a stub that is only calling mysqld_main() fix build
2020-04-10 14:09:18 +02:00
extern uint protocol_version, dropping_tables;
extern MYSQL_PLUGIN_IMPORT uint mysqld_port;
2010-08-05 14:34:19 +02:00
extern ulong delay_key_write_options;
BUG#11745230: 12133: MASTER.INDEX FILE KEEPS MYSQLD FROM STARTING IF BIN LOG HAS BEEN MOVED When moving the binary/relay log files from one location to another and restarting the server with a different log-bin or relay-log paths, would cause the startup process to abort. The root cause was that the server would not be able to find the log files because it would consider old paths for entries in the index file instead of the new location. What's even worse, the relative paths would not be considered relative to the path provided in log-bin and relay-log, but to mysql_data_dir. We fix the cases where the server contains relative paths. When the server is reading from the index file, it checks whether the entry contains relative paths. If it does, we replace it with the absolute path set in log-bin/relay-log option. Absolute paths remain unchanged and the index must be manually edited to consider the new log-bin and/or relay-log path (this should be documented). This is a fix for a GA version, that does not break behavior (that much). For development versions, we should go with Zhenxing's approach that removes paths altogether from index files. mysql-test/include/begin_include_file.inc: Added parameter to keep the begin_include_file.inc silent. Useful when including scripts that contain platform dependent parameters, for example: --let $rpl_server_parameters=--log-bin=$tmpdir/slave-bin --relay-log=$tmpdir/slave-relay-bin --let $keep_include_silent=1 source include/rpl_start_server.inc; --let $keep_include_silent=0 We want the paths ($tmpdir/slave-bin and $tmpdir/slave-relay-bin) not to be in the result file. mysql-test/suite/rpl/t/rpl_binlog_index.test: Test case. sql/log.cc: When finding the corresponding log entry in the index file, we first normalize the paths before doing the comparison. This will make relative paths to be turned into absolute paths (based on the opt_bin_logname or opt_relay_logname) and then compared against also, expanded paths entered, through CHANGE MASTER for instance. sql/log.h: Added normalize_binlog_name, which turns relative paths, into absolute paths given the parameter: is_relay_log ? opt_relay_logname : opt_bin_logname . sql/mysqld.cc: Exposing opt_bin_logname. sql/mysqld.h: Exposing opt_bin_logname.
2011-11-24 18:15:58 +01:00
extern char *opt_logname, *opt_slow_logname, *opt_bin_logname,
*opt_relay_logname;
extern char *opt_binlog_index_name;
MDEV-31273: Precompute binlog checksums Compute binlog checksums (when enabled) already when writing events into the statement or transaction caches, where before it was done when the caches are copied to the real binlog file. This moves the checksum computation outside of holding LOCK_log, improving scalabitily. At stmt/trx cache write time, the final end_log_pos values are not known, so with this patch these will be set to 0. Events that are written directly to the binlog file (not through stmt/trx cache) keep the correct end_log_pos value. The GTID and COMMIT/XID events at the start and end of event groups are written directly, so the zero end_log_pos is only for events in the middle of event groups, which do not negatively affect replication. An option --binlog-legacy-event-pos, off by default, is provided to disable this behavior to provide backwards compatibility with any external applications that might rely on end_log_pos in events in the middle of event groups. Checksums cannot be pre-computed when binlog encryption is enabled, as encryption relies on correct end_log_pos to provide part of the nonce/IV. Checksum pre-computation is also disabled for WSREP/Galera, as it uses events differently in its write-sets and so on. Extending pre-computation of checksums to Galera where it makes sense could be added in a future patch. The current --binlog-checksum configuration is saved in binlog_cache_data at transaction start and used to pre-compute checksums in cache, if applicable. When the cache is later copied to the binlog, a check is made if the saved value still matches the configured global value; if so, the events are block-copied directly into the binlog file. If --binlog-checksum was changed during the transaction, events are re-written to the binlog file one-by-one and the checksums recomputed/discarded as appropriate. Reviewed-by: Monty <monty@mariadb.org> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-06-13 11:41:44 +02:00
extern my_bool opt_binlog_legacy_event_pos;
extern char *opt_backup_history_logname, *opt_backup_progress_logname,
*opt_backup_settings_name;
extern const char *log_output_str;
extern const char *log_backup_output_str;
2017-07-21 16:52:47 +02:00
/* System Versioning begin */
enum vers_system_time_t
{
SYSTEM_TIME_UNSPECIFIED = 0,
SYSTEM_TIME_AS_OF,
SYSTEM_TIME_FROM_TO,
SYSTEM_TIME_BETWEEN,
SYSTEM_TIME_BEFORE, // used for DELETE HISTORY ... BEFORE
SYSTEM_TIME_HISTORY, // used for DELETE HISTORY
SYSTEM_TIME_ALL
};
struct vers_asof_timestamp_t
2017-07-21 16:52:47 +02:00
{
ulong type;
my_time_t unix_time;
ulong second_part;
};
enum vers_alter_history_enum
{
VERS_ALTER_HISTORY_ERROR= 0,
VERS_ALTER_HISTORY_KEEP
};
2017-07-21 16:52:47 +02:00
/* System Versioning end */
extern char *mysql_home_ptr, *pidfile_name_ptr;
extern MYSQL_PLUGIN_IMPORT char glob_hostname[FN_REFLEN];
extern char mysql_home[FN_REFLEN];
extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file;
extern char default_logfile_name[FN_REFLEN];
MDEV-23842 Atomic RENAME TABLE - Major rewrite of ddl_log.cc and ddl_log.h - ddl_log.cc described in the beginning how the recovery works. - ddl_log.log has unique signature and is dynamic. It's easy to add more information to the header and other ddl blocks while still being able to execute old ddl entries. - IO_SIZE for ddl blocks is now dynamic. Can be changed without affecting recovery of old logs. - Code is more modular and is now usable outside of partition handling. - Renamed log file to dll_recovery.log and added option --log-ddl-recovery to allow one to specify the path & filename. - Added ddl_log_entry_phase[], number of phases for each DDL action, which allowed me to greatly simply set_global_from_ddl_log_entry() - Changed how strings are stored in log entries, which allows us to store much more information in a log entry. - ddl log is now always created at start and deleted on normal shutdown. This simplices things notable. - Added probes debug_crash_here() and debug_simulate_error() to simply crash testing and allow crash after a given number of times a probe is executed. See comments in debug_sync.cc and rename_table.test for how this can be used. - Reverting failed table and view renames is done trough the ddl log. This ensures that the ddl log is tested also outside of recovery. - Added helper function 'handler::needs_lower_case_filenames()' - Extend binary log with Q_XID events. ddl log handling is using this to check if a ddl log entry was logged to the binary log (if yes, it will be deleted from the log during ddl_log_close_binlogged_events() - If a DDL entry fails 3 time, disable it. This is to ensure that if we have a crash in ddl recovery code the server will not get stuck in a forever crash-restart-crash loop. mysqltest.cc changes: - --die will now replace $variables with their values - $error will contain the error of the last failed statement storage engine changes: - maria_rename() was changed to be more robust against crashes during rename.
2020-10-15 01:25:57 +02:00
extern char log_error_file[FN_REFLEN], *opt_tc_log_file, *opt_ddl_recovery_file;
extern const double log_10[309];
extern ulonglong keybuff_size;
extern ulonglong thd_startup_options;
extern my_thread_id global_thread_id;
extern ulong binlog_cache_use, binlog_cache_disk_use;
BUG#57275 binlog_cache_size affects trx- and stmt-cache and gets twice the expected memory After the WL#2687, the binlog_cache_size and max_binlog_cache_size affect both the stmt-cache and the trx-cache. This means that the resource used is twice the amount expected/defined by the user. The binlog_cache_use is incremented when the stmt-cache or the trx-cache is used and binlog_cache_disk_use is incremented when the disk space from the stmt-cache or the trx-cache is used. This behavior does not allow to distinguish which cache may be harming performance due to the extra disk accesses and needs to have its in-memory cache increased. To fix the problem, we introduced two new options and status variables related to the stmt-cache: Options: . binlog_stmt_cache_size . max_binlog_stmt_cache_size Status Variables: . binlog_stmt_cache_use . binlog_stmt_cache_disk_use So there are . binlog_cache_size that defines the size of the transactional cache for updates to transactional engines for the binary log. . binlog_stmt_cache_size that defines the size of the statement cache for updates to non-transactional engines for the binary log. . max_binlog_cache_size that sets the total size of the transactional cache. . max_binlog_stmt_cache_size that sets the total size of the statement cache. . binlog_cache_use that identifies the number of transactions that used the temporary transactional binary log cache. . binlog_cache_disk_use that identifies the number of transactions that used the temporary transactional binary log cache but that exceeded the value of binlog_cache_size. . binlog_stmt_cache_use that identifies the number of statements that used the temporary non-transactional binary log cache. . binlog_stmt_cache_disk_use that identifies the number of statements that used the temporary non-transactional binary log cache but that exceeded the value of binlog_stmt_cache_size. include/my_sys.h: Updated message on disk_writes' usage. mysql-test/extra/binlog_tests/binlog_cache_stat.test: Updated the test case and added code to check the new status variables binlog_stmt_cache_use and binlog_stmt_cache_disk_use. mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test: Updated the test case to use the new system variables max_binlog_stmt_cache_size and binlog_stmt_cache_size. mysql-test/r/mysqld--help-notwin.result: Updated the result file. mysql-test/suite/binlog/r/binlog_mixed_cache_stat.result: Updated the result file. mysql-test/suite/binlog/r/binlog_row_cache_stat.result: Updated the result file. mysql-test/suite/binlog/r/binlog_stm_cache_stat.result: Updated the result file. mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result: Update the result file. mysql-test/suite/rpl/r/rpl_row_binlog_max_cache_size.result: Update the result file. mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result: Updated the result file. mysql-test/suite/sys_vars/inc/binlog_stmt_cache_size_basic.inc: Added a test case to check the binlog_stmt_cache_size. mysql-test/suite/sys_vars/r/binlog_stmt_cache_size_basic_32.result: Updated the result file. mysql-test/suite/sys_vars/r/binlog_stmt_cache_size_basic_64.result: Updated the result file. mysql-test/suite/sys_vars/r/max_binlog_stmt_cache_size_basic.result: Updated the result file. mysql-test/suite/sys_vars/t/binlog_stmt_cache_size_basic_32.test: Added a test case to check the binlog_stmt_cache_size. mysql-test/suite/sys_vars/t/binlog_stmt_cache_size_basic_64.test: Added a test case to check the binlog_stmt_cache_size. mysql-test/suite/sys_vars/t/max_binlog_cache_size_func-master.opt: Removed because there is no test case max_binlog_cache_size_func. mysql-test/suite/sys_vars/t/max_binlog_stmt_cache_size_basic.test: Added a test case to check the system variable max_binlog_stmt_cache_size. sql/log.cc: There two main changes in here: . Changed the set_write_error() as an error message is set according to the type of the cache. . Created the function set_binlog_cache_info where references to the appropriate status and system variables are set and the server can smoothly compute statistics and set the maximum size for each cache. sql/log.h: Changed the signature of the function in order to identify the error message to be printed out as there is a different error code for each type of cache. sql/mysqld.cc: Added new status variables binlog_stmt_cache_use and binlog_stmt_cache_disk_use. sql/mysqld.h: Added new system variables max_binlog_stmt_cache_size and binlog_stmt_cache_size. sql/share/errmsg-utf8.txt: Added new error message related to the statement cache. sql/sys_vars.cc: Added new system variables max_binlog_stmt_cache_size and binlog_stmt_cache_size.
2010-11-05 18:42:37 +01:00
extern ulong binlog_stmt_cache_use, binlog_stmt_cache_disk_use;
MDEV-4991: GTID binlog indexing Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-09-08 13:12:49 +02:00
extern ulong binlog_gtid_index_hit, binlog_gtid_index_miss;
extern ulong aborted_threads, aborted_connects, aborted_connects_preauth;
extern ulong delayed_insert_timeout;
extern ulong delayed_insert_limit, delayed_queue_size;
extern ulong delayed_insert_threads, delayed_insert_writes;
extern ulong delayed_rows_in_use,delayed_insert_errors;
extern Atomic_counter<uint32_t> slave_open_temp_tables;
MDEV-31404 Implement binlog_space_limit binlog_space_limit is a variable in Percona server used to limit the total size of all binary logs. This implementation is based on code from Percona server 5.7. In MariaDB we decided to call the variable max-binlog-total-size to be similar to max-binlog-size. This makes it easier to find in the output from 'mariadbd --help --verbose'). MariaDB will also support binlog_space_limit for compatibility with Percona. Some internal notes to explain implementation notes: - When running MariaDB does not delete binary logs that are either used by slaves or have active xid that are not yet committed. Some implementation notes: - max-binlog-total-size is by default 0 (no limit). - max-binlog-total-size can be changed without server restart. - Binlog file sizes are checked on startup, or if max-binlog-total-size is set to a value > 0, not for every log write. The total size of all binary logs is cached and dynamically updated when updating the binary log on binary log rotation. - max-binlog-total-size is checked against existing log files during serverstart, binlog rotation, FLUSH LOGS, when writing to binary log or when max-binlog-total-size changes value. - Option --slave-connections-needed-for-purge with 1 as default added. This allows one to ensure that we do not delete binary logs if there is less than 'slave-connections-needed-for-purge' connected. Without this option max-binlog-total-size would potentially delete binlogs needed by slaves on server startup or when a slave disconnects as there are then no connected slaves to protect active binlogs. - PURGE BINARY LOGS TO ... will be executed as if slave-connectitons-needed-for-purge would be zero. In other words it will do the purge even if there is no slaves connected. If there are connected slaves working on the logs, these will be protected. - If binary log is on and max-binlog-total_size <> 0 then the status variable 'Binlog_disk_use' shows the current size of all old binary logs + the state of the current one. - Removed test of strcmp(log_file_name, log_info.log_file_name) in purge_logs_before_date() as this is tested in can_purge_logs() - To avoid expensive calls of log_in_use() we cache the result for the last log that is in use by a slave. Future calls to can_purge_logs() for this binary log will be quickly detected and false will be returned until a slave starts working on a new log. - Note that after a binary log rotation caused by max_binlog_size, the last log will not be purged directly as it is still in use internally. The next binary log write will purge binlogs if needed. Reviewer:Kristian Nielsen <knielsen@knielsen-hq.org>
2023-12-03 20:42:44 +01:00
extern Atomic_counter<ulonglong> sending_new_binlog_file;
extern uint slave_connections_needed_for_purge;
2011-12-02 19:49:05 +01:00
extern ulonglong query_cache_size;
extern ulong query_cache_limit;
2011-12-02 19:49:05 +01:00
extern ulong query_cache_min_res_unit;
extern ulong slow_launch_threads, slow_launch_time;
extern MYSQL_PLUGIN_IMPORT ulong max_connections;
extern uint max_digest_length;
extern ulong max_connect_errors, connect_timeout;
extern uint max_password_errors;
extern my_bool slave_allow_batching;
extern my_bool allow_slave_start;
extern LEX_CSTRING reason_slave_blocked;
extern ulong slave_trans_retries;
extern ulong slave_trans_retry_interval;
extern uint slave_net_timeout;
2011-11-22 18:04:38 +01:00
extern int max_user_connections;
extern ulong what_to_log,flush_time;
extern uint max_prepared_stmt_count, prepared_stmt_count;
2018-04-16 20:09:14 +02:00
extern MYSQL_PLUGIN_IMPORT ulong open_files_limit;
extern ulonglong binlog_cache_size, binlog_stmt_cache_size, binlog_file_cache_size;
2011-12-02 19:49:05 +01:00
extern ulonglong max_binlog_cache_size, max_binlog_stmt_cache_size;
MDEV-31404 Implement binlog_space_limit binlog_space_limit is a variable in Percona server used to limit the total size of all binary logs. This implementation is based on code from Percona server 5.7. In MariaDB we decided to call the variable max-binlog-total-size to be similar to max-binlog-size. This makes it easier to find in the output from 'mariadbd --help --verbose'). MariaDB will also support binlog_space_limit for compatibility with Percona. Some internal notes to explain implementation notes: - When running MariaDB does not delete binary logs that are either used by slaves or have active xid that are not yet committed. Some implementation notes: - max-binlog-total-size is by default 0 (no limit). - max-binlog-total-size can be changed without server restart. - Binlog file sizes are checked on startup, or if max-binlog-total-size is set to a value > 0, not for every log write. The total size of all binary logs is cached and dynamically updated when updating the binary log on binary log rotation. - max-binlog-total-size is checked against existing log files during serverstart, binlog rotation, FLUSH LOGS, when writing to binary log or when max-binlog-total-size changes value. - Option --slave-connections-needed-for-purge with 1 as default added. This allows one to ensure that we do not delete binary logs if there is less than 'slave-connections-needed-for-purge' connected. Without this option max-binlog-total-size would potentially delete binlogs needed by slaves on server startup or when a slave disconnects as there are then no connected slaves to protect active binlogs. - PURGE BINARY LOGS TO ... will be executed as if slave-connectitons-needed-for-purge would be zero. In other words it will do the purge even if there is no slaves connected. If there are connected slaves working on the logs, these will be protected. - If binary log is on and max-binlog-total_size <> 0 then the status variable 'Binlog_disk_use' shows the current size of all old binary logs + the state of the current one. - Removed test of strcmp(log_file_name, log_info.log_file_name) in purge_logs_before_date() as this is tested in can_purge_logs() - To avoid expensive calls of log_in_use() we cache the result for the last log that is in use by a slave. Future calls to can_purge_logs() for this binary log will be quickly detected and false will be returned until a slave starts working on a new log. - Note that after a binary log rotation caused by max_binlog_size, the last log will not be purged directly as it is still in use internally. The next binary log write will purge binlogs if needed. Reviewer:Kristian Nielsen <knielsen@knielsen-hq.org>
2023-12-03 20:42:44 +01:00
extern ulonglong internal_binlog_space_limit;
extern uint internal_slave_connections_needed_for_purge;
Made max_relay_log_size depending on master connection. Changed names of multi-source log files so that original suffixes are kept. include/my_sys.h: Added fn_ext2(), which returns pointer to last '.' in file name mysql-test/extra/rpl_tests/rpl_max_relay_size.test: Updated test mysql-test/suite/multi_source/info_logs-master.opt: Test with strange file names mysql-test/suite/multi_source/info_logs.result: Updated results mysql-test/suite/multi_source/info_logs.test: Changed to test with complex names to be able to verify the filename generator code mysql-test/suite/multi_source/relaylog_events.result: Updated results mysql-test/suite/multi_source/reset_slave.result: Updated results mysql-test/suite/multi_source/skip_counter.result: Updated results mysql-test/suite/multi_source/skip_counter.test: Added testing of max_relay_log_size mysql-test/suite/rpl/r/rpl_row_max_relay_size.result: Updated results mysql-test/suite/rpl/r/rpl_stm_max_relay_size.result: Updated results mysql-test/suite/sys_vars/r/max_relay_log_size_basic.result: Updated results mysql-test/suite/sys_vars/t/max_relay_log_size_basic.test: Updated results mysys/mf_fn_ext.c: Added fn_ext2(), which returns pointer to last '.' in file name sql/log.cc: Removed some wrong casts sql/log.h: Updated comment to reflect new code sql/log_event.cc: Updated DBUG_PRINT sql/mysqld.cc: Added that max_relay_log_size copies it's values from max_binlog_size sql/mysqld.h: Removed max_relay_log_size sql/rpl_mi.cc: Changed names of multi-source log files so that original suffixes are kept. sql/rpl_mi.h: Updated prototype sql/rpl_rli.cc: Updated comment to reflect new code Made max_relay_log_size depending on master connection. sql/rpl_rli.h: Made max_relay_log_size depending on master connection. sql/set_var.h: Made option global so that one can check and change min & max values (sorry Sergei) sql/sql_class.h: Made max_relay_log_size depending on master connection. sql/sql_repl.cc: Updated calls to create_signed_file_name() sql/sys_vars.cc: Made max_relay_log_size depending on master connection. Made old code more reusable sql/sys_vars.h: Changed Sys_var_multi_source_uint to ulong to be able to handle max_relay_log_size Made old code more reusable
2012-10-01 01:30:44 +02:00
extern ulong max_binlog_size;
extern ulong slave_max_allowed_packet;
extern ulonglong slave_max_statement_time;
extern double slave_max_statement_time_double;
extern ulong opt_binlog_rows_event_max_size;
extern ulong binlog_row_metadata;
MDEV-4991: GTID binlog indexing Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-09-08 13:12:49 +02:00
extern my_bool opt_binlog_gtid_index;
extern uint opt_binlog_gtid_index_page_size;
extern uint opt_binlog_gtid_index_span_min;
2019-03-01 19:36:29 +01:00
extern ulong thread_cache_size;
Fixed bug#11753187 (formerly known as bug 44585): SP_CACHE BEHAVES AS MEMORY LEAK. Background: - There are caches for stored functions and stored procedures (SP-cache); - There is no similar cache for events; - Triggers are cached together with TABLE objects; - Those SP-caches are per-session (i.e. specific to each session); - A stored routine is represented by a sp_head-instance internally; - SP-cache basically contains sp_head-objects of stored routines, which have been executed in a session; - sp_head-object is added into the SP-cache before the corresponding stored routine is executed; - SP-cache is flushed in the end of the session. The problem was that SP-cache might grow without any limit. Although this was not a pure memory leak (the SP-cache is flushed when session is closed), this is still a problem, because the user might take much memory by executing many stored routines. The patch fixes this problem in the least-intrusive way. A soft limit (similar to the size of table definition cache) is introduced. To represent such limit the new runtime configuration parameter 'stored_program_cache' is introduced. The value of this parameter is stored in the new global variable stored_program_cache_size that used to control the size of SP-cache to overflow. The parameter 'stored_program_cache' limits number of cached routines for each thread. It has the following min/default/max values given from support: min = 256, default = 256, max = 512 * 1024. Also it should be noted that this parameter limits the size of each cache (for stored procedures and for stored functions) separately. The SP-cache size is checked after top-level statement is parsed. If SP-cache size exceeds the limit specified by parameter 'stored_program_cache' then SP-cache is flushed and memory allocated for cache objects is freed. Such approach allows to flush cache safely when there are dependencies among stored routines. sql/mysqld.cc: Added global variable stored_program_cache_size to store value of configuration parameter 'stored-program-cache'. sql/mysqld.h: Added declaration of global variable stored_program_cache_size. sql/sp_cache.cc: Extended interface for sp_cache by adding helper routine sp_cache_enforce_limit to control size of stored routines cache for overflow. Also added method enforce_limit into class sp_cache that implements control of cache size for overflow. sql/sp_cache.h: Extended interface for sp_cache by adding standalone routine sp_cache_enforce_limit to control size of stored routines cache for overflow. sql/sql_parse.cc: Added flush of sp_cache after processing of next sql-statement received from a client. sql/sql_prepare.cc: Added flush of sp_cache after preparation/execution of next prepared sql-statement received from a client. sql/sys_vars.cc: Added support for configuration parameter stored-program-cache.
2012-01-25 10:59:30 +01:00
extern ulong stored_program_cache_size;
extern ulong opt_slave_parallel_threads;
MDEV-5657: Parallel replication. Clean up and improve the parallel implementation code, mainly related to scheduling of work to threads and handling of stop and errors. Fix a lot of bugs in various corner cases that could lead to crashes or corruption. Fix that a single replication domain could easily grab all worker threads and stall all other domains; now a configuration variable --slave-domain-parallel-threads allows to limit the number of workers. Allow next event group to start as soon as previous group begins the commit phase (as opposed to when it ends it); this allows multiple event groups on the slave to participate in group commit, even when no other opportunities for parallelism are available. Various fixes: - Fix some races in the rpl.rpl_parallel test case. - Fix an old incorrect assertion in Log_event iocache read. - Fix repeated malloc/free of wait_for_commit and rpl_group_info objects. - Simplify wait_for_commit wakeup logic. - Fix one case in queue_for_group_commit() where killing one thread would fail to correctly signal the error to the next, causing loss of the transaction after slave restart. - Fix leaking of pthreads (and their allocated stack) due to missing PTHREAD_CREATE_DETACHED attribute. - Fix how one batch of group-committed transactions wait for the previous batch before starting to execute themselves. The old code had a very complex scheduling where the first transaction was handled differently, with subtle bugs in corner cases. Now each event group is always scheduled for a new worker (in a round-robin fashion amongst available workers). Keep a count of how many transactions have started to commit, and wait for that counter to reach the appropriate value. - Fix slave stop to wait for all workers to actually complete processing; before, the wait was for update of last_committed_sub_id, which happens a bit earlier, and could leave worker threads potentially accessing bits of the replication state that is no longer valid after slave stop. - Fix a couple of places where the test suite would kill a thread waiting inside enter_cond() in connection with debug_sync; debug_sync + kill can crash in rare cases due to a race with mysys_var_current_mutex in this case. - Fix some corner cases where we had enter_cond() but no exit_cond(). - Fix that we could get failure in wait_for_prior_commit() but forget to flag the error with my_error(). - Fix slave stop (both for normal stop and stop due to error). Now, at stop we pick a specific safe point (in terms of event groups executed) and make sure that all event groups before that point are executed to completion, and that no event group after start executing; this ensures a safe place to restart replication, even for non-transactional stuff/DDL. In error stop, make sure that all prior event groups are allowed to execute to completion, and that any later event groups that have started are rolled back, if possible. The old code could leave eg. T1 and T3 committed but T2 not, or it could even leave half a transaction not rolled back in some random worker, which would cause big problems when that worker was later reused after slave restart. - Fix the accounting of amount of events queued for one worker. Before, the amount was reduced immediately as soon as the events were dequeued (which happens all at once); this allowed twice the amount of events to be queued in memory for each single worker, which is not what users would expect. - Fix that an error set during execution of one event was sometimes not cleared before executing the next, causing problems with the error reporting. - Fix incorrect handling of thd->killed in worker threads.
2014-02-26 15:02:09 +01:00
extern ulong opt_slave_domain_parallel_threads;
extern ulong opt_slave_parallel_max_queued;
extern ulong opt_slave_parallel_mode;
extern ulong opt_binlog_commit_wait_count;
extern ulong opt_binlog_commit_wait_usec;
extern my_bool opt_gtid_ignore_duplicates;
extern uint opt_gtid_cleanup_batch_size;
extern ulong back_log;
extern ulong executed_events;
extern char language[FN_REFLEN];
extern "C" MYSQL_PLUGIN_IMPORT ulong server_id;
extern ulong concurrency;
extern time_t server_start_time, flush_status_time;
extern char *opt_mysql_tmpdir, mysql_charsets_dir[];
extern size_t mysql_unpacked_real_data_home_len;
extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
extern const char *first_keyword, *delayed_user, *slave_user;
extern MYSQL_PLUGIN_IMPORT const char *my_localhost;
extern MYSQL_PLUGIN_IMPORT const char **errmesg; /* Error messages */
extern const char *myisam_recover_options_str;
MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp() This patch also fixes: MDEV-33050 Build-in schemas like oracle_schema are accent insensitive MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0 MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0 MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0 MDEV-33088 Cannot create triggers in the database `MYSQL` MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0 MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0 MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0 MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS MDEV-33120 System log table names are case insensitive with lower-cast-table-names=0 - Removing the virtual function strnncoll() from MY_COLLATION_HANDLER - Adding a wrapper function CHARSET_INFO::streq(), to compare two strings for equality. For now it calls strnncoll() internally. In the future it will turn into a virtual function. - Adding new accent sensitive case insensitive collations: - utf8mb4_general1400_as_ci - utf8mb3_general1400_as_ci They implement accent sensitive case insensitive comparison. The weight of a character is equal to the code point of its upper case variant. These collations use Unicode-14.0.0 casefolding data. The result of my_charset_utf8mb3_general1400_as_ci.strcoll() is very close to the former my_charset_utf8mb3_general_ci.strcasecmp() There is only a difference in a couple dozen rare characters, because: - the switch from "tolower" to "toupper" comparison, to make utf8mb3_general1400_as_ci closer to utf8mb3_general_ci - the switch from Unicode-3.0.0 to Unicode-14.0.0 This difference should be tolarable. See the list of affected characters in the MDEV description. Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters! Unlike utf8mb4_general_ci, it does not treat all BMP characters as equal. - Adding classes representing names of the file based database objects: Lex_ident_db Lex_ident_table Lex_ident_trigger Their comparison collation depends on the underlying file system case sensitivity and on --lower-case-table-names and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci. - Adding classes representing names of other database objects, whose names have case insensitive comparison style, using my_charset_utf8mb3_general1400_as_ci: Lex_ident_column Lex_ident_sys_var Lex_ident_user_var Lex_ident_sp_var Lex_ident_ps Lex_ident_i_s_table Lex_ident_window Lex_ident_func Lex_ident_partition Lex_ident_with_element Lex_ident_rpl_filter Lex_ident_master_info Lex_ident_host Lex_ident_locale Lex_ident_plugin Lex_ident_engine Lex_ident_server Lex_ident_savepoint Lex_ident_charset engine_option_value::Name - All the mentioned Lex_ident_xxx classes implement a method streq(): if (ident1.streq(ident2)) do_equal(); This method works as a wrapper for CHARSET_INFO::streq(). - Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name" in class members and in function/method parameters. - Replacing all calls like system_charset_info->coll->strcasecmp(ident1, ident2) to ident1.streq(ident2) - Taking advantage of the c++11 user defined literal operator for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h) data types. Use example: const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column; is now a shorter version of: const Lex_ident_column primary_key_name= Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
2023-04-26 13:27:01 +02:00
extern const Lex_ident_column in_left_expr_name, in_additional_cond, in_having_cond;
Reduce usage of strlen() Changes: - To detect automatic strlen() I removed the methods in String that uses 'const char *' without a length: - String::append(const char*) - Binary_string(const char *str) - String(const char *str, CHARSET_INFO *cs) - append_for_single_quote(const char *) All usage of append(const char*) is changed to either use String::append(char), String::append(const char*, size_t length) or String::append(LEX_CSTRING) - Added STRING_WITH_LEN() around constant string arguments to String::append() - Added overflow argument to escape_string_for_mysql() and escape_quotes_for_mysql() instead of returning (size_t) -1 on overflow. This was needed as most usage of the above functions never tested the result for -1 and would have given wrong results or crashes in case of overflows. - Added Item_func_or_sum::func_name_cstring(), which returns LEX_CSTRING. Changed all Item_func::func_name()'s to func_name_cstring()'s. The old Item_func_or_sum::func_name() is now an inline function that returns func_name_cstring().str. - Changed Item::mode_name() and Item::func_name_ext() to return LEX_CSTRING. - Changed for some functions the name argument from const char * to to const LEX_CSTRING &: - Item::Item_func_fix_attributes() - Item::check_type_...() - Type_std_attributes::agg_item_collations() - Type_std_attributes::agg_item_set_converter() - Type_std_attributes::agg_arg_charsets...() - Type_handler_hybrid_field_type::aggregate_for_result() - Type_handler_geometry::check_type_geom_or_binary() - Type_handler::Item_func_or_sum_illegal_param() - Predicant_to_list_comparator::add_value_skip_null() - Predicant_to_list_comparator::add_value() - cmp_item_row::prepare_comparators() - cmp_item_row::aggregate_row_elements_for_comparison() - Cursor_ref::print_func() - Removes String_space() as it was only used in one cases and that could be simplified to not use String_space(), thanks to the fixed my_vsnprintf(). - Added some const LEX_CSTRING's for common strings: - NULL_clex_str, DATA_clex_str, INDEX_clex_str. - Changed primary_key_name to a LEX_CSTRING - Renamed String::set_quick() to String::set_buffer_if_not_allocated() to clarify what the function really does. - Rename of protocol function: bool store(const char *from, CHARSET_INFO *cs) to bool store_string_or_null(const char *from, CHARSET_INFO *cs). This was done to both clarify the difference between this 'store' function and also to make it easier to find unoptimal usage of store() calls. - Added Protocol::store(const LEX_CSTRING*, CHARSET_INFO*) - Changed some 'const char*' arrays to instead be of type LEX_CSTRING. - class Item_func_units now used LEX_CSTRING for name. Other things: - Fixed a bug in mysql.cc:construct_prompt() where a wrong escape character in the prompt would cause some part of the prompt to be duplicated. - Fixed a lot of instances where the length of the argument to append is known or easily obtain but was not used. - Removed some not needed 'virtual' definition for functions that was inherited from the parent. I added override to these. - Fixed Ordered_key::print() to preallocate needed buffer. Old code could case memory overruns. - Simplified some loops when adding char * to a String with delimiters.
2020-08-12 19:29:55 +02:00
extern const LEX_CSTRING NULL_clex_str;
extern const LEX_CSTRING error_clex_str;
extern SHOW_VAR status_vars[];
extern struct system_variables max_system_variables;
extern struct system_status_var global_status_var;
2010-11-25 18:17:28 +01:00
extern struct my_rnd_struct sql_rand;
extern const char *opt_date_time_formats[];
extern handlerton *partition_hton;
extern handlerton *myisam_hton;
extern handlerton *heap_hton;
extern const char *load_default_groups[];
extern struct my_option my_long_options[];
int handle_early_options();
MDEV-22214 mariadbd.exe calls function mysqld.exe, and crashes Stop linking plugins to the server executable on Windows. Instead, extract whole server functionality into a large DLL, called server.dll. Link both plugins, and small server "stub" exe to it. This eliminates plugin dependency on the name of the server executable. It also reduces the size of the packages (since tiny mysqld.exe and mariadbd.exe are now both linked to one big DLL) Also, simplify the functionality of exporing all symbols from selected static libraries. Rely on WINDOWS_EXPORT_ALL_SYMBOLS, rather than old self-backed solution. fix compile error replace GetProcAddress(GetModuleHandle(NULL), "variable_name") for server exported data with actual variable names. Runtime loading was never required,was error prone , since symbols could be missing at runtime, and now it actually failed, because we do not export symbols from executable anymore, but from a shared library This did require a MYSQL_PLUGIN_IMPORT decoration for the plugin, but made the code more straightforward, and avoids missing symbols at runtime (as mentioned before). The audit plugin is still doing some dynamic loading, as it aims to work cross-version. Now it won't work cross-version on Windows, as it already uses some symbols that are *not* dynamically loaded, e.g fn_format and those symbols now exported from server.dll , when earlier they were exported by mysqld.exe Windows, fixes for storage engine plugin loading after various rebranding stuff Create server.dll containing functionality of the whole server make mariadbd.exe/mysqld.exe a stub that is only calling mysqld_main() fix build
2020-04-10 14:09:18 +02:00
extern int MYSQL_PLUGIN_IMPORT mysqld_server_started;
extern int mysqld_server_initialized;
extern "C" MYSQL_PLUGIN_IMPORT int orig_argc;
extern "C" MYSQL_PLUGIN_IMPORT char **orig_argv;
extern pthread_attr_t connection_attrib;
extern my_bool old_mode;
extern LEX_STRING opt_init_connect, opt_init_slave;
extern char err_shared_dir[];
extern ulong connection_errors_select;
extern ulong connection_errors_accept;
extern ulong connection_errors_tcpwrap;
extern ulong connection_errors_internal;
extern ulong connection_errors_max_connection;
extern ulong connection_errors_peer_addr;
extern ulong log_warnings;
extern my_bool encrypt_binlog;
extern my_bool encrypt_tmp_disk_tables, encrypt_tmp_files;
2014-12-22 15:53:17 +01:00
extern ulong encryption_algorithm;
extern const char *encryption_algorithm_names[];
extern long opt_secure_timestamp;
extern uint default_password_lifetime;
extern my_bool disconnect_on_expired_password;
enum secure_timestamp { SECTIME_NO, SECTIME_SUPER, SECTIME_REPL, SECTIME_YES };
bool is_set_timestamp_forbidden(THD *thd);
#ifdef HAVE_MMAP
extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active,
key_LOCK_pool, key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
key_BINLOG_LOCK_binlog_background_thread,
key_LOCK_binlog_end_pos,
key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
key_LOCK_crypt, key_LOCK_delayed_create,
key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
Patch that refactors global read lock implementation and fixes bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK" and bug #54673 "It takes too long to get readlock for 'FLUSH TABLES WITH READ LOCK'". The first bug manifested itself as a deadlock which occurred when a connection, which had some table open through HANDLER statement, tried to update some data through DML statement while another connection tried to execute FLUSH TABLES WITH READ LOCK concurrently. What happened was that FTWRL in the second connection managed to perform first step of GRL acquisition and thus blocked all upcoming DML. After that it started to wait for table open through HANDLER statement to be flushed. When the first connection tried to execute DML it has started to wait for GRL/the second connection creating deadlock. The second bug manifested itself as starvation of FLUSH TABLES WITH READ LOCK statements in cases when there was a constant stream of concurrent DML statements (in two or more connections). This has happened because requests for protection against GRL which were acquired by DML statements were ignoring presence of pending GRL and thus the latter was starved. This patch solves both these problems by re-implementing GRL using metadata locks. Similar to the old implementation acquisition of GRL in new implementation is two-step. During the first step we block all concurrent DML and DDL statements by acquiring global S metadata lock (each DML and DDL statement acquires global IX lock for its duration). During the second step we block commits by acquiring global S lock in COMMIT namespace (commit code acquires global IX lock in this namespace). Note that unlike in old implementation acquisition of protection against GRL in DML and DDL is semi-automatic. We assume that any statement which should be blocked by GRL will either open and acquires write-lock on tables or acquires metadata locks on objects it is going to modify. For any such statement global IX metadata lock is automatically acquired for its duration. The first problem is solved because waits for GRL become visible to deadlock detector in metadata locking subsystem and thus deadlocks like one in the first bug become impossible. The second problem is solved because global S locks which are used for GRL implementation are given preference over IX locks which are acquired by concurrent DML (and we can switch to fair scheduling in future if needed). Important change: FTWRL/GRL no longer blocks DML and DDL on temporary tables. Before this patch behavior was not consistent in this respect: in some cases DML/DDL statements on temporary tables were blocked while in others they were not. Since the main use cases for FTWRL are various forms of backups and temporary tables are not preserved during backups we have opted for consistently allowing DML/DDL on temporary tables during FTWRL/GRL. Important change: This patch changes thread state names which are used when DML/DDL of FTWRL is waiting for global read lock. It is now either "Waiting for global read lock" or "Waiting for commit lock" depending on the stage on which FTWRL is. Incompatible change: To solve deadlock in events code which was exposed by this patch we have to replace LOCK_event_metadata mutex with metadata locks on events. As result we have to prohibit DDL on events under LOCK TABLES. This patch also adds extensive test coverage for interaction of DML/DDL and FTWRL. Performance of new and old global read lock implementations in sysbench tests were compared. There were no significant difference between new and old implementations. mysql-test/include/check_ftwrl_compatible.inc: Added helper script which allows to check that a statement is compatible with FLUSH TABLES WITH READ LOCK. mysql-test/include/check_ftwrl_incompatible.inc: Added helper script which allows to check that a statement is incompatible with FLUSH TABLES WITH READ LOCK. mysql-test/include/handler.inc: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/include/wait_show_condition.inc: Fixed small error in the timeout message. The correct name of variable used as parameter for this script is "$condition" and not "$wait_condition". mysql-test/r/delayed.result: Added test coverage for scenario which triggered assert in metadata locking subsystem. mysql-test/r/events_2.result: Updated test results after prohibiting event DDL operations under LOCK TABLES. mysql-test/r/flush.result: Added test coverage for bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". mysql-test/r/flush_read_lock.result: Added test coverage for various aspects of FLUSH TABLES WITH READ LOCK functionality. mysql-test/r/flush_read_lock_kill.result: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Use new debug_sync point. Do not disable concurrent inserts as now InnoDB we always use InnoDB table. mysql-test/r/handler_innodb.result: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/r/handler_myisam.result: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/r/mdl_sync.result: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Replaced usage of GRL-specific debug_sync's with appropriate sync points in MDL subsystem. mysql-test/suite/perfschema/r/dml_setup_instruments.result: Updated test results after removing global COND_global_read_lock condition variable. mysql-test/suite/perfschema/r/func_file_io.result: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/r/func_mutex.result: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/r/global_read_lock.result: Adjusted test case to take into account that new GRL implementation is based on MDL. mysql-test/suite/perfschema/r/server_init.result: Adjusted test case after replacing custom global read lock implementation with one based on MDL and replacing LOCK_event_metadata mutex with metadata lock. mysql-test/suite/perfschema/t/func_file_io.test: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/t/func_mutex.test: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/t/global_read_lock.test: Adjusted test case to take into account that new GRL implementation is based on MDL. mysql-test/suite/perfschema/t/server_init.test: Adjusted test case after replacing custom global read lock implementation with one based on MDL and replacing LOCK_event_metadata mutex with metadata lock. mysql-test/suite/rpl/r/rpl_tmp_table_and_DDL.result: Updated test results after prohibiting event DDL under LOCK TABLES. mysql-test/t/delayed.test: Added test coverage for scenario which triggered assert in metadata locking subsystem. mysql-test/t/events_2.test: Updated test case after prohibiting event DDL operations under LOCK TABLES. mysql-test/t/flush.test: Added test coverage for bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". mysql-test/t/flush_block_commit.test: Adjusted test case after changing thread state name which is used when COMMIT waits for FLUSH TABLES WITH READ LOCK from "Waiting for release of readlock" to "Waiting for commit lock". mysql-test/t/flush_block_commit_notembedded.test: Adjusted test case after changing thread state name which is used when DML waits for FLUSH TABLES WITH READ LOCK. Now we use "Waiting for global read lock" in this case. mysql-test/t/flush_read_lock.test: Added test coverage for various aspects of FLUSH TABLES WITH READ LOCK functionality. mysql-test/t/flush_read_lock_kill-master.opt: We no longer need to use make_global_read_lock_block_commit_loop debug tag in this test. Instead we rely on an appropriate debug_sync point in MDL code. mysql-test/t/flush_read_lock_kill.test: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Use new debug_sync point. Do not disable concurrent inserts as now InnoDB we always use InnoDB table. mysql-test/t/lock_multi.test: Adjusted test case after changing thread state names which are used when DML or DDL waits for FLUSH TABLES WITH READ LOCK to "Waiting for global read lock". mysql-test/t/mdl_sync.test: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Replaced usage of GRL-specific debug_sync's with appropriate sync points in MDL subsystem. Updated thread state names which are used when DDL waits for FTWRL. mysql-test/t/trigger_notembedded.test: Adjusted test case after changing thread state names which are used when DML or DDL waits for FLUSH TABLES WITH READ LOCK to "Waiting for global read lock". sql/event_data_objects.cc: Removed Event_queue_element::status/last_executed_changed members and Event_queue_element::update_timing_fields() method. We no longer use this class for updating mysql.events once event is chosen for execution. Accesses to instances of this class in scheduler thread require protection by Event_queue::LOCK_event_queue mutex and we try to avoid updating table while holding this lock. sql/event_data_objects.h: Removed Event_queue_element::status/last_executed_changed members and Event_queue_element::update_timing_fields() method. We no longer use this class for updating mysql.events once event is chosen for execution. Accesses to instances of this class in scheduler thread require protection by Event_queue::LOCK_event_queue mutex and we try to avoid updating table while holding this lock. sql/event_db_repository.cc: - Changed Event_db_repository methods to not release all metadata locks once they are done updating mysql.events table. This allows to keep metadata lock protecting against GRL and lock protecting particular event around until corresponding DDL statement is written to the binary log. - Removed logic for conditional update of "status" and "last_executed" fields from update_timing_fields_for_event() method. In the only case when this method is called now "last_executed" is always modified and tracking change of "status" is too much hassle. sql/event_db_repository.h: Removed logic for conditional update of "status" and "last_executed" fields from Event_db_repository:: update_timing_fields_for_event() method. In the only case when this method is called now "last_executed" is always modified and tracking change of "status" field is too much hassle. sql/event_queue.cc: Changed event scheduler code not to update mysql.events table while holding Event_queue::LOCK_event_queue mutex. Doing so led to a deadlock with a new GRL implementation. This deadlock didn't occur with old implementation due to fact that code acquiring protection against GRL ignored pending GRL requests (which lead to GRL starvation). One of goals of new implementation is to disallow GRL starvation and so we have to solve problem with this deadlock in a different way. sql/events.cc: Changed methods of Events class to acquire protection against GRL while perfoming DDL statement and keep it until statement is written to the binary log. Unfortunately this step together with new GRL implementation exposed deadlock involving Events::LOCK_event_metadata and GRL. To solve it Events::LOCK_event_metadata mutex was replaced with a metadata lock on event. As a side-effect events DDL has to be prohibited under LOCK TABLES even in cases when mysql.events table was explicitly locked for write. sql/events.h: Replaced Events::LOCK_event_metadata mutex with a metadata lock on event. sql/ha_ndbcluster.cc: Updated code after replacing custom global read lock implementation with one based on MDL. Since MDL subsystem should now be able to detect deadlocks involving metadata locks and GRL there is no need for special handling of active GRL. sql/handler.cc: Replaced custom implementation of global read lock with one based on metadata locks. Consequently when doing commit instead of calling method of Global_read_lock class to acquire protection against GRL we simply acquire IX in COMMIT namespace. sql/lock.cc: Replaced custom implementation of global read lock with one based on metadata locks. This step allows to expose wait for GRL to deadlock detector of MDL subsystem and thus succesfully resolve deadlocks similar to one behind bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". It also solves problem with GRL starvation described in bug #54673 "It takes too long to get readlock for 'FLUSH TABLES WITH READ LOCK'" since metadata locks used by GRL give preference to FTWRL statement instead of DML statements (if needed in future this can be changed to fair scheduling). Similar to old implementation of acquisition of GRL is two-step. During the first step we block all concurrent DML and DDL statements by acquiring global S metadata lock (each DML and DDL statement acquires global IX lock for its duration). During the second step we block commits by acquiring global S lock in COMMIT namespace (commit code acquires global IX lock in this namespace). Note that unlike in old implementation acquisition of protection against GRL in DML and DDL is semi-automatic. We assume that any statement which should be blocked by GRL will either open and acquires write-lock on tables or acquires metadata locks on objects it is going to modify. For any such statement global IX metadata lock is automatically acquired for its duration. To support this change: - Global_read_lock::lock/unlock_global_read_lock and make_global_read_lock_block_commit methods were changed accordingly. - Global_read_lock::wait_if_global_read_lock() and start_waiting_global_read_lock() methods were dropped. It is now responsibility of code acquiring metadata locks opening tables to acquire protection against GRL by explicitly taking global IX lock with statement duration. - Global variables, mutex and condition variable used by old implementation was removed. - lock_routine_name() was changed to use statement duration for its global IX lock. It was also renamed to lock_object_name() as it now also used to take metadata locks on events. - Global_read_lock::set_explicit_lock_duration() was added which allows not to release locks used for GRL when leaving prelocked mode. sql/lock.h: - Renamed lock_routine_name() to lock_object_name() and changed its signature to allow its usage for events. - Removed broadcast_refresh() function. It is no longer needed with new GRL implementation. sql/log_event.cc: Release metadata locks with statement duration at the end of processing legacy event for LOAD DATA. This ensures that replication thread processing such event properly releases its protection against global read lock. sql/mdl.cc: Changed MDL subsystem to support new MDL-based implementation of global read lock. Added COMMIT and EVENTS namespaces for metadata locks. Changed thread state name for GLOBAL namespace to "Waiting for global read lock". Optimized MDL_map::find_or_insert() method to avoid taking m_mutex mutex when looking up MDL_lock objects for GLOBAL or COMMIT namespaces. We keep pre-created MDL_lock objects for these namespaces around and simply return pointers to these global objects when needed. Changed MDL_lock/MDL_scoped_lock to properly handle notification of insert delayed handler threads when FTWRL takes global S lock. Introduced concept of lock duration. In addition to locks with transaction duration which work in the way which is similar to how locks worked before (i.e. they are released at the end of transaction), locks with statement and explicit duration were introduced. Locks with statement duration are automatically released at the end of statement. Locks with explicit duration require explicit release and obsolete concept of transactional sentinel. * Changed MDL_request and MDL_ticket classes to support notion of duration. * Changed MDL_context to keep locks with different duration in different lists. Changed code handling ticket list to take this into account. * Changed methods responsible for releasing locks to take into account duration of tickets. Particularly public MDL_context::release_lock() method now only can release tickets with explicit duration (there is still internal method which allows to specify duration). To release locks with statement or transaction duration one have to use release_statement/transactional_locks() methods. * Concept of savepoint for MDL subsystem now has to take into account locks with statement duration. Consequently MDL_savepoint class was introduced and methods working with savepoints were updated accordingly. * Added methods which allow to set duration for one or all locks in the context. sql/mdl.h: Changed MDL subsystem to support new MDL-based implementation of global read lock. Added COMMIT and EVENTS namespaces for metadata locks. Introduced concept of lock duration. In addition to locks with transaction duration which work in the way which is similar to how locks worked before (i.e. they are released at the end of transaction), locks with statement and explicit duration were introduced. Locks with statement duration are automatically released at the end of statement. Locks with explicit duration require explicit release and obsolete concept of transactional sentinel. * Changed MDL_request and MDL_ticket classes to support notion of duration. * Changed MDL_context to keep locks with different duration in different lists. Changed code handling ticket list to take this into account. * Changed methods responsible for releasing locks to take into account duration of tickets. Particularly public MDL_context::release_lock() method now only can release tickets with explicit duration (there is still internal method which allows to specify duration). To release locks with statement or transaction duration one have to use release_statement/transactional_locks() methods. * Concept of savepoint for MDL subsystem now has to take into account locks with statement duration. Consequently MDL_savepoint class was introduced and methods working with savepoints were updated accordingly. * Added methods which allow to set duration for one or all locks in the context. sql/mysqld.cc: Removed global mutex and condition variables which were used by old implementation of GRL. Also we no longer need to initialize Events::LOCK_event_metadata mutex as it was replaced with metadata locks on events. sql/mysqld.h: Removed global variable, mutex and condition variables which were used by old implementation of GRL. sql/rpl_rli.cc: When slave thread closes tables which were open for handling of RBR events ensure that it releases global IX lock which was acquired as protection against GRL. sql/sp.cc: Adjusted code to the new signature of lock_object/routine_name(), to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sp_head.cc: Ensure that statements in stored procedures release statement metadata locks and thus release their protectiong against GRL in proper moment in time. Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_admin.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_base.cc: - Implemented support for new approach to acquiring protection against global read lock. We no longer acquire such protection explicitly on the basis of statement flags. Instead we always rely on code which is responsible for acquiring metadata locks on object to be changed acquiring this protection. This is achieved by acquiring global IX metadata lock with statement duration. Code doing this also responsible for checking that current connection has no active GRL by calling an Global_read_lock::can_acquire_protection() method. Changed code in open_table() and lock_table_names() accordingly. Note that as result of this change DDL and DML on temporary tables is always compatible with GRL (before it was incompatible in some cases and compatible in other cases). - To speed-up code acquiring protection against GRL introduced m_has_protection_against_grl member in Open_table_context class. It indicates that protection was already acquired sometime during open_tables() execution and new attempts can be skipped. - Thanks to new GRL implementation calls to broadcast_refresh() became unnecessary and were removed. - Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_base.h: Adjusted code to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. Also introduced Open_table_context::m_has_protection_against_grl member which allows to avoid acquiring protection against GRL while opening tables if such protection was already acquired. sql/sql_class.cc: Changed THD::leave_locked_tables_mode() after transactional sentinel for metadata locks was obsoleted by introduction of locks with explicit duration. sql/sql_class.h: - Adjusted code to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. - Changed Global_read_lock class according to changes in global read lock implementation: * wait_if_global_read_lock and start_waiting_global_read_lock are now gone. Instead code needing protection against GRL has to acquire global IX metadata lock with statement duration itself. To help it new can_acquire_protection() was introduced. Also as result of the above change m_protection_count member is gone too. * Added m_mdl_blocks_commits_lock member to store metadata lock blocking commits. * Adjusted code to the fact that concept of transactional sentinel was obsoleted by concept of lock duration. - Removed CF_PROTECT_AGAINST_GRL flag as it is no longer necessary. New GRL implementation acquires protection against global read lock automagically when statement acquires metadata locks on tables or other objects it is going to change. sql/sql_db.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_handler.cc: Removed call to broadcast_refresh() function. It is no longer needed with new GRL implementation. Adjusted code after introducing duration concept for metadata locks. Particularly to the fact transactional sentinel was replaced with explicit duration. sql/sql_handler.h: Renamed mysql_ha_move_tickets_after_trans_sentinel() to mysql_ha_set_explicit_lock_duration() after transactional sentinel was obsoleted by locks with explicit duration. sql/sql_insert.cc: Adjusted code handling delaying inserts after switching to new GRL implementation. Now connection thread initiating delayed insert has to acquire global IX lock in addition to metadata lock on table being inserted into. This IX lock protects against GRL and similarly to SW lock on table being inserted into has to be passed to handler thread in order to avoid deadlocks. sql/sql_lex.cc: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/sql_lex.h: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/sql_parse.cc: - Implemented support for new approach to acquiring protection against global read lock. We no longer acquire such protection explicitly on the basis of statement flags. Instead we always rely on code which is responsible for acquiring metadata locks on object to be changed acquiring this protection. This is achieved by acquiring global IX metadata lock with statement duration. This lock is automatically released at the end of statement execution. - Changed implementation of CREATE/DROP PROCEDURE/FUNCTION not to release metadata locks and thus protection against of GRL in the middle of statement execution. - Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_prepare.cc: Adjusted code to the to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_rename.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before renaming tables. This happens automatically in code which acquires metadata locks on tables being renamed. sql/sql_show.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_table.cc: - With new GRL implementation there is no need to explicitly acquire protection against GRL before dropping tables. This happens automatically in code which acquires metadata locks on tables being dropped. - Changed mysql_alter_table() not to release lock on new table name explicitly and to rely on automatic release of locks at the end of statement instead. This was necessary since now MDL_context::release_lock() is supported only for locks for explicit duration. sql/sql_trigger.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before changing table triggers. This happens automatically in code which acquires metadata locks on tables which triggers are to be changed. sql/sql_update.cc: Fix bug exposed by GRL testing. During prepare phase acquire only S metadata locks instead of SW locks to keep prepare of multi-UPDATE compatible with concurrent LOCK TABLES WRITE and global read lock. sql/sql_view.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before creating view. This happens automatically in code which acquires metadata lock on view to be created. sql/sql_yacc.yy: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/table.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/table.h: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/transaction.cc: Replaced custom implementation of global read lock with one based on metadata locks. Consequently when doing commit instead of calling method of Global_read_lock class to acquire protection against GRL we simply acquire IX in COMMIT namespace. Also adjusted code to the fact that MDL savepoint is now represented by MDL_savepoint class.
2010-11-11 18:11:05 +01:00
key_LOCK_gdl, key_LOCK_global_system_variables,
key_LOCK_logger, key_LOCK_manager,
key_LOCK_prepared_stmt_count,
key_LOCK_rpl_status, key_LOCK_server_started,
Changing all cost calculation to be given in milliseconds This makes it easier to compare different costs and also allows the optimizer to optimizer different storage engines more reliably. - Added tests/check_costs.pl, a tool to verify optimizer cost calculations. - Most engine costs has been found with this program. All steps to calculate the new costs are documented in Docs/optimizer_costs.txt - User optimizer_cost variables are given in microseconds (as individual costs can be very small). Internally they are stored in ms. - Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost (9 ms) to common SSD cost (400MB/sec). - Removed cost calculations for hard disks (rotation etc). - Changed the following handler functions to return IO_AND_CPU_COST. This makes it easy to apply different cost modifiers in ha_..time() functions for io and cpu costs. - scan_time() - rnd_pos_time() & rnd_pos_call_time() - keyread_time() - Enhanched keyread_time() to calculate the full cost of reading of a set of keys with a given number of ranges and optional number of blocks that need to be accessed. - Removed read_time() as keyread_time() + rnd_pos_time() can do the same thing and more. - Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks. Used heap table costs for json_table. The rest are using default engine costs. - Added the following new optimizer variables: - optimizer_disk_read_ratio - optimizer_disk_read_cost - optimizer_key_lookup_cost - optimizer_row_lookup_cost - optimizer_row_next_find_cost - optimizer_scan_cost - Moved all engine specific cost to OPTIMIZER_COSTS structure. - Changed costs to use 'records_out' instead of 'records_read' when recalculating costs. - Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h. This allows one to change costs without having to compile a lot of files. - Updated costs for filter lookup. - Use a better cost estimate in best_extension_by_limited_search() for the sorting cost. - Fixed previous issues with 'filtered' explain column as we are now using 'records_out' (min rows seen for table) to calculate filtering. This greatly simplifies the filtering code in JOIN_TAB::save_explain_data(). This change caused a lot of queries to be optimized differently than before, which exposed different issues in the optimizer that needs to be fixed. These fixes are in the following commits. To not have to change the same test case over and over again, the changes in the test cases are done in a single commit after all the critical change sets are done. InnoDB changes: - Updated InnoDB to not divide big range cost with 2. - Added cost for InnoDB (innobase_update_optimizer_costs()). - Don't mark clustered primary key with HA_KEYREAD_ONLY. This will prevent that the optimizer is trying to use index-only scans on the clustered key. - Disabled ha_innobase::scan_time() and ha_innobase::read_time() and ha_innobase::rnd_pos_time() as the default engine cost functions now works good for InnoDB. Other things: - Added --show-query-costs (\Q) option to mysql.cc to show the query cost after each query (good when working with query costs). - Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust the value that user is given. This is used to change cost from microseconds (user input) to milliseconds (what the server is internally using). - Added include/my_tracker.h ; Useful include file to quickly test costs of a function. - Use handler::set_table() in all places instead of 'table= arg'. - Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and shown in microseconds for the user but stored as milliseconds. This is to make the numbers easier to read for the user (less pre-zeros). Implemented in 'Sys_var_optimizer_cost' class. - In test_quick_select() do not use index scans if 'no_keyread' is set for the table. This is what we do in other places of the server. - Added THD parameter to Unique::get_use_cost() and check_index_intersect_extension() and similar functions to be able to provide costs to called functions. - Changed 'records' to 'rows' in optimizer_trace. - Write more information to optimizer_trace. - Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3) to calculate usage space of keys in b-trees. (Before we used numeric constants). - Removed code that assumed that b-trees has similar costs as binary trees. Replaced with engine calls that returns the cost. - Added Bitmap::find_first_bit() - Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia). - Added records_init and records_after_filter to POSITION to remember more of what best_access_patch() calculates. - table_after_join_selectivity() changed to recalculate 'records_out' based on the new fields from best_access_patch() Bug fixes: - Some queries did not update last_query_cost (was 0). Fixed by moving setting thd->...last_query_cost in JOIN::optimize(). - Write '0' as number of rows for const tables with a matching row. Some internals: - Engine cost are stored in OPTIMIZER_COSTS structure. When a handlerton is created, we also created a new cost variable for the handlerton. We also create a new variable if the user changes a optimizer cost for a not yet loaded handlerton either with command line arguments or with SET @@global.engine.optimizer_cost_variable=xx. - There are 3 global OPTIMIZER_COSTS variables: default_optimizer_costs The default costs + changes from the command line without an engine specifier. heap_optimizer_costs Heap table costs, used for temporary tables tmp_table_optimizer_costs The cost for the default on disk internal temporary table (MyISAM or Aria) - The engine cost for a table is stored in table_share. To speed up accesses the handler has a pointer to this. The cost is copied to the table on first access. If one wants to change the cost one must first update the global engine cost and then do a FLUSH TABLES. This was done to be able to access the costs for an open table without any locks. - When a handlerton is created, the cost are updated the following way: See sql/keycaches.cc for details: - Use 'default_optimizer_costs' as a base - Call hton->update_optimizer_costs() to override with the engines default costs. - Override the costs that the user has specified for the engine. - One handler open, copy the engine cost from handlerton to TABLE_SHARE. - Call handler::update_optimizer_costs() to allow the engine to update cost for this particular table. - There are two costs stored in THD. These are copied to the handler when the table is used in a query: - optimizer_where_cost - optimizer_scan_setup_cost - Simply code in best_access_path() by storing all cost result in a structure. (Idea/Suggestion by Igor)
2022-08-11 12:05:23 +02:00
key_LOCK_status, key_LOCK_optimizer_costs,
key_LOCK_thd_data, key_LOCK_thd_kill,
MDEV-4991: GTID binlog indexing Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-09-08 13:12:49 +02:00
key_LOCK_user_conn, key_LOG_LOCK_log, key_gtid_index_lock,
key_master_info_data_lock, key_master_info_run_lock,
key_master_info_sleep_lock, key_master_info_start_stop_lock,
MDEV-11675 Lag Free Alter On Slave This commit implements two phase binloggable ALTER. When a new @@session.binlog_alter_two_phase = YES ALTER query gets logged in two parts, the START ALTER and the COMMIT or ROLLBACK ALTER. START Alter is written in binlog as soon as necessary locks have been acquired for the table. The timing is such that any concurrent DML:s that update the same table are either committed, thus logged into binary log having done work on the old version of the table, or will be queued for execution on its new version. The "COMPLETE" COMMIT or ROLLBACK ALTER are written at the very point of a normal "single-piece" ALTER that is after the most of the query work is done. When its result is positive COMMIT ALTER is written, otherwise ROLLBACK ALTER is written with specific error happened after START ALTER phase. Replication of two-phase binloggable ALTER is cross-version safe. Specifically the OLD slave merely does not recognized the start alter part, still being able to process and memorize its gtid. Two phase logged ALTER is read from binlog by mysqlbinlog to produce BINLOG 'string', where 'string' contains base64 encoded Query_log_event containing either the start part of ALTER, or a completion part. The Query details can be displayed with `-v` flag, similarly to ROW format events. Notice, mysqlbinlog output containing parts of two-phase binloggable ALTER is processable correctly only by binlog_alter_two_phase server. @@log_warnings > 2 can reveal details of binlogging and slave side processing of the ALTER parts. The current commit also carries fixes to the following list of reported bugs: MDEV-27511, MDEV-27471, MDEV-27349, MDEV-27628, MDEV-27528. Thanks to all people involved into early discussion of the feature including Kristian Nielsen, those who helped to design, implement and test: Sergei Golubchik, Andrei Elkin who took the burden of the implemenation completion, Sujatha Sivakumar, Brandon Nesterenko, Alice Sherepa, Ramesh Sivaraman, Jan Lindstrom.
2021-01-29 12:59:14 +01:00
key_master_info_start_alter_lock,
key_master_info_start_alter_list_lock,
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
Fixes for parallel slave: - Made slaves temporary table multi-thread slave safe by adding mutex around save_temporary_table usage. - rli->save_temporary_tables is the active list of all used temporary tables - This is copied to THD->temporary_tables when temporary tables are opened and updated when temporary tables are closed - Added THD->lock_temporary_tables() and THD->unlock_temporary_tables() to simplify this. - Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code. - Added is_part_of_group() to mark functions that are part of the next function. This replaces setting IN_STMT when events are executed. - Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. - If slave_skip_counter is set run things in single threaded mode. This simplifies code for skipping events. - Updating state of relay log (IN_STMT and IN_TRANSACTION) is moved to one single function: update_state_of_relay_log() We can't use OPTION_BEGIN to check for the state anymore as the sql_driver and sql execution threads may be different. Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags doesn't survive slave restarts is_in_group() is now independent of state of executed transaction. - Reset thd->transaction.all.modified_non_trans_table() if we did set it for single table row events. This was mainly for keeping the flag as documented. - Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. - Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock - Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond - Changed some functions to take rpl_group_info instead of Relay_log_info to make them multi-slave safe and to simplify usage - do_shall_skip() - continue_group() - sql_slave_killed() - next_event() - Simplifed arguments to io_salve_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. - set_thd_in_use_temporary_tables() removed as in_use is set on usage - Added information to thd_proc_info() which thread is waiting for slave mutex to exit. - In open_table() reuse code from find_temporary_table() Other things: - More DBUG statements - Fixed the rpl_incident.test can be run with --debug - More comments - Disabled not used function rpl_connect_master() mysql-test/suite/perfschema/r/all_instances.result: Moved sleep_lock and sleep_cond to rpl_group_info mysql-test/suite/rpl/r/rpl_incident.result: Updated result mysql-test/suite/rpl/t/rpl_incident-master.opt: Not needed anymore mysql-test/suite/rpl/t/rpl_incident.test: Fixed that test can be run with --debug sql/handler.cc: More DBUG_PRINT sql/log.cc: More comments sql/log_event.cc: Added DBUG statements do_shall_skip(), continue_group() now takes rpl_group_info param Use is_begin(), is_commit() and is_rollback() functions instead of inspecting query string We don't have set slaves temporary tables 'in_use' as this is now done when tables are opened. Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() Use IN_TRANSACTION flag to test state of relay log. In rows_event_stmt_cleanup() reset thd->transaction.all.modified_non_trans_table if we had set this before. sql/log_event.h: do_shall_skip(), continue_group() now takes rpl_group_info param Added is_part_of_group() to mark events that are part of the next event. This replaces setting IN_STMT when events are executed. Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. sql/log_event_old.cc: Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() do_shall_skip(), continue_group() now takes rpl_group_info param sql/log_event_old.h: Added is_part_of_group() to mark events that are part of the next event. do_shall_skip(), continue_group() now takes rpl_group_info param sql/mysqld.cc: Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. Relay_log_info::sleep_lock -> Rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> Rpl_group_info::sleep_cond sql/mysqld.h: Updated types and names sql/rpl_gtid.cc: More DBUG sql/rpl_parallel.cc: Updated TODO section Set thd for event that is execution Use new is_begin(), is_commit() and is_rollback() functions. More comments sql/rpl_rli.cc: sql_thd -> sql_driver_thd Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags doesn't survive slave restarts. Reset table->in_use for temporary tables as the table may have been used by another THD. Use IN_TRANSACTION instead of OPTION_BEGIN to check state of relay log. Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() sql/rpl_rli.h: Changed relay log state flags to bit masks instead of bit positions (most other code we have uses bit masks) Added IN_TRANSACTION to mark if we are in a BEGIN ... COMMIT section. save_temporary_tables is now thread safe Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code is_in_group() is now independent of state of executed transaction. sql/slave.cc: Simplifed arguments to io_salve_killed(), sql_slave_killed() and check_io_slave_killed(); No reason to supply THD as this is part of the given structure. set_thd_in_use_temporary_tables() removed as in_use is set on usage in sql_base.cc sql_thd -> sql_driver_thd More DBUG Added update_state_of_relay_log() which will calculate the IN_STMT and IN_TRANSACTION state of the relay log after the current element is executed. If slave_skip_counter is set run things in single threaded mode. Simplifed arguments to io_salve_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. Added information to thd_proc_info() which thread is waiting for slave mutex to exit. Disabled not used function rpl_connect_master() Updated argument to next_event() sql/sql_base.cc: Added mutex around usage of slave's temporary tables. The active list is always kept up to date in sql->rgi_slave->save_temporary_tables. Clear thd->temporary_tables after query (safety) More DBUG When using temporary table, set table->in_use to current thd as the THD may be different for slave threads. Some code is ifdef:ed with REMOVE_AFTER_MERGE_WITH_10 as the given code in 10.0 is not yet in this tree. In open_table() reuse code from find_temporary_table() sql/sql_binlog.cc: rli->sql_thd -> rli->sql_driver_thd Remove duplicate setting of rgi->rli sql/sql_class.cc: Added helper functions rgi_lock_temporary_tables() and rgi_unlock_temporary_tables() Would have been nicer to have these inline, but there was no easy way to do that sql/sql_class.h: Added functions to protect slaves temporary tables sql/sql_parse.cc: Added DBUG_PRINT sql/transaction.cc: Added comment
2013-10-13 23:24:05 +02:00
key_rpl_group_info_sleep_lock,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
MDEV-29693 ANALYZE TABLE still flushes table definition cache when engine-independent statistics is used This commits enables reloading of engine-independent statistics without flushing the table from table definition cache. This is achieved by allowing multiple version of the TABLE_STATISTICS_CB object and having independent pointers to it in TABLE and TABLE_SHARE. The TABLE_STATISTICS_CB object have reference pointers and are freed when no one is pointing to it anymore. TABLE's TABLE_STATISTICS_CB pointer is updated to use the TABLE_SHARE's pointer when read_statistics_for_tables() is called at the beginning of a query. Main changes: - read_statistics_for_table() will allocate an new TABLE_STATISTICS_CB object. - All get_stat_values() functions has a new parameter that tells where collected data should be stored. get_stat_values() are not using the table_field object anymore to store data. - All get_stat_values() functions returns 1 if they found any data in the statistics tables. Other things: - Fixed INSERT DELAYED to not read statistics tables. - Removed Statistics_state from TABLE_STATISTICS_CB as this is not needed anymore as wer are not changing TABLE_SHARE->stats_cb while calculating or loading statistics. - Store values used with store_from_statistical_minmax_field() in TABLE_STATISTICS_CB::mem_root. This allowed me to remove the function delete_stat_values_for_table_share(). - Field_blob::store_from_statistical_minmax_field() is implemented but is not normally used as we do not yet support EIS statistics for blobs. For example Field_blob::update_min() and Field_blob::update_max() are not implemented. Note that the function can be called if there is an concurrent "ALTER TABLE MODIFY field BLOB" running because of a bug in ALTER TABLE where it deletes entries from column_stats before it has an exclusive lock on the table. - Use result of field->val_str(&val) as a pointer to the result instead of val (safetly fix). - Allocate memory for collected statistics in THD::mem_root, not in in TABLE::mem_root. This could cause the TABLE object to grow if a ANALYZE TABLE was run many times on the same table. This was done in allocate_statistics_for_table(), create_min_max_statistical_fields_for_table() and create_min_max_statistical_fields_for_table_share(). - Store in TABLE_STATISTICS_CB::stats_available which statistics was found in the statistics tables. - Removed index_table from class Index_prefix_calc as it was not used. - Added TABLE_SHARE::LOCK_statistics to ensure we don't load EITS in parallel. First thread will load it, others will reuse the loaded data. - Eliminate read_histograms_for_table(). The loading happens within read_statistics_for_tables() if histograms are needed. One downside is that if we have read statistics without histograms before and someone requires histograms, we have to read all statistics again (once) from the statistics tables. A smaller downside is the need to call alloc_root() for each individual histogram. Before we could allocate all the space for histograms with a single alloc_root. - Fixed bug in MyISAM and Aria where they did not properly notice that table had changed after analyze table. This was not a problem before this patch as then the MyISAM and Aria tables where flushed as part of ANALYZE table which did hide this issue. - Fixed a bug in ANALYZE table where table->records could be seen as 0 in collect_statistics_for_table(). The effect of this unlikely bug was that a full table scan could be done even if analyze_sample_percentage was not set to 1. - Changed multiple mallocs in a row to use multi_alloc_root(). - Added a mutex protection in update_statistics_for_table() to ensure that several tables are not updating the statistics at the same time. Some of the changes in sql_statistics.cc are based on a patch from Oleg Smirnov <olernov@gmail.com> Co-authored-by: Oleg Smirnov <olernov@gmail.com> Co-authored-by: Vicentiu Ciorbaru <cvicentiu@gmail.com> Reviewer: Sergei Petrunia <sergey@mariadb.com>
2023-08-05 00:08:05 +02:00
key_TABLE_SHARE_LOCK_statistics,
key_LOCK_start_thread,
key_LOCK_error_messages,
key_PARTITION_LOCK_auto_inc;
extern PSI_mutex_key key_RELAYLOG_LOCK_index;
extern PSI_mutex_key key_LOCK_relaylog_end_pos;
extern PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state,
key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry;
extern PSI_mutex_key key_TABLE_SHARE_LOCK_share, key_LOCK_stats,
2011-04-25 17:22:25 +02:00
key_LOCK_global_user_client_stats, key_LOCK_global_table_stats,
key_LOCK_global_index_stats, key_LOCK_wakeup_ready, key_LOCK_wait_commit,
key_TABLE_SHARE_LOCK_rotation;
extern PSI_mutex_key key_LOCK_gtid_waiting;
2011-04-25 17:22:25 +02:00
extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock,
key_LOCK_SEQUENCE,
key_rwlock_LOCK_vers_stats, key_rwlock_LOCK_stat_serial,
key_rwlock_THD_list;
#ifdef HAVE_MMAP
2011-10-25 12:53:40 +02:00
extern PSI_cond_key key_PAGE_cond, key_COND_active, key_COND_pool;
#endif /* HAVE_MMAP */
extern PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
key_BINLOG_COND_binlog_background_thread,
key_BINLOG_COND_binlog_background_thread_end,
Patch that refactors global read lock implementation and fixes bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK" and bug #54673 "It takes too long to get readlock for 'FLUSH TABLES WITH READ LOCK'". The first bug manifested itself as a deadlock which occurred when a connection, which had some table open through HANDLER statement, tried to update some data through DML statement while another connection tried to execute FLUSH TABLES WITH READ LOCK concurrently. What happened was that FTWRL in the second connection managed to perform first step of GRL acquisition and thus blocked all upcoming DML. After that it started to wait for table open through HANDLER statement to be flushed. When the first connection tried to execute DML it has started to wait for GRL/the second connection creating deadlock. The second bug manifested itself as starvation of FLUSH TABLES WITH READ LOCK statements in cases when there was a constant stream of concurrent DML statements (in two or more connections). This has happened because requests for protection against GRL which were acquired by DML statements were ignoring presence of pending GRL and thus the latter was starved. This patch solves both these problems by re-implementing GRL using metadata locks. Similar to the old implementation acquisition of GRL in new implementation is two-step. During the first step we block all concurrent DML and DDL statements by acquiring global S metadata lock (each DML and DDL statement acquires global IX lock for its duration). During the second step we block commits by acquiring global S lock in COMMIT namespace (commit code acquires global IX lock in this namespace). Note that unlike in old implementation acquisition of protection against GRL in DML and DDL is semi-automatic. We assume that any statement which should be blocked by GRL will either open and acquires write-lock on tables or acquires metadata locks on objects it is going to modify. For any such statement global IX metadata lock is automatically acquired for its duration. The first problem is solved because waits for GRL become visible to deadlock detector in metadata locking subsystem and thus deadlocks like one in the first bug become impossible. The second problem is solved because global S locks which are used for GRL implementation are given preference over IX locks which are acquired by concurrent DML (and we can switch to fair scheduling in future if needed). Important change: FTWRL/GRL no longer blocks DML and DDL on temporary tables. Before this patch behavior was not consistent in this respect: in some cases DML/DDL statements on temporary tables were blocked while in others they were not. Since the main use cases for FTWRL are various forms of backups and temporary tables are not preserved during backups we have opted for consistently allowing DML/DDL on temporary tables during FTWRL/GRL. Important change: This patch changes thread state names which are used when DML/DDL of FTWRL is waiting for global read lock. It is now either "Waiting for global read lock" or "Waiting for commit lock" depending on the stage on which FTWRL is. Incompatible change: To solve deadlock in events code which was exposed by this patch we have to replace LOCK_event_metadata mutex with metadata locks on events. As result we have to prohibit DDL on events under LOCK TABLES. This patch also adds extensive test coverage for interaction of DML/DDL and FTWRL. Performance of new and old global read lock implementations in sysbench tests were compared. There were no significant difference between new and old implementations. mysql-test/include/check_ftwrl_compatible.inc: Added helper script which allows to check that a statement is compatible with FLUSH TABLES WITH READ LOCK. mysql-test/include/check_ftwrl_incompatible.inc: Added helper script which allows to check that a statement is incompatible with FLUSH TABLES WITH READ LOCK. mysql-test/include/handler.inc: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/include/wait_show_condition.inc: Fixed small error in the timeout message. The correct name of variable used as parameter for this script is "$condition" and not "$wait_condition". mysql-test/r/delayed.result: Added test coverage for scenario which triggered assert in metadata locking subsystem. mysql-test/r/events_2.result: Updated test results after prohibiting event DDL operations under LOCK TABLES. mysql-test/r/flush.result: Added test coverage for bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". mysql-test/r/flush_read_lock.result: Added test coverage for various aspects of FLUSH TABLES WITH READ LOCK functionality. mysql-test/r/flush_read_lock_kill.result: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Use new debug_sync point. Do not disable concurrent inserts as now InnoDB we always use InnoDB table. mysql-test/r/handler_innodb.result: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/r/handler_myisam.result: Adjusted test case to the fact that now DROP TABLE closes open HANDLERs for the table to be dropped before checking if there active FTWRL in this connection. mysql-test/r/mdl_sync.result: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Replaced usage of GRL-specific debug_sync's with appropriate sync points in MDL subsystem. mysql-test/suite/perfschema/r/dml_setup_instruments.result: Updated test results after removing global COND_global_read_lock condition variable. mysql-test/suite/perfschema/r/func_file_io.result: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/r/func_mutex.result: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/r/global_read_lock.result: Adjusted test case to take into account that new GRL implementation is based on MDL. mysql-test/suite/perfschema/r/server_init.result: Adjusted test case after replacing custom global read lock implementation with one based on MDL and replacing LOCK_event_metadata mutex with metadata lock. mysql-test/suite/perfschema/t/func_file_io.test: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/t/func_mutex.test: Ensure that this test doesn't affect subsequent tests. At the end of its execution enable back P_S instrumentation which this test disables at some point. mysql-test/suite/perfschema/t/global_read_lock.test: Adjusted test case to take into account that new GRL implementation is based on MDL. mysql-test/suite/perfschema/t/server_init.test: Adjusted test case after replacing custom global read lock implementation with one based on MDL and replacing LOCK_event_metadata mutex with metadata lock. mysql-test/suite/rpl/r/rpl_tmp_table_and_DDL.result: Updated test results after prohibiting event DDL under LOCK TABLES. mysql-test/t/delayed.test: Added test coverage for scenario which triggered assert in metadata locking subsystem. mysql-test/t/events_2.test: Updated test case after prohibiting event DDL operations under LOCK TABLES. mysql-test/t/flush.test: Added test coverage for bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". mysql-test/t/flush_block_commit.test: Adjusted test case after changing thread state name which is used when COMMIT waits for FLUSH TABLES WITH READ LOCK from "Waiting for release of readlock" to "Waiting for commit lock". mysql-test/t/flush_block_commit_notembedded.test: Adjusted test case after changing thread state name which is used when DML waits for FLUSH TABLES WITH READ LOCK. Now we use "Waiting for global read lock" in this case. mysql-test/t/flush_read_lock.test: Added test coverage for various aspects of FLUSH TABLES WITH READ LOCK functionality. mysql-test/t/flush_read_lock_kill-master.opt: We no longer need to use make_global_read_lock_block_commit_loop debug tag in this test. Instead we rely on an appropriate debug_sync point in MDL code. mysql-test/t/flush_read_lock_kill.test: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Use new debug_sync point. Do not disable concurrent inserts as now InnoDB we always use InnoDB table. mysql-test/t/lock_multi.test: Adjusted test case after changing thread state names which are used when DML or DDL waits for FLUSH TABLES WITH READ LOCK to "Waiting for global read lock". mysql-test/t/mdl_sync.test: Adjusted test case after replacing custom global read lock implementation with one based on metadata locks. Replaced usage of GRL-specific debug_sync's with appropriate sync points in MDL subsystem. Updated thread state names which are used when DDL waits for FTWRL. mysql-test/t/trigger_notembedded.test: Adjusted test case after changing thread state names which are used when DML or DDL waits for FLUSH TABLES WITH READ LOCK to "Waiting for global read lock". sql/event_data_objects.cc: Removed Event_queue_element::status/last_executed_changed members and Event_queue_element::update_timing_fields() method. We no longer use this class for updating mysql.events once event is chosen for execution. Accesses to instances of this class in scheduler thread require protection by Event_queue::LOCK_event_queue mutex and we try to avoid updating table while holding this lock. sql/event_data_objects.h: Removed Event_queue_element::status/last_executed_changed members and Event_queue_element::update_timing_fields() method. We no longer use this class for updating mysql.events once event is chosen for execution. Accesses to instances of this class in scheduler thread require protection by Event_queue::LOCK_event_queue mutex and we try to avoid updating table while holding this lock. sql/event_db_repository.cc: - Changed Event_db_repository methods to not release all metadata locks once they are done updating mysql.events table. This allows to keep metadata lock protecting against GRL and lock protecting particular event around until corresponding DDL statement is written to the binary log. - Removed logic for conditional update of "status" and "last_executed" fields from update_timing_fields_for_event() method. In the only case when this method is called now "last_executed" is always modified and tracking change of "status" is too much hassle. sql/event_db_repository.h: Removed logic for conditional update of "status" and "last_executed" fields from Event_db_repository:: update_timing_fields_for_event() method. In the only case when this method is called now "last_executed" is always modified and tracking change of "status" field is too much hassle. sql/event_queue.cc: Changed event scheduler code not to update mysql.events table while holding Event_queue::LOCK_event_queue mutex. Doing so led to a deadlock with a new GRL implementation. This deadlock didn't occur with old implementation due to fact that code acquiring protection against GRL ignored pending GRL requests (which lead to GRL starvation). One of goals of new implementation is to disallow GRL starvation and so we have to solve problem with this deadlock in a different way. sql/events.cc: Changed methods of Events class to acquire protection against GRL while perfoming DDL statement and keep it until statement is written to the binary log. Unfortunately this step together with new GRL implementation exposed deadlock involving Events::LOCK_event_metadata and GRL. To solve it Events::LOCK_event_metadata mutex was replaced with a metadata lock on event. As a side-effect events DDL has to be prohibited under LOCK TABLES even in cases when mysql.events table was explicitly locked for write. sql/events.h: Replaced Events::LOCK_event_metadata mutex with a metadata lock on event. sql/ha_ndbcluster.cc: Updated code after replacing custom global read lock implementation with one based on MDL. Since MDL subsystem should now be able to detect deadlocks involving metadata locks and GRL there is no need for special handling of active GRL. sql/handler.cc: Replaced custom implementation of global read lock with one based on metadata locks. Consequently when doing commit instead of calling method of Global_read_lock class to acquire protection against GRL we simply acquire IX in COMMIT namespace. sql/lock.cc: Replaced custom implementation of global read lock with one based on metadata locks. This step allows to expose wait for GRL to deadlock detector of MDL subsystem and thus succesfully resolve deadlocks similar to one behind bug #57006 "Deadlock between HANDLER and FLUSH TABLES WITH READ LOCK". It also solves problem with GRL starvation described in bug #54673 "It takes too long to get readlock for 'FLUSH TABLES WITH READ LOCK'" since metadata locks used by GRL give preference to FTWRL statement instead of DML statements (if needed in future this can be changed to fair scheduling). Similar to old implementation of acquisition of GRL is two-step. During the first step we block all concurrent DML and DDL statements by acquiring global S metadata lock (each DML and DDL statement acquires global IX lock for its duration). During the second step we block commits by acquiring global S lock in COMMIT namespace (commit code acquires global IX lock in this namespace). Note that unlike in old implementation acquisition of protection against GRL in DML and DDL is semi-automatic. We assume that any statement which should be blocked by GRL will either open and acquires write-lock on tables or acquires metadata locks on objects it is going to modify. For any such statement global IX metadata lock is automatically acquired for its duration. To support this change: - Global_read_lock::lock/unlock_global_read_lock and make_global_read_lock_block_commit methods were changed accordingly. - Global_read_lock::wait_if_global_read_lock() and start_waiting_global_read_lock() methods were dropped. It is now responsibility of code acquiring metadata locks opening tables to acquire protection against GRL by explicitly taking global IX lock with statement duration. - Global variables, mutex and condition variable used by old implementation was removed. - lock_routine_name() was changed to use statement duration for its global IX lock. It was also renamed to lock_object_name() as it now also used to take metadata locks on events. - Global_read_lock::set_explicit_lock_duration() was added which allows not to release locks used for GRL when leaving prelocked mode. sql/lock.h: - Renamed lock_routine_name() to lock_object_name() and changed its signature to allow its usage for events. - Removed broadcast_refresh() function. It is no longer needed with new GRL implementation. sql/log_event.cc: Release metadata locks with statement duration at the end of processing legacy event for LOAD DATA. This ensures that replication thread processing such event properly releases its protection against global read lock. sql/mdl.cc: Changed MDL subsystem to support new MDL-based implementation of global read lock. Added COMMIT and EVENTS namespaces for metadata locks. Changed thread state name for GLOBAL namespace to "Waiting for global read lock". Optimized MDL_map::find_or_insert() method to avoid taking m_mutex mutex when looking up MDL_lock objects for GLOBAL or COMMIT namespaces. We keep pre-created MDL_lock objects for these namespaces around and simply return pointers to these global objects when needed. Changed MDL_lock/MDL_scoped_lock to properly handle notification of insert delayed handler threads when FTWRL takes global S lock. Introduced concept of lock duration. In addition to locks with transaction duration which work in the way which is similar to how locks worked before (i.e. they are released at the end of transaction), locks with statement and explicit duration were introduced. Locks with statement duration are automatically released at the end of statement. Locks with explicit duration require explicit release and obsolete concept of transactional sentinel. * Changed MDL_request and MDL_ticket classes to support notion of duration. * Changed MDL_context to keep locks with different duration in different lists. Changed code handling ticket list to take this into account. * Changed methods responsible for releasing locks to take into account duration of tickets. Particularly public MDL_context::release_lock() method now only can release tickets with explicit duration (there is still internal method which allows to specify duration). To release locks with statement or transaction duration one have to use release_statement/transactional_locks() methods. * Concept of savepoint for MDL subsystem now has to take into account locks with statement duration. Consequently MDL_savepoint class was introduced and methods working with savepoints were updated accordingly. * Added methods which allow to set duration for one or all locks in the context. sql/mdl.h: Changed MDL subsystem to support new MDL-based implementation of global read lock. Added COMMIT and EVENTS namespaces for metadata locks. Introduced concept of lock duration. In addition to locks with transaction duration which work in the way which is similar to how locks worked before (i.e. they are released at the end of transaction), locks with statement and explicit duration were introduced. Locks with statement duration are automatically released at the end of statement. Locks with explicit duration require explicit release and obsolete concept of transactional sentinel. * Changed MDL_request and MDL_ticket classes to support notion of duration. * Changed MDL_context to keep locks with different duration in different lists. Changed code handling ticket list to take this into account. * Changed methods responsible for releasing locks to take into account duration of tickets. Particularly public MDL_context::release_lock() method now only can release tickets with explicit duration (there is still internal method which allows to specify duration). To release locks with statement or transaction duration one have to use release_statement/transactional_locks() methods. * Concept of savepoint for MDL subsystem now has to take into account locks with statement duration. Consequently MDL_savepoint class was introduced and methods working with savepoints were updated accordingly. * Added methods which allow to set duration for one or all locks in the context. sql/mysqld.cc: Removed global mutex and condition variables which were used by old implementation of GRL. Also we no longer need to initialize Events::LOCK_event_metadata mutex as it was replaced with metadata locks on events. sql/mysqld.h: Removed global variable, mutex and condition variables which were used by old implementation of GRL. sql/rpl_rli.cc: When slave thread closes tables which were open for handling of RBR events ensure that it releases global IX lock which was acquired as protection against GRL. sql/sp.cc: Adjusted code to the new signature of lock_object/routine_name(), to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sp_head.cc: Ensure that statements in stored procedures release statement metadata locks and thus release their protectiong against GRL in proper moment in time. Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_admin.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_base.cc: - Implemented support for new approach to acquiring protection against global read lock. We no longer acquire such protection explicitly on the basis of statement flags. Instead we always rely on code which is responsible for acquiring metadata locks on object to be changed acquiring this protection. This is achieved by acquiring global IX metadata lock with statement duration. Code doing this also responsible for checking that current connection has no active GRL by calling an Global_read_lock::can_acquire_protection() method. Changed code in open_table() and lock_table_names() accordingly. Note that as result of this change DDL and DML on temporary tables is always compatible with GRL (before it was incompatible in some cases and compatible in other cases). - To speed-up code acquiring protection against GRL introduced m_has_protection_against_grl member in Open_table_context class. It indicates that protection was already acquired sometime during open_tables() execution and new attempts can be skipped. - Thanks to new GRL implementation calls to broadcast_refresh() became unnecessary and were removed. - Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_base.h: Adjusted code to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. Also introduced Open_table_context::m_has_protection_against_grl member which allows to avoid acquiring protection against GRL while opening tables if such protection was already acquired. sql/sql_class.cc: Changed THD::leave_locked_tables_mode() after transactional sentinel for metadata locks was obsoleted by introduction of locks with explicit duration. sql/sql_class.h: - Adjusted code to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. - Changed Global_read_lock class according to changes in global read lock implementation: * wait_if_global_read_lock and start_waiting_global_read_lock are now gone. Instead code needing protection against GRL has to acquire global IX metadata lock with statement duration itself. To help it new can_acquire_protection() was introduced. Also as result of the above change m_protection_count member is gone too. * Added m_mdl_blocks_commits_lock member to store metadata lock blocking commits. * Adjusted code to the fact that concept of transactional sentinel was obsoleted by concept of lock duration. - Removed CF_PROTECT_AGAINST_GRL flag as it is no longer necessary. New GRL implementation acquires protection against global read lock automagically when statement acquires metadata locks on tables or other objects it is going to change. sql/sql_db.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/sql_handler.cc: Removed call to broadcast_refresh() function. It is no longer needed with new GRL implementation. Adjusted code after introducing duration concept for metadata locks. Particularly to the fact transactional sentinel was replaced with explicit duration. sql/sql_handler.h: Renamed mysql_ha_move_tickets_after_trans_sentinel() to mysql_ha_set_explicit_lock_duration() after transactional sentinel was obsoleted by locks with explicit duration. sql/sql_insert.cc: Adjusted code handling delaying inserts after switching to new GRL implementation. Now connection thread initiating delayed insert has to acquire global IX lock in addition to metadata lock on table being inserted into. This IX lock protects against GRL and similarly to SW lock on table being inserted into has to be passed to handler thread in order to avoid deadlocks. sql/sql_lex.cc: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/sql_lex.h: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/sql_parse.cc: - Implemented support for new approach to acquiring protection against global read lock. We no longer acquire such protection explicitly on the basis of statement flags. Instead we always rely on code which is responsible for acquiring metadata locks on object to be changed acquiring this protection. This is achieved by acquiring global IX metadata lock with statement duration. This lock is automatically released at the end of statement execution. - Changed implementation of CREATE/DROP PROCEDURE/FUNCTION not to release metadata locks and thus protection against of GRL in the middle of statement execution. - Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_prepare.cc: Adjusted code to the to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_rename.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before renaming tables. This happens automatically in code which acquires metadata locks on tables being renamed. sql/sql_show.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request and to the fact that savepoints for MDL subsystem are now represented by MDL_savepoint class. sql/sql_table.cc: - With new GRL implementation there is no need to explicitly acquire protection against GRL before dropping tables. This happens automatically in code which acquires metadata locks on tables being dropped. - Changed mysql_alter_table() not to release lock on new table name explicitly and to rely on automatic release of locks at the end of statement instead. This was necessary since now MDL_context::release_lock() is supported only for locks for explicit duration. sql/sql_trigger.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before changing table triggers. This happens automatically in code which acquires metadata locks on tables which triggers are to be changed. sql/sql_update.cc: Fix bug exposed by GRL testing. During prepare phase acquire only S metadata locks instead of SW locks to keep prepare of multi-UPDATE compatible with concurrent LOCK TABLES WRITE and global read lock. sql/sql_view.cc: With new GRL implementation there is no need to explicitly acquire protection against GRL before creating view. This happens automatically in code which acquires metadata lock on view to be created. sql/sql_yacc.yy: LEX::protect_against_global_read_lock member is no longer necessary since protection against GRL is automatically taken by code acquiring metadata locks/opening tables. sql/table.cc: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/table.h: Adjusted code to the fact that one now needs specify duration of lock when initializing MDL_request. sql/transaction.cc: Replaced custom implementation of global read lock with one based on metadata locks. Consequently when doing commit instead of calling method of Global_read_lock class to acquire protection against GRL we simply acquire IX in COMMIT namespace. Also adjusted code to the fact that MDL savepoint is now represented by MDL_savepoint class.
2010-11-11 18:11:05 +01:00
key_COND_cache_status_changed, key_COND_manager,
Fix for bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". The problem was that FLUSH TABLES <list> WITH READ LOCK which was issued when other connection has acquired global read lock using FLUSH TABLES WITH READ LOCK was blocked and has to wait until global read lock is released. This issue stemmed from the fact that FLUSH TABLES <list> WITH READ LOCK implementation has acquired X metadata locks on tables to be flushed. Since these locks required acquiring of global IX lock this statement was incompatible with global read lock. This patch addresses problem by using SNW metadata type of lock for tables to be flushed by FLUSH TABLES <list> WITH READ LOCK. It is OK to acquire them without global IX lock as long as we won't try to upgrade those locks. Since SNW locks allow concurrent statements using same table FLUSH TABLE <list> WITH READ LOCK now has to wait until old versions of tables to be flushed go away after acquiring metadata locks. Since such waiting can lead to deadlock MDL deadlock detector was extended to take into account waits for flush and resolve such deadlocks. As a bonus code in open_tables() which was responsible for waiting old versions of tables to go away was refactored. Now when we encounter old version of table in open_table() we don't back-off and wait for all old version to go away, but instead wait for this particular table to be flushed. Such approach supported by deadlock detection should reduce number of scenarios in which FLUSH TABLES aborts concurrent multi-statement transactions. Note that active FLUSH TABLES <list> WITH READ LOCK still blocks concurrent FLUSH TABLES WITH READ LOCK statement as the former keeps tables open and thus prevents the latter statement from doing flush. mysql-test/include/handler.inc: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/r/flush.result: Added test which checks that "flush tables <list> with read lock" is compatible with active "flush tables with read lock" but not vice-versa. This test also covers bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". mysql-test/r/mdl_sync.result: Added scenarios in which wait for table to be flushed causes deadlocks to the coverage of MDL deadlock detector. mysql-test/suite/perfschema/r/dml_setup_instruments.result: Adjusted test results after removal of COND_refresh condition variable. mysql-test/suite/perfschema/r/server_init.result: Adjusted test and its results after removal of COND_refresh condition variable. mysql-test/suite/perfschema/t/server_init.test: Adjusted test and its results after removal of COND_refresh condition variable. mysql-test/t/flush.test: Added test which checks that "flush tables <list> with read lock" is compatible with active "flush tables with read lock" but not vice-versa. This test also covers bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". mysql-test/t/kill.test: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/t/lock_multi.test: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/t/mdl_sync.test: Added scenarios in which wait for table to be flushed causes deadlocks to the coverage of MDL deadlock detector. sql/ha_ndbcluster.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/ha_ndbcluster_binlog.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/lock.cc: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/mdl.cc: Now MDL deadlock detector takes into account information about waits for table flushes when searching for deadlock. To implement this change: - Declaration of enum_deadlock_weight and Deadlock_detection_visitor were moved to mdl.h header to make them available to the code in table.cc which implements deadlock detector traversal through edges of waiters graph representing waiting for flush. - Since now MDL_context may wait not only for metadata lock but also for table to be flushed an abstract Wait_for_edge class was introduced. Its descendants MDL_ticket and Flush_ticket incapsulate specifics of inspecting waiters graph when following through edge representing wait of particular type. We no longer require global IX metadata lock when acquiring SNW or SNRW locks. Such locks are needed only when metadata locks of these types are upgraded to X locks. This allows to use SNW locks in FLUSH TABLES <list> WITH READ LOCK implementation and keep the latter compatible with global read lock. sql/mdl.h: Now MDL deadlock detector takes into account information about waits for table flushes when searching for deadlock. To implement this change: - Declaration of enum_deadlock_weight and Deadlock_detection_visitor were moved to mdl.h header to make them available to the code in table.cc which implements deadlock detector traversal through edges of waiters graph representing waiting for flush. - Since now MDL_context may wait not only for metadata lock but also for table to be flushed an abstract Wait_for_edge class was introduced. Its descendants MDL_ticket and Flush_ticket incapsulate specifics of inspecting waiters graph when following through edge representing wait of particular type. - Deadlock_detection_visitor now has m_table_shares_visited member which allows to support recursive locking for LOCK_open. This is required when deadlock detector inspects waiters graph which contains several edges representing waits for flushes or needs to come through the such edge more than once. sql/mysqld.cc: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/mysqld.h: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/sql_base.cc: Changed approach to how threads are waiting for table to be flushed. Now thread that wants to wait for old table to go away subscribes for notification by adding Flush_ticket to table's share and waits using MDL_context::m_wait object. Once table gets flushed (i.e. all tables are closed and table share is ready to be destroyed) all such waiters are notified individually. Thanks to this change MDL deadlock detector can take such waits into account. To implement this/as result of this change: - tdc_wait_for_old_versions() was replaced with tdc_wait_for_old_version() which waits for individual old share to go away and which is called by open_table() after finding out that share is outdated. We don't need to perform back-off before such waiting thanks to the fact that deadlock detector now sees such waits. - As result Open_table_ctx::m_mdl_requests became unnecessary and was removed. We no longer allocate copies of MDL_request objects on MEM_ROOT when MYSQL_OPEN_FORCE_SHARED/SHARED_HIGH_PRIO flags are in effect. - close_cached_tables() and tdc_wait_for_old_version() share code which implements waiting for share to be flushed - the both use TABLE_SHARE::wait_until_flush() method. Thanks to this close_cached_tables() supports timeouts and has extra parameter for this. - Open_table_context::OT_MDL_CONFLICT enum element was renamed to OT_CONFLICT as it is now also used in cases when back-off is required to resolve deadlock caused by waiting for flush and not metadata lock. - In cases when we discover that current connection tries to open tables from different generation we now simply back-off and restart process of opening tables. To support this Open_table_context::OT_REOPEN_TABLES enum element was added. - COND_refresh condition variable became unnecessary and was removed. - mysql_notify_thread_having_shared_lock() no longer wakes up connections waiting for flush as all such connections can be waken up by deadlock detector if necessary. sql/sql_base.h: - close_cached_tables() now has one more parameter - timeout for waiting for table to be flushed. - Open_table_context::OT_MDL_CONFLICT enum element was renamed to OT_CONFLICT as it is now also used in cases when back-off is required to resolve deadlock caused by waiting for flush and not metadata lock. Added new OT_REOPEN_TABLES enum element to be used in cases when we need to restart open tables process even in the middle of transaction. - Open_table_ctx::m_mdl_requests became unnecessary and was removed. sql/sql_class.h: Added assert ensuring that we won't use LOCK_open mutex with THD::enter_cond(). Otherwise deadlocks can arise in MDL deadlock detector. sql/sql_parse.cc: Changed FLUSH TABLES <list> WITH READ LOCK to take SNW metadata locks instead of X locks on tables to be flushed. Since we no longer require global IX lock to be taken when SNW locks are taken this makes this statement compatible with FLUSH TABLES WITH READ LOCK statement. Since SNW locks allow other connections to have table opened FLUSH TABLES <list> WITH READ LOCK now has to wait during open_tables() for old version to go away. Such waits can lead to deadlocks which will be detected by MDL deadlock detector which now takes waits for table to be flushed into account. Also adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/sql_yacc.yy: FLUSH TABLES <list> WITH READ LOCK now needs only SNW metadata locks on tables. sql/sys_vars.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/table.cc: Implemented new approach to how threads are waiting for table to be flushed. Now thread that wants to wait for old table to go away subscribes for notification by adding Flush_ticket to table's share and waits using MDL_context::m_wait object. Once table gets flushed (i.e. all tables are closed and table share is ready to be destroyed) all such waiters are notified individually. This change allows to make such waits visible inside of MDL deadlock detector. To do it: - Added list of waiters/Flush_tickets to TABLE_SHARE class. - Changed free_table_share() to postpone freeing of share memory until last waiter goes away and to wake up subscribed waiters. - Added TABLE_SHARE::wait_until_flushed() method which implements subscription to the list of waiters for table to be flushed and waiting for this event. Implemented interface which allows to expose waits for flushes to MDL deadlock detector: - Introduced Flush_ticket class a descendant of Wait_for_edge class. - Added TABLE_SHARE::find_deadlock() method which allows deadlock detector to find out what contexts are still using old version of table in question (i.e. to find out what contexts are waited for by owner of Flush_ticket). sql/table.h: In order to support new strategy of waiting for table flush (see comment for table.cc for details) added list of waiters/Flush_tickets to TABLE_SHARE class. Implemented interface which allows to expose waits for flushes to MDL deadlock detector: - Introduced Flush_ticket class a descendant of Wait_for_edge class. - Added TABLE_SHARE::find_deadlock() method which allows deadlock detector to find out what contexts are still using old version of table in question (i.e. to find out what contexts are waited for by owner of Flush_ticket).
2010-07-27 15:34:58 +02:00
key_COND_rpl_status, key_COND_server_started,
key_delayed_insert_cond, key_delayed_insert_cond_client,
key_item_func_sleep_cond, key_master_info_data_cond,
key_master_info_start_cond, key_master_info_stop_cond,
BUG#11752315 - 43460: STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING TO RECONNECT TO Problem : The basic problem is the way the thread sleeps in mysql-5.5 and also in mysql-5.1 when we execute a stop slave on windows platform. On windows platform if the stop slave is executed after the master dies, we have this long wait before the stop slave return a value. This is because there is a sleep of the thread. The sleep is uninterruptable in the two above version, which was fixed by Davi patch for the BUG#11765860 for mysql-trunk. Backporting his patch for mysql-5.5 fixes the problem. Solution : A new pair of mutex and condition variable is introduced to synchronize thread sleep and finalization. A new mutex is required because the slave threads are terminated while holding the slave thread locks (run_lock), which can not be relinquished during termination as this would affect the lock order. mysql-test/suite/rpl/r/rpl_start_stop_slave.result: The result file associated with the test added. mysql-test/suite/rpl/t/rpl_start_stop_slave.test: A test to check the new functionality. sql/rpl_mi.cc: The constructor using the new mutex and condition variables for the master_info. sql/rpl_mi.h: The condition variable and mutex have been added for the master_info. sql/rpl_rli.cc: The constructor using the new mutex and condition variables for the realy_log_info. sql/rpl_rli.h: The condition variable and mutex have been added for the relay_log_info. sql/slave.cc: Use a timed wait on a condition variable to implement a interruptible sleep. The wait is registered with the THD object so that the thread will be woken up if killed.
2012-01-23 13:09:37 +01:00
key_master_info_sleep_cond,
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
Fixes for parallel slave: - Made slaves temporary table multi-thread slave safe by adding mutex around save_temporary_table usage. - rli->save_temporary_tables is the active list of all used temporary tables - This is copied to THD->temporary_tables when temporary tables are opened and updated when temporary tables are closed - Added THD->lock_temporary_tables() and THD->unlock_temporary_tables() to simplify this. - Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code. - Added is_part_of_group() to mark functions that are part of the next function. This replaces setting IN_STMT when events are executed. - Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. - If slave_skip_counter is set run things in single threaded mode. This simplifies code for skipping events. - Updating state of relay log (IN_STMT and IN_TRANSACTION) is moved to one single function: update_state_of_relay_log() We can't use OPTION_BEGIN to check for the state anymore as the sql_driver and sql execution threads may be different. Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags doesn't survive slave restarts is_in_group() is now independent of state of executed transaction. - Reset thd->transaction.all.modified_non_trans_table() if we did set it for single table row events. This was mainly for keeping the flag as documented. - Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. - Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock - Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond - Changed some functions to take rpl_group_info instead of Relay_log_info to make them multi-slave safe and to simplify usage - do_shall_skip() - continue_group() - sql_slave_killed() - next_event() - Simplifed arguments to io_salve_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. - set_thd_in_use_temporary_tables() removed as in_use is set on usage - Added information to thd_proc_info() which thread is waiting for slave mutex to exit. - In open_table() reuse code from find_temporary_table() Other things: - More DBUG statements - Fixed the rpl_incident.test can be run with --debug - More comments - Disabled not used function rpl_connect_master() mysql-test/suite/perfschema/r/all_instances.result: Moved sleep_lock and sleep_cond to rpl_group_info mysql-test/suite/rpl/r/rpl_incident.result: Updated result mysql-test/suite/rpl/t/rpl_incident-master.opt: Not needed anymore mysql-test/suite/rpl/t/rpl_incident.test: Fixed that test can be run with --debug sql/handler.cc: More DBUG_PRINT sql/log.cc: More comments sql/log_event.cc: Added DBUG statements do_shall_skip(), continue_group() now takes rpl_group_info param Use is_begin(), is_commit() and is_rollback() functions instead of inspecting query string We don't have set slaves temporary tables 'in_use' as this is now done when tables are opened. Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() Use IN_TRANSACTION flag to test state of relay log. In rows_event_stmt_cleanup() reset thd->transaction.all.modified_non_trans_table if we had set this before. sql/log_event.h: do_shall_skip(), continue_group() now takes rpl_group_info param Added is_part_of_group() to mark events that are part of the next event. This replaces setting IN_STMT when events are executed. Added is_begin(), is_commit() and is_rollback() functions to Query_log_event to simplify code. sql/log_event_old.cc: Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() do_shall_skip(), continue_group() now takes rpl_group_info param sql/log_event_old.h: Added is_part_of_group() to mark events that are part of the next event. do_shall_skip(), continue_group() now takes rpl_group_info param sql/mysqld.cc: Changed slave_open_temp_tables to uint32 to be able to use atomic operators on it. Relay_log_info::sleep_lock -> Rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> Rpl_group_info::sleep_cond sql/mysqld.h: Updated types and names sql/rpl_gtid.cc: More DBUG sql/rpl_parallel.cc: Updated TODO section Set thd for event that is execution Use new is_begin(), is_commit() and is_rollback() functions. More comments sql/rpl_rli.cc: sql_thd -> sql_driver_thd Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Clear IN_STMT and IN_TRANSACTION in init_relay_log_pos() and Relay_log_info::cleanup_context() to ensure the flags doesn't survive slave restarts. Reset table->in_use for temporary tables as the table may have been used by another THD. Use IN_TRANSACTION instead of OPTION_BEGIN to check state of relay log. Removed IN_STMT flag setting. This is now done in update_state_of_relay_log() sql/rpl_rli.h: Changed relay log state flags to bit masks instead of bit positions (most other code we have uses bit masks) Added IN_TRANSACTION to mark if we are in a BEGIN ... COMMIT section. save_temporary_tables is now thread safe Relay_log_info::sleep_lock -> rpl_group_info::sleep_lock Relay_log_info::sleep_cond -> rpl_group_info::sleep_cond Relay_log_info->sql_thd renamed to Relay_log_info->sql_driver_thd to avoid wrong usage for merged code is_in_group() is now independent of state of executed transaction. sql/slave.cc: Simplifed arguments to io_salve_killed(), sql_slave_killed() and check_io_slave_killed(); No reason to supply THD as this is part of the given structure. set_thd_in_use_temporary_tables() removed as in_use is set on usage in sql_base.cc sql_thd -> sql_driver_thd More DBUG Added update_state_of_relay_log() which will calculate the IN_STMT and IN_TRANSACTION state of the relay log after the current element is executed. If slave_skip_counter is set run things in single threaded mode. Simplifed arguments to io_salve_killed(), check_io_slave_killed() and sql_slave_killed(); No reason to supply THD as this is part of the given structure. Added information to thd_proc_info() which thread is waiting for slave mutex to exit. Disabled not used function rpl_connect_master() Updated argument to next_event() sql/sql_base.cc: Added mutex around usage of slave's temporary tables. The active list is always kept up to date in sql->rgi_slave->save_temporary_tables. Clear thd->temporary_tables after query (safety) More DBUG When using temporary table, set table->in_use to current thd as the THD may be different for slave threads. Some code is ifdef:ed with REMOVE_AFTER_MERGE_WITH_10 as the given code in 10.0 is not yet in this tree. In open_table() reuse code from find_temporary_table() sql/sql_binlog.cc: rli->sql_thd -> rli->sql_driver_thd Remove duplicate setting of rgi->rli sql/sql_class.cc: Added helper functions rgi_lock_temporary_tables() and rgi_unlock_temporary_tables() Would have been nicer to have these inline, but there was no easy way to do that sql/sql_class.h: Added functions to protect slaves temporary tables sql/sql_parse.cc: Added DBUG_PRINT sql/transaction.cc: Added comment
2013-10-13 23:24:05 +02:00
key_rpl_group_info_sleep_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_start_thread;
extern PSI_cond_key key_RELAYLOG_COND_relay_log_updated,
key_RELAYLOG_COND_bin_log_updated, key_COND_wakeup_ready,
key_COND_wait_commit;
2011-10-25 12:53:40 +02:00
extern PSI_cond_key key_RELAYLOG_COND_queue_busy;
extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
MDEV-5657: Parallel replication. Clean up and improve the parallel implementation code, mainly related to scheduling of work to threads and handling of stop and errors. Fix a lot of bugs in various corner cases that could lead to crashes or corruption. Fix that a single replication domain could easily grab all worker threads and stall all other domains; now a configuration variable --slave-domain-parallel-threads allows to limit the number of workers. Allow next event group to start as soon as previous group begins the commit phase (as opposed to when it ends it); this allows multiple event groups on the slave to participate in group commit, even when no other opportunities for parallelism are available. Various fixes: - Fix some races in the rpl.rpl_parallel test case. - Fix an old incorrect assertion in Log_event iocache read. - Fix repeated malloc/free of wait_for_commit and rpl_group_info objects. - Simplify wait_for_commit wakeup logic. - Fix one case in queue_for_group_commit() where killing one thread would fail to correctly signal the error to the next, causing loss of the transaction after slave restart. - Fix leaking of pthreads (and their allocated stack) due to missing PTHREAD_CREATE_DETACHED attribute. - Fix how one batch of group-committed transactions wait for the previous batch before starting to execute themselves. The old code had a very complex scheduling where the first transaction was handled differently, with subtle bugs in corner cases. Now each event group is always scheduled for a new worker (in a round-robin fashion amongst available workers). Keep a count of how many transactions have started to commit, and wait for that counter to reach the appropriate value. - Fix slave stop to wait for all workers to actually complete processing; before, the wait was for update of last_committed_sub_id, which happens a bit earlier, and could leave worker threads potentially accessing bits of the replication state that is no longer valid after slave stop. - Fix a couple of places where the test suite would kill a thread waiting inside enter_cond() in connection with debug_sync; debug_sync + kill can crash in rare cases due to a race with mysys_var_current_mutex in this case. - Fix some corner cases where we had enter_cond() but no exit_cond(). - Fix that we could get failure in wait_for_prior_commit() but forget to flag the error with my_error(). - Fix slave stop (both for normal stop and stop due to error). Now, at stop we pick a specific safe point (in terms of event groups executed) and make sure that all event groups before that point are executed to completion, and that no event group after start executing; this ensures a safe place to restart replication, even for non-transactional stuff/DDL. In error stop, make sure that all prior event groups are allowed to execute to completion, and that any later event groups that have started are rolled back, if possible. The old code could leave eg. T1 and T3 committed but T2 not, or it could even leave half a transaction not rolled back in some random worker, which would cause big problems when that worker was later reused after slave restart. - Fix the accounting of amount of events queued for one worker. Before, the amount was reduced immediately as soon as the events were dequeued (which happens all at once); this allowed twice the amount of events to be queued in memory for each single worker, which is not what users would expect. - Fix that an error set during execution of one event was sometimes not cleared before executing the next, causing problems with the error reporting. - Fix incorrect handling of thd->killed in worker threads.
2014-02-26 15:02:09 +01:00
extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue,
key_COND_rpl_thread_stop, key_COND_rpl_thread_pool,
MDEV-5657: Parallel replication. Clean up and improve the parallel implementation code, mainly related to scheduling of work to threads and handling of stop and errors. Fix a lot of bugs in various corner cases that could lead to crashes or corruption. Fix that a single replication domain could easily grab all worker threads and stall all other domains; now a configuration variable --slave-domain-parallel-threads allows to limit the number of workers. Allow next event group to start as soon as previous group begins the commit phase (as opposed to when it ends it); this allows multiple event groups on the slave to participate in group commit, even when no other opportunities for parallelism are available. Various fixes: - Fix some races in the rpl.rpl_parallel test case. - Fix an old incorrect assertion in Log_event iocache read. - Fix repeated malloc/free of wait_for_commit and rpl_group_info objects. - Simplify wait_for_commit wakeup logic. - Fix one case in queue_for_group_commit() where killing one thread would fail to correctly signal the error to the next, causing loss of the transaction after slave restart. - Fix leaking of pthreads (and their allocated stack) due to missing PTHREAD_CREATE_DETACHED attribute. - Fix how one batch of group-committed transactions wait for the previous batch before starting to execute themselves. The old code had a very complex scheduling where the first transaction was handled differently, with subtle bugs in corner cases. Now each event group is always scheduled for a new worker (in a round-robin fashion amongst available workers). Keep a count of how many transactions have started to commit, and wait for that counter to reach the appropriate value. - Fix slave stop to wait for all workers to actually complete processing; before, the wait was for update of last_committed_sub_id, which happens a bit earlier, and could leave worker threads potentially accessing bits of the replication state that is no longer valid after slave stop. - Fix a couple of places where the test suite would kill a thread waiting inside enter_cond() in connection with debug_sync; debug_sync + kill can crash in rare cases due to a race with mysys_var_current_mutex in this case. - Fix some corner cases where we had enter_cond() but no exit_cond(). - Fix that we could get failure in wait_for_prior_commit() but forget to flag the error with my_error(). - Fix slave stop (both for normal stop and stop due to error). Now, at stop we pick a specific safe point (in terms of event groups executed) and make sure that all event groups before that point are executed to completion, and that no event group after start executing; this ensures a safe place to restart replication, even for non-transactional stuff/DDL. In error stop, make sure that all prior event groups are allowed to execute to completion, and that any later event groups that have started are rolled back, if possible. The old code could leave eg. T1 and T3 committed but T2 not, or it could even leave half a transaction not rolled back in some random worker, which would cause big problems when that worker was later reused after slave restart. - Fix the accounting of amount of events queued for one worker. Before, the amount was reduced immediately as soon as the events were dequeued (which happens all at once); this allowed twice the amount of events to be queued in memory for each single worker, which is not what users would expect. - Fix that an error set during execution of one event was sometimes not cleared before executing the next, causing problems with the error reporting. - Fix incorrect handling of thd->killed in worker threads.
2014-02-26 15:02:09 +01:00
key_COND_parallel_entry, key_COND_group_commit_orderer;
extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;
extern PSI_cond_key key_TABLE_SHARE_COND_rotation;
extern PSI_thread_key key_thread_delayed_insert,
key_thread_handle_manager, key_thread_kill_server, key_thread_main,
key_thread_one_connection, key_thread_signal_hand,
key_thread_slave_background, key_rpl_parallel_thread;
extern PSI_file_key key_file_binlog, key_file_binlog_cache,
key_file_binlog_index, key_file_binlog_index_cache, key_file_casetest,
key_file_dbopt, key_file_ERRMSG, key_select_to_file,
key_file_fileparser, key_file_frm, key_file_global_ddl_log, key_file_load,
key_file_loadfile, key_file_log_event_data, key_file_log_event_info,
key_file_master_info, key_file_misc, key_file_partition_ddl_log,
key_file_pid, key_file_relay_log_info, key_file_send_file, key_file_tclog,
key_file_trg, key_file_trn, key_file_init, key_file_log_ddl;
extern PSI_file_key key_file_query_log, key_file_slow_log;
extern PSI_file_key key_file_relaylog, key_file_relaylog_index,
key_file_relaylog_cache, key_file_relaylog_index_cache;
extern PSI_socket_key key_socket_tcpip, key_socket_unix,
key_socket_client_connection;
MDEV-4991: GTID binlog indexing Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-09-08 13:12:49 +02:00
extern PSI_file_key key_file_binlog_state, key_file_gtid_index;
#ifdef HAVE_des
extern char* des_key_file;
extern PSI_file_key key_file_des_key_file;
extern PSI_mutex_key key_LOCK_des_key_file;
extern mysql_mutex_t LOCK_des_key_file;
#endif
#ifdef HAVE_PSI_INTERFACE
void init_server_psi_keys();
#endif /* HAVE_PSI_INTERFACE */
extern PSI_memory_key key_memory_locked_table_list;
extern PSI_memory_key key_memory_locked_thread_list;
extern PSI_memory_key key_memory_thd_transactions;
extern PSI_memory_key key_memory_delegate;
extern PSI_memory_key key_memory_acl_mem;
extern PSI_memory_key key_memory_acl_memex;
extern PSI_memory_key key_memory_acl_cache;
extern PSI_memory_key key_memory_thd_main_mem_root;
extern PSI_memory_key key_memory_help;
extern PSI_memory_key key_memory_frm;
extern PSI_memory_key key_memory_table_share;
extern PSI_memory_key key_memory_gdl;
extern PSI_memory_key key_memory_table_triggers_list;
extern PSI_memory_key key_memory_prepared_statement_map;
extern PSI_memory_key key_memory_prepared_statement_main_mem_root;
extern PSI_memory_key key_memory_protocol_rset_root;
extern PSI_memory_key key_memory_warning_info_warn_root;
extern PSI_memory_key key_memory_sp_cache;
extern PSI_memory_key key_memory_sp_head_main_root;
extern PSI_memory_key key_memory_sp_head_execute_root;
extern PSI_memory_key key_memory_sp_head_call_root;
extern PSI_memory_key key_memory_table_mapping_root;
extern PSI_memory_key key_memory_quick_range_select_root;
extern PSI_memory_key key_memory_quick_index_merge_root;
extern PSI_memory_key key_memory_quick_ror_intersect_select_root;
extern PSI_memory_key key_memory_quick_ror_union_select_root;
extern PSI_memory_key key_memory_quick_group_min_max_select_root;
extern PSI_memory_key key_memory_test_quick_select_exec;
extern PSI_memory_key key_memory_prune_partitions_exec;
extern PSI_memory_key key_memory_binlog_recover_exec;
extern PSI_memory_key key_memory_blob_mem_storage;
extern PSI_memory_key key_memory_Sys_var_charptr_value;
extern PSI_memory_key key_memory_THD_db;
extern PSI_memory_key key_memory_user_var_entry;
extern PSI_memory_key key_memory_user_var_entry_value;
extern PSI_memory_key key_memory_Slave_job_group_group_relay_log_name;
extern PSI_memory_key key_memory_Relay_log_info_group_relay_log_name;
extern PSI_memory_key key_memory_binlog_cache_mngr;
MDEV-4991: GTID binlog indexing Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-09-08 13:12:49 +02:00
extern PSI_memory_key key_memory_binlog_gtid_index;
extern PSI_memory_key key_memory_Row_data_memory_memory;
extern PSI_memory_key key_memory_errmsgs;
extern PSI_memory_key key_memory_Event_queue_element_for_exec_names;
extern PSI_memory_key key_memory_Event_scheduler_scheduler_param;
extern PSI_memory_key key_memory_Gis_read_stream_err_msg;
extern PSI_memory_key key_memory_Geometry_objects_data;
extern PSI_memory_key key_memory_host_cache_hostname;
extern PSI_memory_key key_memory_User_level_lock;
extern PSI_memory_key key_memory_Filesort_info_record_pointers;
extern PSI_memory_key key_memory_Sort_param_tmp_buffer;
extern PSI_memory_key key_memory_Filesort_info_merge;
extern PSI_memory_key key_memory_Filesort_buffer_sort_keys;
extern PSI_memory_key key_memory_handler_errmsgs;
extern PSI_memory_key key_memory_handlerton;
extern PSI_memory_key key_memory_XID;
extern PSI_memory_key key_memory_MYSQL_LOCK;
extern PSI_memory_key key_memory_MYSQL_LOG_name;
extern PSI_memory_key key_memory_TC_LOG_MMAP_pages;
extern PSI_memory_key key_memory_my_str_malloc;
extern PSI_memory_key key_memory_MYSQL_BIN_LOG_basename;
extern PSI_memory_key key_memory_MYSQL_BIN_LOG_index;
extern PSI_memory_key key_memory_MYSQL_RELAY_LOG_basename;
extern PSI_memory_key key_memory_MYSQL_RELAY_LOG_index;
extern PSI_memory_key key_memory_rpl_filter;
extern PSI_memory_key key_memory_Security_context;
extern PSI_memory_key key_memory_NET_buff;
extern PSI_memory_key key_memory_NET_compress_packet;
extern PSI_memory_key key_memory_my_bitmap_map;
extern PSI_memory_key key_memory_QUICK_RANGE_SELECT_mrr_buf_desc;
extern PSI_memory_key key_memory_TABLE_RULE_ENT;
extern PSI_memory_key key_memory_Mutex_cond_array_Mutex_cond;
extern PSI_memory_key key_memory_Owned_gtids_sidno_to_hash;
extern PSI_memory_key key_memory_Sid_map_Node;
extern PSI_memory_key key_memory_bison_stack;
extern PSI_memory_key key_memory_TABLE_sort_io_cache;
extern PSI_memory_key key_memory_DATE_TIME_FORMAT;
extern PSI_memory_key key_memory_DDL_LOG_MEMORY_ENTRY;
extern PSI_memory_key key_memory_ST_SCHEMA_TABLE;
extern PSI_memory_key key_memory_ignored_db;
extern PSI_memory_key key_memory_SLAVE_INFO;
extern PSI_memory_key key_memory_log_event_old;
extern PSI_memory_key key_memory_HASH_ROW_ENTRY;
extern PSI_memory_key key_memory_table_def_memory;
extern PSI_memory_key key_memory_MPVIO_EXT_auth_info;
extern PSI_memory_key key_memory_LOG_POS_COORD;
extern PSI_memory_key key_memory_XID_STATE;
extern PSI_memory_key key_memory_Rpl_info_file_buffer;
extern PSI_memory_key key_memory_Rpl_info_table;
extern PSI_memory_key key_memory_binlog_pos;
extern PSI_memory_key key_memory_db_worker_hash_entry;
extern PSI_memory_key key_memory_rpl_slave_command_buffer;
extern PSI_memory_key key_memory_binlog_ver_1_event;
extern PSI_memory_key key_memory_rpl_slave_check_temp_dir;
extern PSI_memory_key key_memory_TABLE;
extern PSI_memory_key key_memory_binlog_statement_buffer;
extern PSI_memory_key key_memory_user_conn;
extern PSI_memory_key key_memory_dboptions_hash;
extern PSI_memory_key key_memory_dbnames_cache;
extern PSI_memory_key key_memory_hash_index_key_buffer;
extern PSI_memory_key key_memory_THD_handler_tables_hash;
extern PSI_memory_key key_memory_JOIN_CACHE;
extern PSI_memory_key key_memory_READ_INFO;
extern PSI_memory_key key_memory_partition_syntax_buffer;
extern PSI_memory_key key_memory_global_system_variables;
extern PSI_memory_key key_memory_THD_variables;
extern PSI_memory_key key_memory_PROFILE;
extern PSI_memory_key key_memory_LOG_name;
extern PSI_memory_key key_memory_string_iterator;
extern PSI_memory_key key_memory_frm_extra_segment_buff;
extern PSI_memory_key key_memory_frm_form_pos;
extern PSI_memory_key key_memory_frm_string;
extern PSI_memory_key key_memory_Unique_sort_buffer;
extern PSI_memory_key key_memory_Unique_merge_buffer;
extern PSI_memory_key key_memory_shared_memory_name;
extern PSI_memory_key key_memory_opt_bin_logname;
extern PSI_memory_key key_memory_Query_cache;
extern PSI_memory_key key_memory_READ_RECORD_cache;
extern PSI_memory_key key_memory_Quick_ranges;
extern PSI_memory_key key_memory_File_query_log_name;
extern PSI_memory_key key_memory_Table_trigger_dispatcher;
extern PSI_memory_key key_memory_show_slave_status_io_gtid_set;
extern PSI_memory_key key_memory_write_set_extraction;
extern PSI_memory_key key_memory_thd_timer;
extern PSI_memory_key key_memory_THD_Session_tracker;
extern PSI_memory_key key_memory_THD_Session_sysvar_resource_manager;
extern PSI_memory_key key_memory_get_all_tables;
extern PSI_memory_key key_memory_fill_schema_schemata;
extern PSI_memory_key key_memory_native_functions;
extern PSI_memory_key key_memory_JSON;
extern PSI_memory_key key_memory_WSREP;
/*
MAINTAINER: Please keep this list in order, to limit merge collisions.
Hint: grep PSI_stage_info | sort -u
*/
extern PSI_stage_info stage_apply_event;
extern PSI_stage_info stage_after_create;
2014-05-09 12:35:11 +02:00
extern PSI_stage_info stage_after_opening_tables;
extern PSI_stage_info stage_after_table_lock;
extern PSI_stage_info stage_allocating_local_table;
extern PSI_stage_info stage_alter_inplace_prepare;
extern PSI_stage_info stage_alter_inplace;
extern PSI_stage_info stage_alter_inplace_commit;
extern PSI_stage_info stage_after_apply_event;
extern PSI_stage_info stage_changing_master;
extern PSI_stage_info stage_checking_master_version;
extern PSI_stage_info stage_checking_permissions;
extern PSI_stage_info stage_checking_privileges_on_cached_query;
extern PSI_stage_info stage_checking_query_cache_for_query;
extern PSI_stage_info stage_cleaning_up;
extern PSI_stage_info stage_closing_tables;
extern PSI_stage_info stage_connecting_to_master;
extern PSI_stage_info stage_converting_heap_to_myisam;
extern PSI_stage_info stage_copying_to_group_table;
extern PSI_stage_info stage_copying_to_tmp_table;
extern PSI_stage_info stage_copy_to_tmp_table;
extern PSI_stage_info stage_creating_delayed_handler;
extern PSI_stage_info stage_creating_sort_index;
extern PSI_stage_info stage_creating_table;
extern PSI_stage_info stage_creating_tmp_table;
extern PSI_stage_info stage_deleting_from_main_table;
extern PSI_stage_info stage_deleting_from_reference_tables;
extern PSI_stage_info stage_discard_or_import_tablespace;
extern PSI_stage_info stage_end;
fixes for test failures and small collateral changes mysql-test/lib/My/Test.pm: somehow with "print" we get truncated writes sometimes mysql-test/suite/perfschema/r/digest_table_full.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/dml_handler.result: host table is not ported over yet mysql-test/suite/perfschema/r/information_schema.result: host table is not ported over yet mysql-test/suite/perfschema/r/nesting.result: this differs, because we don't rewrite general log queries, and multi-statement packets are logged as a one entry. this result file is identical to what mysql-5.6.5 produces with the --log-raw option. mysql-test/suite/perfschema/r/relaylog.result: MariaDB modifies the binlog index file directly, while MySQL 5.6 has a feature "crash-safe binlog index" and modifies a special "crash-safe" shadow copy of the index file and then moves it over. That's why this test shows "NONE" index file writes in MySQL and "MANY" in MariaDB. mysql-test/suite/perfschema/r/server_init.result: MariaDB initializes the "manager" resources from the "manager" thread, and starts this thread only when --flush-time is not 0. MySQL 5.6 initializes "manager" resources unconditionally on server startup. mysql-test/suite/perfschema/r/stage_mdl_global.result: this differs, because MariaDB disables query cache when query_cache_size=0. MySQL does not do that, and this causes useless mutex locks and waits. mysql-test/suite/perfschema/r/statement_digest.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/statement_digest_consumers.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/statement_digest_long_query.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/rpl/r/rpl_mixed_drop_create_temp_table.result: will be updated to match 5.6 when alfranio.correia@oracle.com-20110512172919-c1b5kmum4h52g0ni and anders.song@greatopensource.com-20110105052107-zoab0bsf5a6xxk2y are merged mysql-test/suite/rpl/r/rpl_non_direct_mixed_mixing_engines.result: will be updated to match 5.6 when anders.song@greatopensource.com-20110105052107-zoab0bsf5a6xxk2y is merged
2012-09-27 20:09:46 +02:00
extern PSI_stage_info stage_enabling_keys;
extern PSI_stage_info stage_executing;
extern PSI_stage_info stage_execution_of_init_command;
extern PSI_stage_info stage_explaining;
2014-05-09 12:35:11 +02:00
extern PSI_stage_info stage_finding_key_cache;
extern PSI_stage_info stage_finished_reading_one_binlog_switching_to_next_binlog;
extern PSI_stage_info stage_flushing_relay_log_and_master_info_repository;
extern PSI_stage_info stage_flushing_relay_log_info_file;
extern PSI_stage_info stage_freeing_items;
extern PSI_stage_info stage_fulltext_initialization;
extern PSI_stage_info stage_got_handler_lock;
extern PSI_stage_info stage_got_old_table;
extern PSI_stage_info stage_init;
Add more execution stages (commit, rollback, etc) This was done to get more information about where time is spent. Now we can get proper timing for time spent in commit, rollback, binlog write etc. Following stages was added: - Commit - Commit_implicit - Rollback - Rollback implicit - Binlog write - Init for update - This is used instead of "Init" for insert, update and delete. - Staring cleanup Following stages where changed: - "Unlocking tables" stage reset stage to previous stage at end - "binlog write" stage resets stage to previous stage at end - "end" -> "end of update loop" - "cleaning up" -> "Reset for next command" - Added stage_searching_rows_for_update when searching for rows to be deleted. Other things: - Renamed all stages to start with big letter (before there was no consitency) - Increased performance_schema_max_stage_classes from 150 to 160. - Most of the test changes in performance schema comes from renaming of stages. - Removed duplicate output of variables and inital state in a lot of performance schema tests. This was done to make it easier to change a default value for a performance variable without affecting all tests. - Added start_server_variables.test to check configuration - Removed some duplicate "closing tables" stages - Updated position for "stage_init_update" and "stage_updating" for delete, insert and update to be just before update loop (for more exact timing). - Don't set "Checking permissions" twice in a row. - Remove stage_end stage from creating views (not done for create table either). - Updated default performance history size from 10 to 20 because of new stages - Ensure that ps_enabled is correct (to be used in a later patch)
2017-11-05 16:04:20 +01:00
extern PSI_stage_info stage_init_update;
extern PSI_stage_info stage_insert;
extern PSI_stage_info stage_invalidating_query_cache_entries_table;
extern PSI_stage_info stage_invalidating_query_cache_entries_table_list;
extern PSI_stage_info stage_killing_slave;
extern PSI_stage_info stage_logging_slow_query;
extern PSI_stage_info stage_making_temp_file_append_before_load_data;
extern PSI_stage_info stage_making_temp_file_create_before_load_data;
extern PSI_stage_info stage_manage_keys;
extern PSI_stage_info stage_master_has_sent_all_binlog_to_slave;
extern PSI_stage_info stage_opening_tables;
extern PSI_stage_info stage_optimizing;
extern PSI_stage_info stage_preparing;
extern PSI_stage_info stage_purging_old_relay_logs;
extern PSI_stage_info stage_query_end;
Add more execution stages (commit, rollback, etc) This was done to get more information about where time is spent. Now we can get proper timing for time spent in commit, rollback, binlog write etc. Following stages was added: - Commit - Commit_implicit - Rollback - Rollback implicit - Binlog write - Init for update - This is used instead of "Init" for insert, update and delete. - Staring cleanup Following stages where changed: - "Unlocking tables" stage reset stage to previous stage at end - "binlog write" stage resets stage to previous stage at end - "end" -> "end of update loop" - "cleaning up" -> "Reset for next command" - Added stage_searching_rows_for_update when searching for rows to be deleted. Other things: - Renamed all stages to start with big letter (before there was no consitency) - Increased performance_schema_max_stage_classes from 150 to 160. - Most of the test changes in performance schema comes from renaming of stages. - Removed duplicate output of variables and inital state in a lot of performance schema tests. This was done to make it easier to change a default value for a performance variable without affecting all tests. - Added start_server_variables.test to check configuration - Removed some duplicate "closing tables" stages - Updated position for "stage_init_update" and "stage_updating" for delete, insert and update to be just before update loop (for more exact timing). - Don't set "Checking permissions" twice in a row. - Remove stage_end stage from creating views (not done for create table either). - Updated default performance history size from 10 to 20 because of new stages - Ensure that ps_enabled is correct (to be used in a later patch)
2017-11-05 16:04:20 +01:00
extern PSI_stage_info stage_starting_cleanup;
extern PSI_stage_info stage_rollback;
extern PSI_stage_info stage_rollback_implicit;
extern PSI_stage_info stage_commit;
extern PSI_stage_info stage_commit_implicit;
extern PSI_stage_info stage_queueing_master_event_to_the_relay_log;
extern PSI_stage_info stage_reading_event_from_the_relay_log;
2014-05-09 12:35:11 +02:00
extern PSI_stage_info stage_recreating_table;
extern PSI_stage_info stage_registering_slave_on_master;
extern PSI_stage_info stage_removing_duplicates;
extern PSI_stage_info stage_removing_tmp_table;
extern PSI_stage_info stage_rename;
extern PSI_stage_info stage_rename_result_table;
extern PSI_stage_info stage_requesting_binlog_dump;
extern PSI_stage_info stage_reschedule;
extern PSI_stage_info stage_searching_rows_for_update;
extern PSI_stage_info stage_sending_binlog_event_to_slave;
extern PSI_stage_info stage_sending_cached_result_to_client;
extern PSI_stage_info stage_sending_data;
extern PSI_stage_info stage_setup;
extern PSI_stage_info stage_slave_has_read_all_relay_log;
2012-10-19 20:38:59 +02:00
extern PSI_stage_info stage_show_explain;
fixes for test failures and small collateral changes mysql-test/lib/My/Test.pm: somehow with "print" we get truncated writes sometimes mysql-test/suite/perfschema/r/digest_table_full.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/dml_handler.result: host table is not ported over yet mysql-test/suite/perfschema/r/information_schema.result: host table is not ported over yet mysql-test/suite/perfschema/r/nesting.result: this differs, because we don't rewrite general log queries, and multi-statement packets are logged as a one entry. this result file is identical to what mysql-5.6.5 produces with the --log-raw option. mysql-test/suite/perfschema/r/relaylog.result: MariaDB modifies the binlog index file directly, while MySQL 5.6 has a feature "crash-safe binlog index" and modifies a special "crash-safe" shadow copy of the index file and then moves it over. That's why this test shows "NONE" index file writes in MySQL and "MANY" in MariaDB. mysql-test/suite/perfschema/r/server_init.result: MariaDB initializes the "manager" resources from the "manager" thread, and starts this thread only when --flush-time is not 0. MySQL 5.6 initializes "manager" resources unconditionally on server startup. mysql-test/suite/perfschema/r/stage_mdl_global.result: this differs, because MariaDB disables query cache when query_cache_size=0. MySQL does not do that, and this causes useless mutex locks and waits. mysql-test/suite/perfschema/r/statement_digest.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/statement_digest_consumers.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/perfschema/r/statement_digest_long_query.result: md5 hashes of statement digests differ, because yacc token codes are different in mariadb mysql-test/suite/rpl/r/rpl_mixed_drop_create_temp_table.result: will be updated to match 5.6 when alfranio.correia@oracle.com-20110512172919-c1b5kmum4h52g0ni and anders.song@greatopensource.com-20110105052107-zoab0bsf5a6xxk2y are merged mysql-test/suite/rpl/r/rpl_non_direct_mixed_mixing_engines.result: will be updated to match 5.6 when anders.song@greatopensource.com-20110105052107-zoab0bsf5a6xxk2y is merged
2012-09-27 20:09:46 +02:00
extern PSI_stage_info stage_sorting;
extern PSI_stage_info stage_sorting_for_group;
extern PSI_stage_info stage_sorting_for_order;
extern PSI_stage_info stage_sorting_result;
extern PSI_stage_info stage_sql_thd_waiting_until_delay;
extern PSI_stage_info stage_statistics;
extern PSI_stage_info stage_storing_result_in_query_cache;
extern PSI_stage_info stage_storing_row_into_queue;
extern PSI_stage_info stage_system_lock;
extern PSI_stage_info stage_unlocking_tables;
extern PSI_stage_info stage_table_lock;
extern PSI_stage_info stage_filling_schema_table;
extern PSI_stage_info stage_update;
extern PSI_stage_info stage_updating;
extern PSI_stage_info stage_updating_main_table;
extern PSI_stage_info stage_updating_reference_tables;
extern PSI_stage_info stage_upgrading_lock;
extern PSI_stage_info stage_user_lock;
extern PSI_stage_info stage_user_sleep;
extern PSI_stage_info stage_verifying_table;
extern PSI_stage_info stage_waiting_for_ddl;
extern PSI_stage_info stage_waiting_for_delay_list;
extern PSI_stage_info stage_waiting_for_disk_space;
extern PSI_stage_info stage_waiting_for_flush;
extern PSI_stage_info stage_waiting_for_gtid_to_be_written_to_binary_log;
extern PSI_stage_info stage_waiting_for_handler_insert;
extern PSI_stage_info stage_waiting_for_handler_lock;
extern PSI_stage_info stage_waiting_for_handler_open;
extern PSI_stage_info stage_waiting_for_insert;
extern PSI_stage_info stage_waiting_for_master_to_send_event;
extern PSI_stage_info stage_waiting_for_master_update;
extern PSI_stage_info stage_waiting_for_relay_log_space;
extern PSI_stage_info stage_waiting_for_slave_mutex_on_exit;
extern PSI_stage_info stage_waiting_for_slave_thread_to_start;
extern PSI_stage_info stage_waiting_for_query_cache_lock;
extern PSI_stage_info stage_waiting_for_table_flush;
extern PSI_stage_info stage_waiting_for_the_next_event_in_relay_log;
extern PSI_stage_info stage_waiting_for_the_slave_thread_to_advance_position;
extern PSI_stage_info stage_waiting_to_finalize_termination;
extern PSI_stage_info stage_binlog_waiting_background_tasks;
Add more execution stages (commit, rollback, etc) This was done to get more information about where time is spent. Now we can get proper timing for time spent in commit, rollback, binlog write etc. Following stages was added: - Commit - Commit_implicit - Rollback - Rollback implicit - Binlog write - Init for update - This is used instead of "Init" for insert, update and delete. - Staring cleanup Following stages where changed: - "Unlocking tables" stage reset stage to previous stage at end - "binlog write" stage resets stage to previous stage at end - "end" -> "end of update loop" - "cleaning up" -> "Reset for next command" - Added stage_searching_rows_for_update when searching for rows to be deleted. Other things: - Renamed all stages to start with big letter (before there was no consitency) - Increased performance_schema_max_stage_classes from 150 to 160. - Most of the test changes in performance schema comes from renaming of stages. - Removed duplicate output of variables and inital state in a lot of performance schema tests. This was done to make it easier to change a default value for a performance variable without affecting all tests. - Added start_server_variables.test to check configuration - Removed some duplicate "closing tables" stages - Updated position for "stage_init_update" and "stage_updating" for delete, insert and update to be just before update loop (for more exact timing). - Don't set "Checking permissions" twice in a row. - Remove stage_end stage from creating views (not done for create table either). - Updated default performance history size from 10 to 20 because of new stages - Ensure that ps_enabled is correct (to be used in a later patch)
2017-11-05 16:04:20 +01:00
extern PSI_stage_info stage_binlog_write;
extern PSI_stage_info stage_binlog_processing_checkpoint_notify;
extern PSI_stage_info stage_binlog_stopping_background_thread;
extern PSI_stage_info stage_waiting_for_work_from_sql_thread;
2014-02-03 15:22:39 +01:00
extern PSI_stage_info stage_waiting_for_prior_transaction_to_commit;
extern PSI_stage_info stage_waiting_for_prior_transaction_to_start_commit;
2014-02-03 15:22:39 +01:00
extern PSI_stage_info stage_waiting_for_room_in_worker_thread;
extern PSI_stage_info stage_waiting_for_workers_idle;
extern PSI_stage_info stage_waiting_for_ftwrl;
extern PSI_stage_info stage_waiting_for_ftwrl_threads_to_pause;
extern PSI_stage_info stage_waiting_for_rpl_thread_pool;
2014-02-10 15:12:17 +01:00
extern PSI_stage_info stage_master_gtid_wait_primary;
extern PSI_stage_info stage_master_gtid_wait;
extern PSI_stage_info stage_gtid_wait_other_connection;
extern PSI_stage_info stage_slave_background_process_request;
extern PSI_stage_info stage_slave_background_wait_request;
extern PSI_stage_info stage_waiting_for_deadlock_kill;
extern PSI_stage_info stage_starting;
#ifdef WITH_WSREP
// Aditional Galera thread states
extern PSI_stage_info stage_waiting_isolation;
extern PSI_stage_info stage_waiting_certification;
extern PSI_stage_info stage_waiting_ddl;
extern PSI_stage_info stage_waiting_flow;
#endif /* WITH_WSREP */
2014-02-03 15:22:39 +01:00
#ifdef HAVE_PSI_STATEMENT_INTERFACE
/**
Statement instrumentation keys (sql).
The last entry, at [SQLCOM_END], is for parsing errors.
*/
extern PSI_statement_info sql_statement_info[(uint) SQLCOM_END + 1];
/**
Statement instrumentation keys (com).
The last entry, at [COM_END], is for packet errors.
*/
extern PSI_statement_info com_statement_info[(uint) COM_END + 1];
/**
Statement instrumentation key for replication.
*/
extern PSI_statement_info stmt_info_rpl;
void init_sql_statement_info();
void init_com_statement_info();
#endif /* HAVE_PSI_STATEMENT_INTERFACE */
#ifndef _WIN32
extern pthread_t signal_thread;
#endif
#ifdef HAVE_OPENSSL
extern struct st_VioSSLFd * ssl_acceptor_fd;
extern LEX_CUSTRING ssl_acceptor_fingerprint();
#endif /* HAVE_OPENSSL */
/*
The following variables were under INNODB_COMPABILITY_HOOKS
*/
extern my_bool opt_large_pages;
extern uint opt_large_page_size;
extern MYSQL_PLUGIN_IMPORT char lc_messages_dir[FN_REFLEN];
extern char *lc_messages_dir_ptr, *log_error_file_ptr;
extern MYSQL_PLUGIN_IMPORT char reg_ext[FN_EXTLEN];
extern MYSQL_PLUGIN_IMPORT uint reg_ext_length;
extern MYSQL_PLUGIN_IMPORT uint lower_case_table_names;
extern MYSQL_PLUGIN_IMPORT bool mysqld_embedded;
extern ulong specialflag;
extern uint mysql_data_home_len;
extern uint mysql_real_data_home_len;
extern const char *mysql_real_data_home_ptr;
2010-08-05 14:34:19 +02:00
extern ulong thread_handling;
extern "C" MYSQL_PLUGIN_IMPORT char server_version[SERVER_VERSION_LENGTH];
extern char *server_version_ptr;
extern MYSQL_PLUGIN_IMPORT char mysql_real_data_home[];
extern char mysql_unpacked_real_data_home[];
extern MYSQL_PLUGIN_IMPORT struct system_variables global_system_variables;
extern char default_logfile_name[FN_REFLEN];
extern char *my_proxy_protocol_networks;
#define mysql_tmpdir (my_tmpdir(&mysql_tmpdir_list))
extern MYSQL_PLUGIN_IMPORT const key_map key_map_empty;
extern MYSQL_PLUGIN_IMPORT key_map key_map_full; /* Should be threaded as const */
/*
Server mutex locks and condition variables.
*/
extern mysql_mutex_t
LOCK_item_func_sleep, LOCK_status,
LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator,
LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
2019-05-29 21:17:00 +02:00
LOCK_active_mi, LOCK_manager, LOCK_user_conn,
Changing all cost calculation to be given in milliseconds This makes it easier to compare different costs and also allows the optimizer to optimizer different storage engines more reliably. - Added tests/check_costs.pl, a tool to verify optimizer cost calculations. - Most engine costs has been found with this program. All steps to calculate the new costs are documented in Docs/optimizer_costs.txt - User optimizer_cost variables are given in microseconds (as individual costs can be very small). Internally they are stored in ms. - Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost (9 ms) to common SSD cost (400MB/sec). - Removed cost calculations for hard disks (rotation etc). - Changed the following handler functions to return IO_AND_CPU_COST. This makes it easy to apply different cost modifiers in ha_..time() functions for io and cpu costs. - scan_time() - rnd_pos_time() & rnd_pos_call_time() - keyread_time() - Enhanched keyread_time() to calculate the full cost of reading of a set of keys with a given number of ranges and optional number of blocks that need to be accessed. - Removed read_time() as keyread_time() + rnd_pos_time() can do the same thing and more. - Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks. Used heap table costs for json_table. The rest are using default engine costs. - Added the following new optimizer variables: - optimizer_disk_read_ratio - optimizer_disk_read_cost - optimizer_key_lookup_cost - optimizer_row_lookup_cost - optimizer_row_next_find_cost - optimizer_scan_cost - Moved all engine specific cost to OPTIMIZER_COSTS structure. - Changed costs to use 'records_out' instead of 'records_read' when recalculating costs. - Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h. This allows one to change costs without having to compile a lot of files. - Updated costs for filter lookup. - Use a better cost estimate in best_extension_by_limited_search() for the sorting cost. - Fixed previous issues with 'filtered' explain column as we are now using 'records_out' (min rows seen for table) to calculate filtering. This greatly simplifies the filtering code in JOIN_TAB::save_explain_data(). This change caused a lot of queries to be optimized differently than before, which exposed different issues in the optimizer that needs to be fixed. These fixes are in the following commits. To not have to change the same test case over and over again, the changes in the test cases are done in a single commit after all the critical change sets are done. InnoDB changes: - Updated InnoDB to not divide big range cost with 2. - Added cost for InnoDB (innobase_update_optimizer_costs()). - Don't mark clustered primary key with HA_KEYREAD_ONLY. This will prevent that the optimizer is trying to use index-only scans on the clustered key. - Disabled ha_innobase::scan_time() and ha_innobase::read_time() and ha_innobase::rnd_pos_time() as the default engine cost functions now works good for InnoDB. Other things: - Added --show-query-costs (\Q) option to mysql.cc to show the query cost after each query (good when working with query costs). - Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust the value that user is given. This is used to change cost from microseconds (user input) to milliseconds (what the server is internally using). - Added include/my_tracker.h ; Useful include file to quickly test costs of a function. - Use handler::set_table() in all places instead of 'table= arg'. - Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and shown in microseconds for the user but stored as milliseconds. This is to make the numbers easier to read for the user (less pre-zeros). Implemented in 'Sys_var_optimizer_cost' class. - In test_quick_select() do not use index scans if 'no_keyread' is set for the table. This is what we do in other places of the server. - Added THD parameter to Unique::get_use_cost() and check_index_intersect_extension() and similar functions to be able to provide costs to called functions. - Changed 'records' to 'rows' in optimizer_trace. - Write more information to optimizer_trace. - Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3) to calculate usage space of keys in b-trees. (Before we used numeric constants). - Removed code that assumed that b-trees has similar costs as binary trees. Replaced with engine calls that returns the cost. - Added Bitmap::find_first_bit() - Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia). - Added records_init and records_after_filter to POSITION to remember more of what best_access_patch() calculates. - table_after_join_selectivity() changed to recalculate 'records_out' based on the new fields from best_access_patch() Bug fixes: - Some queries did not update last_query_cost (was 0). Fixed by moving setting thd->...last_query_cost in JOIN::optimize(). - Write '0' as number of rows for const tables with a matching row. Some internals: - Engine cost are stored in OPTIMIZER_COSTS structure. When a handlerton is created, we also created a new cost variable for the handlerton. We also create a new variable if the user changes a optimizer cost for a not yet loaded handlerton either with command line arguments or with SET @@global.engine.optimizer_cost_variable=xx. - There are 3 global OPTIMIZER_COSTS variables: default_optimizer_costs The default costs + changes from the command line without an engine specifier. heap_optimizer_costs Heap table costs, used for temporary tables tmp_table_optimizer_costs The cost for the default on disk internal temporary table (MyISAM or Aria) - The engine cost for a table is stored in table_share. To speed up accesses the handler has a pointer to this. The cost is copied to the table on first access. If one wants to change the cost one must first update the global engine cost and then do a FLUSH TABLES. This was done to be able to access the costs for an open table without any locks. - When a handlerton is created, the cost are updated the following way: See sql/keycaches.cc for details: - Use 'default_optimizer_costs' as a base - Call hton->update_optimizer_costs() to override with the engines default costs. - Override the costs that the user has specified for the engine. - One handler open, copy the engine cost from handlerton to TABLE_SHARE. - Call handler::update_optimizer_costs() to allow the engine to update cost for this particular table. - There are two costs stored in THD. These are copied to the handler when the table is used in a query: - optimizer_where_cost - optimizer_scan_setup_cost - Simply code in best_access_path() by storing all cost result in a structure. (Idea/Suggestion by Igor)
2022-08-11 12:05:23 +02:00
LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log,
LOCK_optimizer_costs;
2019-05-29 21:17:00 +02:00
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables;
extern mysql_rwlock_t LOCK_all_status_vars;
extern mysql_mutex_t LOCK_start_thread;
MDEV-22214 mariadbd.exe calls function mysqld.exe, and crashes Stop linking plugins to the server executable on Windows. Instead, extract whole server functionality into a large DLL, called server.dll. Link both plugins, and small server "stub" exe to it. This eliminates plugin dependency on the name of the server executable. It also reduces the size of the packages (since tiny mysqld.exe and mariadbd.exe are now both linked to one big DLL) Also, simplify the functionality of exporing all symbols from selected static libraries. Rely on WINDOWS_EXPORT_ALL_SYMBOLS, rather than old self-backed solution. fix compile error replace GetProcAddress(GetModuleHandle(NULL), "variable_name") for server exported data with actual variable names. Runtime loading was never required,was error prone , since symbols could be missing at runtime, and now it actually failed, because we do not export symbols from executable anymore, but from a shared library This did require a MYSQL_PLUGIN_IMPORT decoration for the plugin, but made the code more straightforward, and avoids missing symbols at runtime (as mentioned before). The audit plugin is still doing some dynamic loading, as it aims to work cross-version. Now it won't work cross-version on Windows, as it already uses some symbols that are *not* dynamically loaded, e.g fn_format and those symbols now exported from server.dll , when earlier they were exported by mysqld.exe Windows, fixes for storage engine plugin loading after various rebranding stuff Create server.dll containing functionality of the whole server make mariadbd.exe/mysqld.exe a stub that is only calling mysqld_main() fix build
2020-04-10 14:09:18 +02:00
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_server_started;
extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_server_started;
extern mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave;
extern mysql_rwlock_t LOCK_ssl_refresh;
extern mysql_prlock_t LOCK_system_variables_hash;
extern mysql_cond_t COND_start_thread;
Fix for bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". The problem was that FLUSH TABLES <list> WITH READ LOCK which was issued when other connection has acquired global read lock using FLUSH TABLES WITH READ LOCK was blocked and has to wait until global read lock is released. This issue stemmed from the fact that FLUSH TABLES <list> WITH READ LOCK implementation has acquired X metadata locks on tables to be flushed. Since these locks required acquiring of global IX lock this statement was incompatible with global read lock. This patch addresses problem by using SNW metadata type of lock for tables to be flushed by FLUSH TABLES <list> WITH READ LOCK. It is OK to acquire them without global IX lock as long as we won't try to upgrade those locks. Since SNW locks allow concurrent statements using same table FLUSH TABLE <list> WITH READ LOCK now has to wait until old versions of tables to be flushed go away after acquiring metadata locks. Since such waiting can lead to deadlock MDL deadlock detector was extended to take into account waits for flush and resolve such deadlocks. As a bonus code in open_tables() which was responsible for waiting old versions of tables to go away was refactored. Now when we encounter old version of table in open_table() we don't back-off and wait for all old version to go away, but instead wait for this particular table to be flushed. Such approach supported by deadlock detection should reduce number of scenarios in which FLUSH TABLES aborts concurrent multi-statement transactions. Note that active FLUSH TABLES <list> WITH READ LOCK still blocks concurrent FLUSH TABLES WITH READ LOCK statement as the former keeps tables open and thus prevents the latter statement from doing flush. mysql-test/include/handler.inc: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/r/flush.result: Added test which checks that "flush tables <list> with read lock" is compatible with active "flush tables with read lock" but not vice-versa. This test also covers bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". mysql-test/r/mdl_sync.result: Added scenarios in which wait for table to be flushed causes deadlocks to the coverage of MDL deadlock detector. mysql-test/suite/perfschema/r/dml_setup_instruments.result: Adjusted test results after removal of COND_refresh condition variable. mysql-test/suite/perfschema/r/server_init.result: Adjusted test and its results after removal of COND_refresh condition variable. mysql-test/suite/perfschema/t/server_init.test: Adjusted test and its results after removal of COND_refresh condition variable. mysql-test/t/flush.test: Added test which checks that "flush tables <list> with read lock" is compatible with active "flush tables with read lock" but not vice-versa. This test also covers bug #52044 "FLUSH TABLES WITH READ LOCK and FLUSH TABLES <list> WITH READ LOCK are incompatible". mysql-test/t/kill.test: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/t/lock_multi.test: Adjusted test case after changing status which is set when FLUSH TABLES waits for tables to be flushed from "Flushing tables" to "Waiting for table". mysql-test/t/mdl_sync.test: Added scenarios in which wait for table to be flushed causes deadlocks to the coverage of MDL deadlock detector. sql/ha_ndbcluster.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/ha_ndbcluster_binlog.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/lock.cc: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/mdl.cc: Now MDL deadlock detector takes into account information about waits for table flushes when searching for deadlock. To implement this change: - Declaration of enum_deadlock_weight and Deadlock_detection_visitor were moved to mdl.h header to make them available to the code in table.cc which implements deadlock detector traversal through edges of waiters graph representing waiting for flush. - Since now MDL_context may wait not only for metadata lock but also for table to be flushed an abstract Wait_for_edge class was introduced. Its descendants MDL_ticket and Flush_ticket incapsulate specifics of inspecting waiters graph when following through edge representing wait of particular type. We no longer require global IX metadata lock when acquiring SNW or SNRW locks. Such locks are needed only when metadata locks of these types are upgraded to X locks. This allows to use SNW locks in FLUSH TABLES <list> WITH READ LOCK implementation and keep the latter compatible with global read lock. sql/mdl.h: Now MDL deadlock detector takes into account information about waits for table flushes when searching for deadlock. To implement this change: - Declaration of enum_deadlock_weight and Deadlock_detection_visitor were moved to mdl.h header to make them available to the code in table.cc which implements deadlock detector traversal through edges of waiters graph representing waiting for flush. - Since now MDL_context may wait not only for metadata lock but also for table to be flushed an abstract Wait_for_edge class was introduced. Its descendants MDL_ticket and Flush_ticket incapsulate specifics of inspecting waiters graph when following through edge representing wait of particular type. - Deadlock_detection_visitor now has m_table_shares_visited member which allows to support recursive locking for LOCK_open. This is required when deadlock detector inspects waiters graph which contains several edges representing waits for flushes or needs to come through the such edge more than once. sql/mysqld.cc: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/mysqld.h: Removed COND_refresh condition variable. See comment for sql_base.cc for details. sql/sql_base.cc: Changed approach to how threads are waiting for table to be flushed. Now thread that wants to wait for old table to go away subscribes for notification by adding Flush_ticket to table's share and waits using MDL_context::m_wait object. Once table gets flushed (i.e. all tables are closed and table share is ready to be destroyed) all such waiters are notified individually. Thanks to this change MDL deadlock detector can take such waits into account. To implement this/as result of this change: - tdc_wait_for_old_versions() was replaced with tdc_wait_for_old_version() which waits for individual old share to go away and which is called by open_table() after finding out that share is outdated. We don't need to perform back-off before such waiting thanks to the fact that deadlock detector now sees such waits. - As result Open_table_ctx::m_mdl_requests became unnecessary and was removed. We no longer allocate copies of MDL_request objects on MEM_ROOT when MYSQL_OPEN_FORCE_SHARED/SHARED_HIGH_PRIO flags are in effect. - close_cached_tables() and tdc_wait_for_old_version() share code which implements waiting for share to be flushed - the both use TABLE_SHARE::wait_until_flush() method. Thanks to this close_cached_tables() supports timeouts and has extra parameter for this. - Open_table_context::OT_MDL_CONFLICT enum element was renamed to OT_CONFLICT as it is now also used in cases when back-off is required to resolve deadlock caused by waiting for flush and not metadata lock. - In cases when we discover that current connection tries to open tables from different generation we now simply back-off and restart process of opening tables. To support this Open_table_context::OT_REOPEN_TABLES enum element was added. - COND_refresh condition variable became unnecessary and was removed. - mysql_notify_thread_having_shared_lock() no longer wakes up connections waiting for flush as all such connections can be waken up by deadlock detector if necessary. sql/sql_base.h: - close_cached_tables() now has one more parameter - timeout for waiting for table to be flushed. - Open_table_context::OT_MDL_CONFLICT enum element was renamed to OT_CONFLICT as it is now also used in cases when back-off is required to resolve deadlock caused by waiting for flush and not metadata lock. Added new OT_REOPEN_TABLES enum element to be used in cases when we need to restart open tables process even in the middle of transaction. - Open_table_ctx::m_mdl_requests became unnecessary and was removed. sql/sql_class.h: Added assert ensuring that we won't use LOCK_open mutex with THD::enter_cond(). Otherwise deadlocks can arise in MDL deadlock detector. sql/sql_parse.cc: Changed FLUSH TABLES <list> WITH READ LOCK to take SNW metadata locks instead of X locks on tables to be flushed. Since we no longer require global IX lock to be taken when SNW locks are taken this makes this statement compatible with FLUSH TABLES WITH READ LOCK statement. Since SNW locks allow other connections to have table opened FLUSH TABLES <list> WITH READ LOCK now has to wait during open_tables() for old version to go away. Such waits can lead to deadlocks which will be detected by MDL deadlock detector which now takes waits for table to be flushed into account. Also adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/sql_yacc.yy: FLUSH TABLES <list> WITH READ LOCK now needs only SNW metadata locks on tables. sql/sys_vars.cc: Adjusted code after adding one more parameter for close_cached_tables() call - timeout for waiting for table to be flushed. sql/table.cc: Implemented new approach to how threads are waiting for table to be flushed. Now thread that wants to wait for old table to go away subscribes for notification by adding Flush_ticket to table's share and waits using MDL_context::m_wait object. Once table gets flushed (i.e. all tables are closed and table share is ready to be destroyed) all such waiters are notified individually. This change allows to make such waits visible inside of MDL deadlock detector. To do it: - Added list of waiters/Flush_tickets to TABLE_SHARE class. - Changed free_table_share() to postpone freeing of share memory until last waiter goes away and to wake up subscribed waiters. - Added TABLE_SHARE::wait_until_flushed() method which implements subscription to the list of waiters for table to be flushed and waiting for this event. Implemented interface which allows to expose waits for flushes to MDL deadlock detector: - Introduced Flush_ticket class a descendant of Wait_for_edge class. - Added TABLE_SHARE::find_deadlock() method which allows deadlock detector to find out what contexts are still using old version of table in question (i.e. to find out what contexts are waited for by owner of Flush_ticket). sql/table.h: In order to support new strategy of waiting for table flush (see comment for table.cc for details) added list of waiters/Flush_tickets to TABLE_SHARE class. Implemented interface which allows to expose waits for flushes to MDL deadlock detector: - Introduced Flush_ticket class a descendant of Wait_for_edge class. - Added TABLE_SHARE::find_deadlock() method which allows deadlock detector to find out what contexts are still using old version of table in question (i.e. to find out what contexts are waited for by owner of Flush_ticket).
2010-07-27 15:34:58 +02:00
extern mysql_cond_t COND_manager;
extern my_bool opt_use_ssl;
extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher,
*opt_ssl_key, *opt_ssl_crl, *opt_ssl_crlpath;
MDEV-14101 Provide an option to select TLS protocol version Server and command line tools now support option --tls_version to specify the TLS version between client and server. Valid values are TLSv1.0, TLSv1.1, TLSv1.2, TLSv1.3 or a combination of them. E.g. --tls_version=TLSv1.3 --tls_version=TLSv1.2,TLSv1.3 In case there is a gap between versions, the lowest version will be used: --tls_version=TLSv1.1,TLSv1.3 -> Only TLSv1.1 will be available. If the used TLS library doesn't support the specified TLS version, it will use the default configuration. Limitations: SSLv3 is not supported. The default configuration doesn't support TLSv1.0 anymore. TLSv1.3 protocol currently is only supported by OpenSSL 1.1.0 (client and server) and GnuTLS 3.6.5 (client only). Overview of TLS implementations and protocols Server: +-----------+-----------------------------------------+ | Library | Supported TLS versions | +-----------+-----------------------------------------+ | WolfSSL | TLSv1.1, TLSv1,2 | +-----------+-----------------------------------------+ | OpenSSL | (TLSv1.0), TLSv1.1, TLSv1,2, TLSv1.3 | +-----------+-----------------------------------------+ | LibreSSL | (TLSv1.0), TLSv1.1, TLSv1,2, TLSv1.3 | +-----------+-----------------------------------------+ Client (MariaDB Connector/C) +-----------+-----------------------------------------+ | Library | Supported TLS versions | +-----------+-----------------------------------------+ | GnuTLS | (TLSv1.0), TLSv1.1, TLSv1.2, TLSv1.3 | +-----------+-----------------------------------------+ | Schannel | (TLSv1.0), TLSv1.1, TLSv1.2 | +-----------+-----------------------------------------+ | OpenSSL | (TLSv1.0), TLSv1.1, TLSv1,2, TLSv1.3 | +-----------+-----------------------------------------+ | LibreSSL | (TLSv1.0), TLSv1.1, TLSv1,2, TLSv1.3 | +-----------+-----------------------------------------+
2019-06-11 12:44:16 +02:00
extern ulonglong tls_version;
2011-04-25 17:22:25 +02:00
#ifdef MYSQL_SERVER
/**
only options that need special treatment in get_one_option() deserve
to be listed below
*/
enum options_mysqld
{
OPT_to_set_the_start_number=256,
OPT_BINLOG_DO_DB,
OPT_BINLOG_FORMAT,
OPT_BINLOG_IGNORE_DB,
OPT_BIN_LOG,
OPT_BOOTSTRAP,
Changing all cost calculation to be given in milliseconds This makes it easier to compare different costs and also allows the optimizer to optimizer different storage engines more reliably. - Added tests/check_costs.pl, a tool to verify optimizer cost calculations. - Most engine costs has been found with this program. All steps to calculate the new costs are documented in Docs/optimizer_costs.txt - User optimizer_cost variables are given in microseconds (as individual costs can be very small). Internally they are stored in ms. - Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost (9 ms) to common SSD cost (400MB/sec). - Removed cost calculations for hard disks (rotation etc). - Changed the following handler functions to return IO_AND_CPU_COST. This makes it easy to apply different cost modifiers in ha_..time() functions for io and cpu costs. - scan_time() - rnd_pos_time() & rnd_pos_call_time() - keyread_time() - Enhanched keyread_time() to calculate the full cost of reading of a set of keys with a given number of ranges and optional number of blocks that need to be accessed. - Removed read_time() as keyread_time() + rnd_pos_time() can do the same thing and more. - Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks. Used heap table costs for json_table. The rest are using default engine costs. - Added the following new optimizer variables: - optimizer_disk_read_ratio - optimizer_disk_read_cost - optimizer_key_lookup_cost - optimizer_row_lookup_cost - optimizer_row_next_find_cost - optimizer_scan_cost - Moved all engine specific cost to OPTIMIZER_COSTS structure. - Changed costs to use 'records_out' instead of 'records_read' when recalculating costs. - Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h. This allows one to change costs without having to compile a lot of files. - Updated costs for filter lookup. - Use a better cost estimate in best_extension_by_limited_search() for the sorting cost. - Fixed previous issues with 'filtered' explain column as we are now using 'records_out' (min rows seen for table) to calculate filtering. This greatly simplifies the filtering code in JOIN_TAB::save_explain_data(). This change caused a lot of queries to be optimized differently than before, which exposed different issues in the optimizer that needs to be fixed. These fixes are in the following commits. To not have to change the same test case over and over again, the changes in the test cases are done in a single commit after all the critical change sets are done. InnoDB changes: - Updated InnoDB to not divide big range cost with 2. - Added cost for InnoDB (innobase_update_optimizer_costs()). - Don't mark clustered primary key with HA_KEYREAD_ONLY. This will prevent that the optimizer is trying to use index-only scans on the clustered key. - Disabled ha_innobase::scan_time() and ha_innobase::read_time() and ha_innobase::rnd_pos_time() as the default engine cost functions now works good for InnoDB. Other things: - Added --show-query-costs (\Q) option to mysql.cc to show the query cost after each query (good when working with query costs). - Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust the value that user is given. This is used to change cost from microseconds (user input) to milliseconds (what the server is internally using). - Added include/my_tracker.h ; Useful include file to quickly test costs of a function. - Use handler::set_table() in all places instead of 'table= arg'. - Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and shown in microseconds for the user but stored as milliseconds. This is to make the numbers easier to read for the user (less pre-zeros). Implemented in 'Sys_var_optimizer_cost' class. - In test_quick_select() do not use index scans if 'no_keyread' is set for the table. This is what we do in other places of the server. - Added THD parameter to Unique::get_use_cost() and check_index_intersect_extension() and similar functions to be able to provide costs to called functions. - Changed 'records' to 'rows' in optimizer_trace. - Write more information to optimizer_trace. - Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3) to calculate usage space of keys in b-trees. (Before we used numeric constants). - Removed code that assumed that b-trees has similar costs as binary trees. Replaced with engine calls that returns the cost. - Added Bitmap::find_first_bit() - Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia). - Added records_init and records_after_filter to POSITION to remember more of what best_access_patch() calculates. - table_after_join_selectivity() changed to recalculate 'records_out' based on the new fields from best_access_patch() Bug fixes: - Some queries did not update last_query_cost (was 0). Fixed by moving setting thd->...last_query_cost in JOIN::optimize(). - Write '0' as number of rows for const tables with a matching row. Some internals: - Engine cost are stored in OPTIMIZER_COSTS structure. When a handlerton is created, we also created a new cost variable for the handlerton. We also create a new variable if the user changes a optimizer cost for a not yet loaded handlerton either with command line arguments or with SET @@global.engine.optimizer_cost_variable=xx. - There are 3 global OPTIMIZER_COSTS variables: default_optimizer_costs The default costs + changes from the command line without an engine specifier. heap_optimizer_costs Heap table costs, used for temporary tables tmp_table_optimizer_costs The cost for the default on disk internal temporary table (MyISAM or Aria) - The engine cost for a table is stored in table_share. To speed up accesses the handler has a pointer to this. The cost is copied to the table on first access. If one wants to change the cost one must first update the global engine cost and then do a FLUSH TABLES. This was done to be able to access the costs for an open table without any locks. - When a handlerton is created, the cost are updated the following way: See sql/keycaches.cc for details: - Use 'default_optimizer_costs' as a base - Call hton->update_optimizer_costs() to override with the engines default costs. - Override the costs that the user has specified for the engine. - One handler open, copy the engine cost from handlerton to TABLE_SHARE. - Call handler::update_optimizer_costs() to allow the engine to update cost for this particular table. - There are two costs stored in THD. These are copied to the handler when the table is used in a query: - optimizer_where_cost - optimizer_scan_setup_cost - Simply code in best_access_path() by storing all cost result in a structure. (Idea/Suggestion by Igor)
2022-08-11 12:05:23 +02:00
OPT_COSTS_DISK_READ_COST,
OPT_COSTS_INDEX_BLOCK_COPY_COST,
OPT_COSTS_KEY_CMP_COST,
OPT_COSTS_KEY_COPY_COST,
OPT_COSTS_KEY_LOOKUP_COST,
OPT_COSTS_KEY_NEXT_FIND_COST,
OPT_COSTS_DISK_READ_RATIO,
OPT_COSTS_ROW_COPY_COST,
OPT_COSTS_ROW_LOOKUP_COST,
OPT_COSTS_ROW_NEXT_FIND_COST,
OPT_COSTS_ROWID_CMP_COST,
OPT_COSTS_ROWID_COPY_COST,
OPT_EXPIRE_LOGS_DAYS,
OPT_BINLOG_EXPIRE_LOGS_SECONDS,
OPT_CONSOLE,
OPT_DEBUG_SYNC_TIMEOUT,
OPT_REMOVED_OPTION,
2012-10-18 23:33:06 +02:00
OPT_IGNORE_DB_DIRECTORY,
OPT_ISAM_LOG,
OPT_KEY_BUFFER_SIZE,
OPT_KEY_CACHE_AGE_THRESHOLD,
OPT_KEY_CACHE_BLOCK_SIZE,
OPT_KEY_CACHE_DIVISION_LIMIT,
2010-11-25 18:17:28 +01:00
OPT_KEY_CACHE_PARTITIONS,
Fixed problem with very slow shutdown when using 100,000 MyISAM tables with delay_key_write Reason for the problem was that the hash of changed files in the key cache was too small (was 128). Fixed by making the hash size larger and changeable. - Introduced key-cache-file-hash-size (default 512) for MyISAM and aria_pagecache_file_hash_size (default 512) for Aria. - Added new status variable "Feature_delay_key_write" which counts number of tables opened that are using delay_key_write mysql-test/r/features.result: Added test of Feature_delay_key_write mysql-test/r/key_cache.result: Updated tests as the number of blocks has changed mysql-test/r/mysqld--help.result: Updated result mysql-test/suite/maria/maria3.result: Updated result mysql-test/suite/sys_vars/r/key_cache_file_hash_size_basic.result: Test new variable mysql-test/suite/sys_vars/t/aria_pagecache_file_hash_size_basic.test: Test new variable mysql-test/suite/sys_vars/t/key_cache_file_hash_size_basic.test: Test new variable mysql-test/t/features.test: Added test of Feature_delay_key_write mysql-test/t/key_cache.test: Updated tests as the number of blocks has changed mysys/mf_keycache.c: Made CHANGED_BLOCKS_HASH dynamic sql/handler.cc: Updated call to init_key_cache() sql/mysqld.cc: Added "Feature_delay_key_write" Added support for key-cache-file-hash-size sql/mysqld.h: Added support for key-cache-file-hash-size sql/sql_class.h: Added feature_files_opened_with_delayed_keys sql/sys_vars.cc: Added key_cache_file_hash_size storage/maria/ha_maria.cc: Added pagecache_file_hash_size Added counting of files with delay_key_write storage/maria/ma_checkpoint.c: Fixed compiler warning storage/maria/ma_pagecache.c: Made PAGECACHE_CHANGED_BLOCKS_HASH into a variable storage/maria/ma_pagecache.h: Made PAGECACHE_CHANGED_BLOCKS_HASH into a variable storage/maria/ma_rt_test.c: Updated parameters for init_pagecache() storage/maria/ma_test1.c: Updated parameters for init_pagecache() storage/maria/ma_test2.c: Updated parameters for init_pagecache() storage/maria/ma_test3.c: Updated parameters for init_pagecache() storage/maria/maria_chk.c: Updated parameters for init_pagecache() storage/maria/maria_ftdump.c: Updated parameters for init_pagecache() storage/maria/maria_pack.c: Updated parameters for init_pagecache() storage/maria/maria_read_log.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_pagecache_consist.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_pagecache_rwconsist.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_pagecache_rwconsist2.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_pagecache_single.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_multithread-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_noflush-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_nologs-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Updated parameters for init_pagecache() storage/maria/unittest/ma_test_loghandler_purge-t.c: Updated parameters for init_pagecache() storage/myisam/ha_myisam.cc: Added counting of files with delay_key_write storage/myisam/mi_check.c: Updated call to init_key_cache() storage/myisam/mi_test1.c: Updated call to init_key_cache() storage/myisam/mi_test2.c: Updated call to init_key_cache() storage/myisam/mi_test3.c: Updated call to init_key_cache() storage/myisam/mi_test_all.sh: Fixed broken test storage/myisam/myisam_ftdump.c: Updated call to init_key_cache() storage/myisam/myisamchk.c: Updated call to init_key_cache() storage/myisam/myisamlog.c: Updated call to init_key_cache()
2014-07-19 16:46:08 +02:00
OPT_KEY_CACHE_CHANGED_BLOCKS_HASH_SIZE,
2012-10-18 23:33:06 +02:00
OPT_LOG_BASENAME,
OPT_LOG_ERROR,
OPT_LOWER_CASE_TABLE_NAMES,
OPT_PLUGIN_LOAD,
OPT_PLUGIN_LOAD_ADD,
2012-11-03 12:28:51 +01:00
OPT_PFS_INSTRUMENT,
OPT_REPLICATE_DO_DB,
OPT_REPLICATE_DO_TABLE,
OPT_REPLICATE_IGNORE_DB,
OPT_REPLICATE_IGNORE_TABLE,
OPT_REPLICATE_REWRITE_DB,
OPT_REPLICATE_WILD_DO_TABLE,
OPT_REPLICATE_WILD_IGNORE_TABLE,
OPT_SAFE,
OPT_SERVER_ID,
OPT_SILENT,
OPT_SKIP_HOST_CACHE,
OPT_SLAVE_PARALLEL_MODE,
OPT_SSL_CA,
OPT_SSL_CAPATH,
OPT_SSL_CERT,
OPT_SSL_CIPHER,
2012-11-03 12:28:51 +01:00
OPT_SSL_CRL,
OPT_SSL_CRLPATH,
OPT_SSL_KEY,
OPT_WANT_CORE,
OPT_MYSQL_COMPATIBILITY,
2023-11-08 15:44:18 +01:00
OPT_TLS_VERSION, OPT_SECURE_AUTH,
OPT_MYSQL_TO_BE_IMPLEMENTED,
OPT_SEQURE_FILE_PRIV,
2012-10-18 23:33:06 +02:00
OPT_which_is_always_the_last
};
2011-04-25 17:22:25 +02:00
#endif
/**
Query type constants (usable as bitmap flags).
*/
enum enum_query_type
{
/// Nothing specific, ordinary SQL query.
QT_ORDINARY= 0,
/// In utf8.
QT_TO_SYSTEM_CHARSET= (1 << 0),
/// Without character set introducers.
QT_WITHOUT_INTRODUCERS= (1 << 1),
/// view internal representation (like QT_ORDINARY except ORDER BY clause)
QT_VIEW_INTERNAL= (1 << 2),
/// If identifiers should not include database names, where unambiguous
QT_ITEM_IDENT_SKIP_DB_NAMES= (1 << 3),
/// If identifiers should not include table names, where unambiguous
QT_ITEM_IDENT_SKIP_TABLE_NAMES= (1 << 4),
/// If Item_cache_wrapper should not print <expr_cache>
QT_ITEM_CACHE_WRAPPER_SKIP_DETAILS= (1 << 5),
/// If Item_subselect should print as just "(subquery#1)"
/// rather than display the subquery body
QT_ITEM_SUBSELECT_ID_ONLY= (1 << 6),
/// If NULLIF(a,b) should print itself as
/// CASE WHEN a_for_comparison=b THEN NULL ELSE a_for_return_value END
/// when "a" was replaced to two different items
/// (e.g. by equal fields propagation in optimize_cond())
/// or always as NULLIF(a, b).
/// The default behaviour is to use CASE syntax when
/// a_for_return_value is not the same as a_for_comparison.
/// SHOW CREATE {VIEW|PROCEDURE|FUNCTION} and other cases where the
/// original representation is required, should set this flag.
QT_ITEM_ORIGINAL_FUNC_NULLIF= (1 << 7),
/// good for parsing
QT_PARSABLE= (1 << 8),
2023-12-01 13:43:58 +01:00
// If an expression is constant, print the expression, not the value
// it evaluates to. Should be used for error messages, so that they
// don't reveal values.
QT_NO_DATA_EXPANSION= (1 << 9),
/// This value means focus on readability, not on ability to parse back, etc.
QT_EXPLAIN= QT_TO_SYSTEM_CHARSET |
QT_ITEM_IDENT_SKIP_DB_NAMES |
QT_ITEM_CACHE_WRAPPER_SKIP_DETAILS |
QT_ITEM_SUBSELECT_ID_ONLY,
QT_SHOW_SELECT_NUMBER= (1<<10),
/// Do not print database name or table name in the identifiers (even if
/// this means the printout will be ambigous). It is assumed that the caller
/// passing this flag knows what they are doing.
QT_ITEM_IDENT_DISABLE_DB_TABLE_NAMES= (1 <<11),
/// This is used for EXPLAIN EXTENDED extra warnings / Be more detailed
/// Be more detailed than QT_EXPLAIN.
/// Perhaps we should eventually include QT_ITEM_IDENT_SKIP_CURRENT_DATABASE
/// here, as it would give better readable results
QT_EXPLAIN_EXTENDED= QT_TO_SYSTEM_CHARSET|
QT_SHOW_SELECT_NUMBER,
2016-05-04 15:23:26 +02:00
MDEV-28603 Invalid view when its definition uses TVC as single-value subquery Subselect_single_value_engine cannot handle table value constructor used as subquery. That's why any table value constructor TVC used as subquery is converted into a select over derived table whose specification is TVC. Currently the names of the columns of the derived table DT are taken from the first element of TVC and if the k-th component of the element happens to be a subquery the text representation of this subquery serves as the name of the k-th column of the derived table. References of all columns of the derived table DT compose the select list of the result of the conversion. If a definition of a view contained a table value constructor used as a subquery and the view was registered after this conversion had been applied we could register an invalid view definition if the first element of TVC contained a subquery as its component: the name of this component was taken from the original subquery, while the name of the corresponding column of the derived table was taken from the text representation of the subquery produced by the function SELECT_LEX::print() and these names were usually differ from each other. To avoid registration of such invalid views the function SELECT_LEX::print() now prints the original TVC instead of the select in which this TVC has been wrapped. Now the specification of registered view looks like as if no conversions from TVC to selects were done. Approved by Oleksandr Byelkin <sanja@mariadb.com>
2023-02-27 19:51:22 +01:00
// Remove wrappers added for TVC when creating or showing view
QT_NO_WRAPPERS_FOR_TVC_IN_VIEW= (1 << 12),
MDEV-27744 LPAD in vcol created in ORACLE mode makes table corrupted in non-ORACLE The crash happened with an indexed virtual column whose value is evaluated using a function that has a different meaning in sql_mode='' vs sql_mode=ORACLE: - DECODE() - LTRIM() - RTRIM() - LPAD() - RPAD() - REPLACE() - SUBSTR() For example: CREATE TABLE t1 ( b VARCHAR(1), g CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL, KEY g(g) ); So far we had replacement XXX_ORACLE() functions for all mentioned function, e.g. SUBSTR_ORACLE() for SUBSTR(). So it was possible to correctly re-parse SUBSTR_ORACLE() even in sql_mode=''. But it was not possible to re-parse the MariaDB version of SUBSTR() after switching to sql_mode=ORACLE. It was erroneously mis-interpreted as SUBSTR_ORACLE(). As a result, this combination worked fine: SET sql_mode=ORACLE; CREATE TABLE t1 ... g CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL, ...; INSERT ... FLUSH TABLES; SET sql_mode=''; INSERT ... But the other way around it crashed: SET sql_mode=''; CREATE TABLE t1 ... g CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL, ...; INSERT ... FLUSH TABLES; SET sql_mode=ORACLE; INSERT ... At CREATE time, SUBSTR was instantiated as Item_func_substr and printed in the FRM file as substr(). At re-open time with sql_mode=ORACLE, "substr()" was erroneously instantiated as Item_func_substr_oracle. Fix: The fix proposes a symmetric solution. It provides a way to re-parse reliably all sql_mode dependent functions to their original CREATE TABLE time meaning, no matter what the open-time sql_mode is. We take advantage of the same idea we previously used to resolve sql_mode dependent data types. Now all sql_mode dependent functions are printed by SHOW using a schema qualifier when the current sql_mode differs from the function sql_mode: SET sql_mode=''; CREATE TABLE t1 ... SUBSTR(a,b,c) ..; SET sql_mode=ORACLE; SHOW CREATE TABLE t1; -> mariadb_schema.substr(a,b,c) SET sql_mode=ORACLE; CREATE TABLE t2 ... SUBSTR(a,b,c) ..; SET sql_mode=''; SHOW CREATE TABLE t1; -> oracle_schema.substr(a,b,c) Old replacement names like substr_oracle() are still understood for backward compatibility and used in FRM files (for downgrade compatibility), but they are not printed by SHOW any more.
2022-04-04 12:50:21 +02:00
/// Print for FRM file. Focus on parse-back.
/// e.g. VIEW expressions and virtual column expressions
2023-12-19 20:11:54 +01:00
QT_FOR_FRM= (1 << 13),
// Print only the SELECT part, even for INSERT...SELECT
2023-12-19 20:11:54 +01:00
QT_SELECT_ONLY = (1 << 14)
};
/* query_id */
extern Atomic_counter<query_id_t> global_query_id;
/* increment query_id and return it. */
inline __attribute__((warn_unused_result)) query_id_t next_query_id()
{
return global_query_id++;
}
inline query_id_t get_query_id()
{
return global_query_id;
}
/* increment global_thread_id and return it. */
extern __attribute__((warn_unused_result)) my_thread_id next_thread_id(void);
/*
TODO: Replace this with an inline function.
*/
#ifndef EMBEDDED_LIBRARY
extern "C" void unireg_abort(int exit_code) __attribute__((noreturn));
#else
extern "C" void unireg_clear(int exit_code);
#define unireg_abort(exit_code) do { unireg_clear(exit_code); DBUG_RETURN(exit_code); } while(0)
#endif
extern void set_server_version(char *buf, size_t size);
2012-02-21 20:51:56 +01:00
#define current_thd _current_thd()
void set_current_thd(THD *thd);
2010-11-25 18:17:28 +01:00
/*
@todo remove, make it static in ha_maria.cc
currently it's needed for sql_select.cc
*/
extern handlerton *maria_hton;
extern uint64 global_gtid_counter;
extern my_bool opt_gtid_strict_mode;
extern my_bool opt_userstat_running, debug_assert_if_crashed_table;
2010-11-25 18:17:28 +01:00
extern uint mysqld_extra_port;
extern ulong opt_progress_report_time;
2010-11-25 18:17:28 +01:00
extern ulong extra_max_connections;
extern ulonglong denied_connections;
extern ulong thread_created;
extern scheduler_functions *thread_scheduler, *extra_thread_scheduler;
extern char *opt_log_basename;
extern my_bool opt_master_verify_checksum;
extern my_bool opt_stack_trace, disable_log_notes;
2012-02-21 20:51:56 +01:00
extern my_bool opt_expect_abort;
extern my_bool opt_slave_sql_verify_checksum;
2014-11-25 18:47:44 +01:00
extern my_bool opt_mysql56_temporal_format, strict_password_validation;
extern ulong binlog_checksum_options;
2011-11-22 18:04:38 +01:00
extern bool max_user_connections_checking;
extern ulong opt_binlog_dbug_fsync_sleep;
extern uint volatile global_disable_checkpoint;
extern my_bool opt_help;
extern int mysqld_main(int argc, char **argv);
#ifdef _WIN32
extern HANDLE hEventShutdown;
extern void mysqld_win_initiate_shutdown();
extern void mysqld_win_set_startup_complete();
extern void mysqld_win_extend_service_timeout(DWORD sec);
extern void mysqld_set_service_status_callback(void (*)(DWORD, DWORD, DWORD));
extern void mysqld_win_set_service_name(const char *name);
#endif
#endif /* MYSQLD_INCLUDED */