2011-06-30 17:37:13 +02:00
|
|
|
/*
|
2011-11-21 19:13:14 +02:00
|
|
|
Copyright (c) 2004, 2010, Oracle and/or its affiliates
|
2004-04-01 01:02:42 -08:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2006-12-23 20:20:40 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2004-04-01 01:02:42 -08:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2019-05-11 21:29:06 +03:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
2004-04-01 01:02:42 -08:00
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @file ha_example.h
|
|
|
|
|
|
|
|
@brief
|
|
|
|
The ha_example engine is a stubbed storage engine for example purposes only;
|
|
|
|
it does nothing at this point. Its purpose is to provide a source
|
|
|
|
code illustration of how to begin writing new storage engines; see also
|
|
|
|
/storage/example/ha_example.cc.
|
|
|
|
|
|
|
|
@note
|
|
|
|
Please read ha_example.cc before reading this file.
|
|
|
|
Reminder: The example storage engine implements all methods that are *required*
|
|
|
|
to be implemented. For a full list of all methods that you can implement, see
|
|
|
|
handler.h.
|
|
|
|
|
|
|
|
@see
|
|
|
|
/sql/handler.h and /storage/example/ha_example.cc
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
|
|
|
|
2010-03-31 16:05:33 +02:00
|
|
|
#include "my_global.h" /* ulonglong */
|
|
|
|
#include "thr_lock.h" /* THR_LOCK, THR_LOCK_DATA */
|
|
|
|
#include "handler.h" /* handler */
|
|
|
|
#include "my_base.h" /* ha_rows */
|
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @brief
|
2013-06-15 18:32:08 +03:00
|
|
|
Example_share is a class that will be shared among all open handlers.
|
2006-12-18 14:28:15 -08:00
|
|
|
This example implements the minimum of what you will probably need.
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
2013-06-15 18:32:08 +03:00
|
|
|
class Example_share : public Handler_share {
|
|
|
|
public:
|
2009-12-04 18:26:15 -07:00
|
|
|
mysql_mutex_t mutex;
|
2004-04-01 01:02:42 -08:00
|
|
|
THR_LOCK lock;
|
2013-06-15 18:32:08 +03:00
|
|
|
Example_share();
|
|
|
|
~Example_share()
|
|
|
|
{
|
|
|
|
thr_lock_delete(&lock);
|
|
|
|
mysql_mutex_destroy(&mutex);
|
|
|
|
}
|
|
|
|
};
|
2004-04-01 01:02:42 -08:00
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @brief
|
2004-05-11 15:59:20 -07:00
|
|
|
Class definition for the storage engine
|
|
|
|
*/
|
2004-04-01 01:02:42 -08:00
|
|
|
class ha_example: public handler
|
|
|
|
{
|
2023-08-14 20:56:27 +01:00
|
|
|
THR_LOCK_DATA lock; ///< MariaDB lock
|
2013-06-15 18:32:08 +03:00
|
|
|
Example_share *share; ///< Shared lock info
|
|
|
|
Example_share *get_share(); ///< Get the share
|
2004-04-01 01:02:42 -08:00
|
|
|
|
|
|
|
public:
|
2006-09-29 17:19:02 -07:00
|
|
|
ha_example(handlerton *hton, TABLE_SHARE *table_arg);
|
2023-02-07 13:57:20 +02:00
|
|
|
~ha_example() = default;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
This is a list of flags that indicate what functionality the storage engine
|
|
|
|
implements. The current table flags are documented in handler.h
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
ulonglong table_flags() const override
|
2004-04-01 01:02:42 -08:00
|
|
|
{
|
2007-05-28 12:50:29 +02:00
|
|
|
/*
|
BUG#39934: Slave stops for engine that only support row-based logging
General overview:
The logic for switching to row format when binlog_format=MIXED had
numerous flaws. The underlying problem was the lack of a consistent
architecture.
General purpose of this changeset:
This changeset introduces an architecture for switching to row format
when binlog_format=MIXED. It enforces the architecture where it has
to. It leaves some bugs to be fixed later. It adds extensive tests to
verify that unsafe statements work as expected and that appropriate
errors are produced by problems with the selection of binlog format.
It was not practical to split this into smaller pieces of work.
Problem 1:
To determine the logging mode, the code has to take several parameters
into account (namely: (1) the value of binlog_format; (2) the
capabilities of the engines; (3) the type of the current statement:
normal, unsafe, or row injection). These parameters may conflict in
several ways, namely:
- binlog_format=STATEMENT for a row injection
- binlog_format=STATEMENT for an unsafe statement
- binlog_format=STATEMENT for an engine only supporting row logging
- binlog_format=ROW for an engine only supporting statement logging
- statement is unsafe and engine does not support row logging
- row injection in a table that does not support statement logging
- statement modifies one table that does not support row logging and
one that does not support statement logging
Several of these conflicts were not detected, or were detected with
an inappropriate error message. The problem of BUG#39934 was that no
appropriate error message was written for the case when an engine
only supporting row logging executed a row injection with
binlog_format=ROW. However, all above cases must be handled.
Fix 1:
Introduce new error codes (sql/share/errmsg.txt). Ensure that all
conditions are detected and handled in decide_logging_format()
Problem 2:
The binlog format shall be determined once per statement, in
decide_logging_format(). It shall not be changed before or after that.
Before decide_logging_format() is called, all information necessary to
determine the logging format must be available. This principle ensures
that all unsafe statements are handled in a consistent way.
However, this principle is not followed:
thd->set_current_stmt_binlog_row_based_if_mixed() is called in several
places, including from code executing UPDATE..LIMIT,
INSERT..SELECT..LIMIT, DELETE..LIMIT, INSERT DELAYED, and
SET @@binlog_format. After Problem 1 was fixed, that caused
inconsistencies where these unsafe statements would not print the
appropriate warnings or errors for some of the conflicts.
Fix 2:
Remove calls to THD::set_current_stmt_binlog_row_based_if_mixed() from
code executed after decide_logging_format(). Compensate by calling the
set_current_stmt_unsafe() at parse time. This way, all unsafe statements
are detected by decide_logging_format().
Problem 3:
INSERT DELAYED is not unsafe: it is logged in statement format even if
binlog_format=MIXED, and no warning is printed even if
binlog_format=STATEMENT. This is BUG#45825.
Fix 3:
Made INSERT DELAYED set itself to unsafe at parse time. This allows
decide_logging_format() to detect that a warning should be printed or
the binlog_format changed.
Problem 4:
LIMIT clause were not marked as unsafe when executed inside stored
functions/triggers/views/prepared statements. This is
BUG#45785.
Fix 4:
Make statements containing the LIMIT clause marked as unsafe at
parse time, instead of at execution time. This allows propagating
unsafe-ness to the view.
mysql-test/extra/rpl_tests/create_recursive_construct.inc:
Added auxiliary file used by binlog_unsafe.test to create and
execute recursive constructs
(functions/procedures/triggers/views/prepared statements).
mysql-test/extra/rpl_tests/rpl_foreign_key.test:
removed unnecessary set @@session.binlog_format
mysql-test/extra/rpl_tests/rpl_insert_delayed.test:
Filter out table id from table map events in binlog listing.
Got rid of $binlog_format_statement.
mysql-test/extra/rpl_tests/rpl_ndb_apply_status.test:
disable warnings around call to unsafe procedure
mysql-test/include/rpl_udf.inc:
Disabled warnings for code that generates warnings
for some binlog formats. That would otherwise cause
inconsistencies in the result file.
mysql-test/r/mysqldump.result:
Views are now unsafe if they contain a LIMIT clause.
That fixed BUG#45831. Due to BUG#45832, a warning is
printed for the CREATE VIEW statement.
mysql-test/r/sp_trans.result:
Unsafe statements in stored procedures did not give a warning if
binlog_format=statement. This is BUG#45824. Now they do, so this
result file gets a new warning.
mysql-test/suite/binlog/r/binlog_multi_engine.result:
Error message changed.
mysql-test/suite/binlog/r/binlog_statement_insert_delayed.result:
INSERT DELAYED didn't generate a warning when binlog_format=STATEMENT.
That was BUG#45825. Now there is a warning, so result file needs to be
updated.
mysql-test/suite/binlog/r/binlog_stm_ps.result:
Changed error message.
mysql-test/suite/binlog/r/binlog_unsafe.result:
updated result file:
- error message changed
- added test for most combinations of unsafe constructs invoked
from recursive constructs
- INSERT DELAYED now gives a warning (because BUG#45826 is fixed)
- INSERT..SELECT..LIMIT now gives a warning from inside recursive
constructs (because BUG#45785 was fixed)
- When a recursive construct (e.g., stored proc or function)
contains more than one statement, at least one of which is
unsafe, then all statements in the recursive construct give
warnings. This is a new bug introduced by this changeset.
It will be addressed in a post-push fix.
mysql-test/suite/binlog/t/binlog_innodb.test:
Changed error code for innodb updates with READ COMMITTED or
READ UNCOMMITTED transaction isolation level and
binlog_format=statement.
mysql-test/suite/binlog/t/binlog_multi_engine.test:
The error code has changed for statements where more than one
engine is involved and one of them is self-logging.
mysql-test/suite/binlog/t/binlog_unsafe-master.opt:
Since binlog_unsafe now tests unsafe-ness of UDF's, we need an extra
flag in the .opt file.
mysql-test/suite/binlog/t/binlog_unsafe.test:
- Clarified comment.
- Rewrote first part of test. Now it tests not only unsafe variables
and functions, but also unsafe-ness due to INSERT..SELECT..LIMIT,
INSERT DELAYED, insert into two autoinc columns, use of UDF's, and
access to log tables in the mysql database.
Also, in addition to functions, procedures, triggers, and prepared
statements, it now also tests views; and it constructs recursive
calls in two levels by combining these recursive constructs.
Part of the logic is in extra/rpl_tests/create_recursive_construct.inc.
- added tests for all special system variables that should not be unsafe.
- added specific tests for BUG#45785 and BUG#45825
mysql-test/suite/rpl/r/rpl_events.result:
updated result file
mysql-test/suite/rpl/r/rpl_extraColmaster_innodb.result:
updated result file
mysql-test/suite/rpl/r/rpl_extraColmaster_myisam.result:
updated result file
mysql-test/suite/rpl/r/rpl_foreign_key_innodb.result:
updated result file
mysql-test/suite/rpl/r/rpl_idempotency.result:
updated result file
mysql-test/suite/rpl/r/rpl_mix_found_rows.result:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). This file equals
the second half of the old rpl_found_rows.result, with the following
modifications:
- minor formatting changes
- additional initialization
mysql-test/suite/rpl/r/rpl_mix_insert_delayed.result:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result:
updated result file
mysql-test/suite/rpl/r/rpl_row_idempotency.result:
Moved the second half of rpl_idempotency.test, which only
executed in row mode, to rpl_row_idempotency.test. This is
the new result file.
mysql-test/suite/rpl/r/rpl_row_insert_delayed.result:
Got rid of unnecessary explicit setting of binlog format.
mysql-test/suite/rpl/r/rpl_stm_found_rows.result:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). Changes in
this file:
- minor formatting changes
- warning is now issued for unsafe statements inside procedures
(since BUG#45824 is fixed)
- second half of file is moved to rpl_mix_found_rows.result
mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/r/rpl_stm_loadfile.result:
error message changed
mysql-test/suite/rpl/r/rpl_temporary_errors.result:
updated result file
mysql-test/suite/rpl/r/rpl_udf.result:
Remove explicit set of binlog format (and triplicate test execution)
and rely on test system executing the test in all binlog formats.
mysql-test/suite/rpl/t/rpl_bug31076.test:
Test is only valid in mixed or row mode since it generates row events.
mysql-test/suite/rpl/t/rpl_events.test:
Removed explicit set of binlog_format and removed duplicate testing.
Instead, we rely on the test system to try all binlog formats.
mysql-test/suite/rpl/t/rpl_extraColmaster_innodb.test:
Removed triplicate testing and instead relying on test system.
Test is only relevant for row format since statement-based replication
cannot handle extra columns on master.
mysql-test/suite/rpl/t/rpl_extraColmaster_myisam.test:
Removed triplicate testing and instead relying on test system.
Test is only relevant for row format since statement-based replication
cannot handle extra columns on master.
mysql-test/suite/rpl/t/rpl_idempotency-slave.opt:
Removed .opt file to avoid server restarts.
mysql-test/suite/rpl/t/rpl_idempotency.test:
- Moved out row-only tests to a new test file, rpl_row_idempotency.test.
rpl_idempotency now only contains tests that execute in all
binlog_formats.
- While I was here, also removed .opt file to avoid server restarts.
The slave_exec_mode is now set inside the test instead.
mysql-test/suite/rpl/t/rpl_mix_found_rows.test:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). This file
contains the second half of the original rpl_found_rows.test with the
follwing changes:
- initialization
- removed SET_BINLOG_FORMAT and added have_binlog_format_mixed.inc
- minor formatting changes
mysql-test/suite/rpl/t/rpl_mix_insert_delayed.test:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/t/rpl_rbr_to_sbr.test:
Test cannot execute in statement mode, since we no longer
switch to row format when binlog_format=statement.
Enforced mixed mode throughout the test.
mysql-test/suite/rpl/t/rpl_row_idempotency.test:
Moved the second half of rpl_idempotency.test, which only
executed in row mode, to this new file. We now rely on the
test system to set binlog format.
mysql-test/suite/rpl/t/rpl_row_insert_delayed.test:
- Got rid of unnecessary explicit setting of binlog format.
- extra/rpl_tests/rpl_insert_delayed.test does not need the
$binlog_format_statement variable any more, so that was
removed.
mysql-test/suite/rpl/t/rpl_slave_skip.test:
The test switches binlog_format internally and master generates both
row and statement events. Hence, the slave must be able to log in both
statement and row format. Hence test was changed to only execute in
mixed mode.
mysql-test/suite/rpl/t/rpl_stm_found_rows.test:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). Changes in
this file:
- minor formatting changes
- added have_binlog_format_statement and removed SET BINLOG_FORMAT.
- second half of file is moved to rpl_mix_found_rows.test
- added cleanup code
mysql-test/suite/rpl/t/rpl_stm_insert_delayed.test:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test:
The test switches binlog_format internally and master generates both
row and statement events. Hence, the slave must be able to log in both
statement and row format. Hence test was changed to only execute in
mixed mode on slave.
mysql-test/suite/rpl/t/rpl_temporary_errors.test:
Removed explicit set of binlog format. Instead, the test now only
executes in row mode.
mysql-test/suite/rpl/t/rpl_udf.test:
Remove explicit set of binlog format (and triplicate test execution)
and rely on test system executing the test in all binlog formats.
mysql-test/suite/rpl_ndb/combinations:
Added combinations file for rpl_ndb.
mysql-test/suite/rpl_ndb/r/rpl_ndb_binlog_format_errors.result:
new result file
mysql-test/suite/rpl_ndb/r/rpl_ndb_circular_simplex.result:
updated result file
mysql-test/suite/rpl_ndb/t/rpl_ndb_2innodb.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_2myisam.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_basic.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors-master.opt:
new option file
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors-slave.opt:
new option file
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors.test:
New test case to verify all errors and warnings generated by
decide_logging_format.
mysql-test/suite/rpl_ndb/t/rpl_ndb_blob.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_blob2.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_circular.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_circular_simplex.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
While I was here, also made the test clean up after itself.
mysql-test/suite/rpl_ndb/t/rpl_ndb_commit_afterflush.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_ctype_ucs2_def.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_delete_nowhere.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_do_db.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_do_table.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_func003.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_innodb_trans.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_insert_ignore.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_mixed_engines_transactions.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_multi_update3.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_rep_ignore.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_row_001.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_sp003.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_sp006.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_trig004.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/t/partition_innodb_stmt.test:
Changed error code for innodb updates with READ COMMITTED or
READ UNCOMMITTED transaction isolation level and
binlog_format=statement.
sql/event_db_repository.cc:
Use member function to read current_stmt_binlog_row_based.
sql/events.cc:
Use member function to read current_stmt_binlog_row_based.
sql/ha_ndbcluster_binlog.cc:
reset_current_stmt_binlog_row_based() is not a no-op for the ndb_binlog
thread any more. Instead, the ndb_binlog thread now forces row mode both
initially and just after calling mysql_parse. (mysql_parse() is the only
place where reset_current_stmt_binlog_row_based() may be called from
the ndb_binlog thread, so these are the only two places that need to
change.)
sql/ha_partition.cc:
Use member function to read current_stmt_binlog_row_based.
sql/handler.cc:
Use member function to read current_stmt_binlog_row_based.
sql/item_create.cc:
Added DBUG_ENTER to some functions, to be able to trace when
set_stmt_unsafe is called.
sql/log.cc:
Use member function to read current_stmt_binlog_row_based.
sql/log_event.cc:
- Moved logic for changing to row format out of do_apply_event (and into
decide_logging_format).
- Added @todo comment for post-push cleanup.
sql/log_event_old.cc:
Move logic for changing to row format out of do_apply_event (and into
decide_logging_format).
sql/mysql_priv.h:
Make decide_logging_format() a member of the THD class, for two reasons:
- It is natural from an object-oriented perspective.
- decide_logging_format() needs to access private members of THD
(specifically, the new binlog_warning_flags field).
sql/rpl_injector.cc:
Removed call to set_current_stmt_binlog_row_based().
From now on, only decide_logging_fromat is allowed to modify
current_stmt_binlog_row_based. This call is from the ndb_binlog
thread, mostly executing code in ha_ndbcluster_binlog.cc.
This call can be safely removed, because:
- current_stmt_binlog_row_based is initialized for the ndb_binlog
thread's THD object when the THD object is created. So we're
not going to read uninitialized memory.
- The behavior of ndb_binlog thread does not use the state of the
current_stmt_binlog_row_based. It is conceivable that the
ndb_binlog thread would rely on the current_stmt_binlog_format
in two situations:
(1) when it calls mysql_parse;
(2) when it calls THD::binlog_query.
In case (1), it always clears THD::options&OPTION_BIN_LOG (because
run_query() in ha_ndbcluster_binlog.cc is only called with
disable_binlogging = TRUE).
In case (2), it always uses qtype=STMT_QUERY_TYPE.
sql/set_var.cc:
Added @todo comment for post-push cleanup.
sql/share/errmsg.txt:
Added new error messages and clarified ER_BINLOG_UNSAFE_STATEMENT.
sql/sp.cc:
Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
Got rid of MYSQL_QUERY_TYPE: it was equivalent to STMT_QUERY_TYPE.
sql/sp_head.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sp_head.h:
Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
sql/sql_acl.cc:
Got rid of MYSQL_QUERY_TYPE: it was equivalent to STMT_QUERY_TYPE.
sql/sql_base.cc:
- Made decide_logging_format take care of all logic for deciding the
logging format, and for determining the related warnings and errors.
See comment above decide_logging_format for details.
- Made decide_logging_format a member function of THD, since it needs
to access private members of THD and since its purpose is to update
the state of a THD object.
- Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
sql/sql_class.cc:
- Moved logic for determining unsafe warnings away from THD::binlog_query
(and into decide_logging_format()). Now, it works like this:
1. decide_logging_format detects that the current statement shall
produce a warning, if it ever makes it to the binlog
2. decide_logging_format sets a flag of THD::binlog_warning_flags.
3. THD::binlog_query reads the flag. If the flag is set, it generates
a warning.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_class.h:
- Added THD::binlog_warning_flags (see sql_class.cc for explanation).
- Made decide_logging_format() and reset_for_next_command() member
functions of THD (instead of standalone functions). This was needed
for two reasons: (1) the functions need to access the private member
THD::binlog_warning_flags; (2) the purpose of these functions is to
update the staet of a THD object, so from an object-oriented point
of view they should be member functions.
- Encapsulated current_stmt_binlog_row_based, so it is now private and
can only be accessed from a member function. Also changed the
data type to an enumeration instead of a bool.
- Removed MYSQL_QUERY_TYPE, because it was equivalent to
STMT_QUERY_TYPE anyways.
- When reset_current_stmt_binlog_row_based was called from the
ndb_binlog thread, it would behave as a no-op. This special
case has been removed, and the behavior of
reset_current_stmt_binlog_row_based does not depend on which thread
calls it any more. The special case did not serve any purpose,
since the ndb binlog thread did not take the
current_stmt_binlog_row_based flag into account anyways.
sql/sql_delete.cc:
- Moved logic for setting row format for DELETE..LIMIT away from
mysql_prepare_delete.
(Instead, we mark the statement as unsafe at parse time (sql_yacc.yy)
and rely on decide_logging_format() (sql_class.cc) to set row format.)
This is part of the fix for BUG#45831.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_insert.cc:
- Removed unnecessary calls to thd->lex->set_stmt_unsafe() and
thd->set_current_stmt_binlog_row_based_if_mixed() from
handle_delayed_insert(). The calls are unnecessary because they
have already been made; they were made in the constructor of
the `di' object.
- Since decide_logging_format() is now a member function of THD, code
that calls decide_logging_format() had to be updated.
- Added DBUG_ENTER call, to be able to trace when set_stmt_unsafe is
called.
- Moved call to set_stmt_unsafe() for INSERT..SELECT..LIMIT away from
mysql_insert_select_prepare() (and into decide_logging_format).
This is part of the fix for BUG#45831.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_lex.h:
- Added the flag BINLOG_STMT_FLAG_ROW_INJECTION to enum_binlog_stmt_flag.
This was necessary so that a statement can identify itself as a row
injection.
- Added appropriate setter and getter functions for the new flag.
- Added or clarified some comments.
- Added DBUG_ENTER()
sql/sql_load.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_parse.cc:
- Made mysql_reset_thd_for_next_command() clear thd->binlog_warning_flags.
- Since thd->binlog_warning_flags is private, it must be set in a
member function of THD. Hence, moved the body of
mysql_reset_thd_for_next_command() to the new member function
THD::reset_thd_for_next_command(), and made
mysql_reset_thd_for_next_command() call
THD::reset_thd_for_next_command().
- Removed confusing comment.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_repl.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_table.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_udf.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_update.cc:
Moved logic for setting row format for UPDATE..LIMIT away from
mysql_prepare_update.
(Instead, we mark the statement as unsafe at parse time (sql_yacc.yy)
and rely on decide_logging_format() (sql_class.cc) to set row format.)
This is part of the fix for BUG#45831.
sql/sql_yacc.yy:
Made INSERT DELAYED, INSERT..SELECT..LIMIT, UPDATE..LIMIT, and
DELETE..LIMIT mark themselves as unsafe at parse time (instead
of at execution time).
This is part of the fixes BUG#45831 and BUG#45825.
storage/example/ha_example.cc:
Made exampledb accept inserts. This was needed by the new test case
rpl_ndb_binlog_format_errors, because it needs an engine that
is statement-only (and accepts inserts).
storage/example/ha_example.h:
Made exampledb a statement-only engine instead of a row-only engine.
No existing test relied exampledb's row-only capabilities. The new
test case rpl_ndb_binlog_format_errors needs an engine that is
statement-only.
storage/innobase/handler/ha_innodb.cc:
- Changed error error code and message given by innodb when
binlog_format=STATEMENT and transaction isolation level is
READ COMMITTED or READ UNCOMMITTED.
- While I was here, also simplified the condition for
checking when to give the error.
2009-07-14 21:31:19 +02:00
|
|
|
We are saying that this engine is just statement capable to have
|
|
|
|
an engine that can only handle statement-based logging. This is
|
|
|
|
used in testing.
|
2007-05-28 12:50:29 +02:00
|
|
|
*/
|
BUG#39934: Slave stops for engine that only support row-based logging
General overview:
The logic for switching to row format when binlog_format=MIXED had
numerous flaws. The underlying problem was the lack of a consistent
architecture.
General purpose of this changeset:
This changeset introduces an architecture for switching to row format
when binlog_format=MIXED. It enforces the architecture where it has
to. It leaves some bugs to be fixed later. It adds extensive tests to
verify that unsafe statements work as expected and that appropriate
errors are produced by problems with the selection of binlog format.
It was not practical to split this into smaller pieces of work.
Problem 1:
To determine the logging mode, the code has to take several parameters
into account (namely: (1) the value of binlog_format; (2) the
capabilities of the engines; (3) the type of the current statement:
normal, unsafe, or row injection). These parameters may conflict in
several ways, namely:
- binlog_format=STATEMENT for a row injection
- binlog_format=STATEMENT for an unsafe statement
- binlog_format=STATEMENT for an engine only supporting row logging
- binlog_format=ROW for an engine only supporting statement logging
- statement is unsafe and engine does not support row logging
- row injection in a table that does not support statement logging
- statement modifies one table that does not support row logging and
one that does not support statement logging
Several of these conflicts were not detected, or were detected with
an inappropriate error message. The problem of BUG#39934 was that no
appropriate error message was written for the case when an engine
only supporting row logging executed a row injection with
binlog_format=ROW. However, all above cases must be handled.
Fix 1:
Introduce new error codes (sql/share/errmsg.txt). Ensure that all
conditions are detected and handled in decide_logging_format()
Problem 2:
The binlog format shall be determined once per statement, in
decide_logging_format(). It shall not be changed before or after that.
Before decide_logging_format() is called, all information necessary to
determine the logging format must be available. This principle ensures
that all unsafe statements are handled in a consistent way.
However, this principle is not followed:
thd->set_current_stmt_binlog_row_based_if_mixed() is called in several
places, including from code executing UPDATE..LIMIT,
INSERT..SELECT..LIMIT, DELETE..LIMIT, INSERT DELAYED, and
SET @@binlog_format. After Problem 1 was fixed, that caused
inconsistencies where these unsafe statements would not print the
appropriate warnings or errors for some of the conflicts.
Fix 2:
Remove calls to THD::set_current_stmt_binlog_row_based_if_mixed() from
code executed after decide_logging_format(). Compensate by calling the
set_current_stmt_unsafe() at parse time. This way, all unsafe statements
are detected by decide_logging_format().
Problem 3:
INSERT DELAYED is not unsafe: it is logged in statement format even if
binlog_format=MIXED, and no warning is printed even if
binlog_format=STATEMENT. This is BUG#45825.
Fix 3:
Made INSERT DELAYED set itself to unsafe at parse time. This allows
decide_logging_format() to detect that a warning should be printed or
the binlog_format changed.
Problem 4:
LIMIT clause were not marked as unsafe when executed inside stored
functions/triggers/views/prepared statements. This is
BUG#45785.
Fix 4:
Make statements containing the LIMIT clause marked as unsafe at
parse time, instead of at execution time. This allows propagating
unsafe-ness to the view.
mysql-test/extra/rpl_tests/create_recursive_construct.inc:
Added auxiliary file used by binlog_unsafe.test to create and
execute recursive constructs
(functions/procedures/triggers/views/prepared statements).
mysql-test/extra/rpl_tests/rpl_foreign_key.test:
removed unnecessary set @@session.binlog_format
mysql-test/extra/rpl_tests/rpl_insert_delayed.test:
Filter out table id from table map events in binlog listing.
Got rid of $binlog_format_statement.
mysql-test/extra/rpl_tests/rpl_ndb_apply_status.test:
disable warnings around call to unsafe procedure
mysql-test/include/rpl_udf.inc:
Disabled warnings for code that generates warnings
for some binlog formats. That would otherwise cause
inconsistencies in the result file.
mysql-test/r/mysqldump.result:
Views are now unsafe if they contain a LIMIT clause.
That fixed BUG#45831. Due to BUG#45832, a warning is
printed for the CREATE VIEW statement.
mysql-test/r/sp_trans.result:
Unsafe statements in stored procedures did not give a warning if
binlog_format=statement. This is BUG#45824. Now they do, so this
result file gets a new warning.
mysql-test/suite/binlog/r/binlog_multi_engine.result:
Error message changed.
mysql-test/suite/binlog/r/binlog_statement_insert_delayed.result:
INSERT DELAYED didn't generate a warning when binlog_format=STATEMENT.
That was BUG#45825. Now there is a warning, so result file needs to be
updated.
mysql-test/suite/binlog/r/binlog_stm_ps.result:
Changed error message.
mysql-test/suite/binlog/r/binlog_unsafe.result:
updated result file:
- error message changed
- added test for most combinations of unsafe constructs invoked
from recursive constructs
- INSERT DELAYED now gives a warning (because BUG#45826 is fixed)
- INSERT..SELECT..LIMIT now gives a warning from inside recursive
constructs (because BUG#45785 was fixed)
- When a recursive construct (e.g., stored proc or function)
contains more than one statement, at least one of which is
unsafe, then all statements in the recursive construct give
warnings. This is a new bug introduced by this changeset.
It will be addressed in a post-push fix.
mysql-test/suite/binlog/t/binlog_innodb.test:
Changed error code for innodb updates with READ COMMITTED or
READ UNCOMMITTED transaction isolation level and
binlog_format=statement.
mysql-test/suite/binlog/t/binlog_multi_engine.test:
The error code has changed for statements where more than one
engine is involved and one of them is self-logging.
mysql-test/suite/binlog/t/binlog_unsafe-master.opt:
Since binlog_unsafe now tests unsafe-ness of UDF's, we need an extra
flag in the .opt file.
mysql-test/suite/binlog/t/binlog_unsafe.test:
- Clarified comment.
- Rewrote first part of test. Now it tests not only unsafe variables
and functions, but also unsafe-ness due to INSERT..SELECT..LIMIT,
INSERT DELAYED, insert into two autoinc columns, use of UDF's, and
access to log tables in the mysql database.
Also, in addition to functions, procedures, triggers, and prepared
statements, it now also tests views; and it constructs recursive
calls in two levels by combining these recursive constructs.
Part of the logic is in extra/rpl_tests/create_recursive_construct.inc.
- added tests for all special system variables that should not be unsafe.
- added specific tests for BUG#45785 and BUG#45825
mysql-test/suite/rpl/r/rpl_events.result:
updated result file
mysql-test/suite/rpl/r/rpl_extraColmaster_innodb.result:
updated result file
mysql-test/suite/rpl/r/rpl_extraColmaster_myisam.result:
updated result file
mysql-test/suite/rpl/r/rpl_foreign_key_innodb.result:
updated result file
mysql-test/suite/rpl/r/rpl_idempotency.result:
updated result file
mysql-test/suite/rpl/r/rpl_mix_found_rows.result:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). This file equals
the second half of the old rpl_found_rows.result, with the following
modifications:
- minor formatting changes
- additional initialization
mysql-test/suite/rpl/r/rpl_mix_insert_delayed.result:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/r/rpl_rbr_to_sbr.result:
updated result file
mysql-test/suite/rpl/r/rpl_row_idempotency.result:
Moved the second half of rpl_idempotency.test, which only
executed in row mode, to rpl_row_idempotency.test. This is
the new result file.
mysql-test/suite/rpl/r/rpl_row_insert_delayed.result:
Got rid of unnecessary explicit setting of binlog format.
mysql-test/suite/rpl/r/rpl_stm_found_rows.result:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). Changes in
this file:
- minor formatting changes
- warning is now issued for unsafe statements inside procedures
(since BUG#45824 is fixed)
- second half of file is moved to rpl_mix_found_rows.result
mysql-test/suite/rpl/r/rpl_stm_insert_delayed.result:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/r/rpl_stm_loadfile.result:
error message changed
mysql-test/suite/rpl/r/rpl_temporary_errors.result:
updated result file
mysql-test/suite/rpl/r/rpl_udf.result:
Remove explicit set of binlog format (and triplicate test execution)
and rely on test system executing the test in all binlog formats.
mysql-test/suite/rpl/t/rpl_bug31076.test:
Test is only valid in mixed or row mode since it generates row events.
mysql-test/suite/rpl/t/rpl_events.test:
Removed explicit set of binlog_format and removed duplicate testing.
Instead, we rely on the test system to try all binlog formats.
mysql-test/suite/rpl/t/rpl_extraColmaster_innodb.test:
Removed triplicate testing and instead relying on test system.
Test is only relevant for row format since statement-based replication
cannot handle extra columns on master.
mysql-test/suite/rpl/t/rpl_extraColmaster_myisam.test:
Removed triplicate testing and instead relying on test system.
Test is only relevant for row format since statement-based replication
cannot handle extra columns on master.
mysql-test/suite/rpl/t/rpl_idempotency-slave.opt:
Removed .opt file to avoid server restarts.
mysql-test/suite/rpl/t/rpl_idempotency.test:
- Moved out row-only tests to a new test file, rpl_row_idempotency.test.
rpl_idempotency now only contains tests that execute in all
binlog_formats.
- While I was here, also removed .opt file to avoid server restarts.
The slave_exec_mode is now set inside the test instead.
mysql-test/suite/rpl/t/rpl_mix_found_rows.test:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). This file
contains the second half of the original rpl_found_rows.test with the
follwing changes:
- initialization
- removed SET_BINLOG_FORMAT and added have_binlog_format_mixed.inc
- minor formatting changes
mysql-test/suite/rpl/t/rpl_mix_insert_delayed.test:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/t/rpl_rbr_to_sbr.test:
Test cannot execute in statement mode, since we no longer
switch to row format when binlog_format=statement.
Enforced mixed mode throughout the test.
mysql-test/suite/rpl/t/rpl_row_idempotency.test:
Moved the second half of rpl_idempotency.test, which only
executed in row mode, to this new file. We now rely on the
test system to set binlog format.
mysql-test/suite/rpl/t/rpl_row_insert_delayed.test:
- Got rid of unnecessary explicit setting of binlog format.
- extra/rpl_tests/rpl_insert_delayed.test does not need the
$binlog_format_statement variable any more, so that was
removed.
mysql-test/suite/rpl/t/rpl_slave_skip.test:
The test switches binlog_format internally and master generates both
row and statement events. Hence, the slave must be able to log in both
statement and row format. Hence test was changed to only execute in
mixed mode.
mysql-test/suite/rpl/t/rpl_stm_found_rows.test:
Split rpl_found_rows.test into rpl_mix_found_rows.test (a new file) and
rpl_stm_found_rows.test (renamed rpl_found_rows.test). Changes in
this file:
- minor formatting changes
- added have_binlog_format_statement and removed SET BINLOG_FORMAT.
- second half of file is moved to rpl_mix_found_rows.test
- added cleanup code
mysql-test/suite/rpl/t/rpl_stm_insert_delayed.test:
Moved out code operating in mixed mode from rpl_stm_insert_delayed
(into rpl_mix_insert_delayed) and got rid of explicit setting of
binlog format.
mysql-test/suite/rpl/t/rpl_switch_stm_row_mixed.test:
The test switches binlog_format internally and master generates both
row and statement events. Hence, the slave must be able to log in both
statement and row format. Hence test was changed to only execute in
mixed mode on slave.
mysql-test/suite/rpl/t/rpl_temporary_errors.test:
Removed explicit set of binlog format. Instead, the test now only
executes in row mode.
mysql-test/suite/rpl/t/rpl_udf.test:
Remove explicit set of binlog format (and triplicate test execution)
and rely on test system executing the test in all binlog formats.
mysql-test/suite/rpl_ndb/combinations:
Added combinations file for rpl_ndb.
mysql-test/suite/rpl_ndb/r/rpl_ndb_binlog_format_errors.result:
new result file
mysql-test/suite/rpl_ndb/r/rpl_ndb_circular_simplex.result:
updated result file
mysql-test/suite/rpl_ndb/t/rpl_ndb_2innodb.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_2myisam.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_basic.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors-master.opt:
new option file
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors-slave.opt:
new option file
mysql-test/suite/rpl_ndb/t/rpl_ndb_binlog_format_errors.test:
New test case to verify all errors and warnings generated by
decide_logging_format.
mysql-test/suite/rpl_ndb/t/rpl_ndb_blob.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_blob2.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_circular.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_circular_simplex.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
While I was here, also made the test clean up after itself.
mysql-test/suite/rpl_ndb/t/rpl_ndb_commit_afterflush.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_ctype_ucs2_def.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_delete_nowhere.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_do_db.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_do_table.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_func003.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_innodb_trans.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_insert_ignore.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_mixed_engines_transactions.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_multi_update3.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_rep_ignore.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_row_001.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_sp003.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_sp006.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/suite/rpl_ndb/t/rpl_ndb_trig004.test:
The test needs slave to be able to switch to row mode, so the
test was changed to only execute in mixed and row mode.
mysql-test/t/partition_innodb_stmt.test:
Changed error code for innodb updates with READ COMMITTED or
READ UNCOMMITTED transaction isolation level and
binlog_format=statement.
sql/event_db_repository.cc:
Use member function to read current_stmt_binlog_row_based.
sql/events.cc:
Use member function to read current_stmt_binlog_row_based.
sql/ha_ndbcluster_binlog.cc:
reset_current_stmt_binlog_row_based() is not a no-op for the ndb_binlog
thread any more. Instead, the ndb_binlog thread now forces row mode both
initially and just after calling mysql_parse. (mysql_parse() is the only
place where reset_current_stmt_binlog_row_based() may be called from
the ndb_binlog thread, so these are the only two places that need to
change.)
sql/ha_partition.cc:
Use member function to read current_stmt_binlog_row_based.
sql/handler.cc:
Use member function to read current_stmt_binlog_row_based.
sql/item_create.cc:
Added DBUG_ENTER to some functions, to be able to trace when
set_stmt_unsafe is called.
sql/log.cc:
Use member function to read current_stmt_binlog_row_based.
sql/log_event.cc:
- Moved logic for changing to row format out of do_apply_event (and into
decide_logging_format).
- Added @todo comment for post-push cleanup.
sql/log_event_old.cc:
Move logic for changing to row format out of do_apply_event (and into
decide_logging_format).
sql/mysql_priv.h:
Make decide_logging_format() a member of the THD class, for two reasons:
- It is natural from an object-oriented perspective.
- decide_logging_format() needs to access private members of THD
(specifically, the new binlog_warning_flags field).
sql/rpl_injector.cc:
Removed call to set_current_stmt_binlog_row_based().
From now on, only decide_logging_fromat is allowed to modify
current_stmt_binlog_row_based. This call is from the ndb_binlog
thread, mostly executing code in ha_ndbcluster_binlog.cc.
This call can be safely removed, because:
- current_stmt_binlog_row_based is initialized for the ndb_binlog
thread's THD object when the THD object is created. So we're
not going to read uninitialized memory.
- The behavior of ndb_binlog thread does not use the state of the
current_stmt_binlog_row_based. It is conceivable that the
ndb_binlog thread would rely on the current_stmt_binlog_format
in two situations:
(1) when it calls mysql_parse;
(2) when it calls THD::binlog_query.
In case (1), it always clears THD::options&OPTION_BIN_LOG (because
run_query() in ha_ndbcluster_binlog.cc is only called with
disable_binlogging = TRUE).
In case (2), it always uses qtype=STMT_QUERY_TYPE.
sql/set_var.cc:
Added @todo comment for post-push cleanup.
sql/share/errmsg.txt:
Added new error messages and clarified ER_BINLOG_UNSAFE_STATEMENT.
sql/sp.cc:
Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
Got rid of MYSQL_QUERY_TYPE: it was equivalent to STMT_QUERY_TYPE.
sql/sp_head.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sp_head.h:
Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
sql/sql_acl.cc:
Got rid of MYSQL_QUERY_TYPE: it was equivalent to STMT_QUERY_TYPE.
sql/sql_base.cc:
- Made decide_logging_format take care of all logic for deciding the
logging format, and for determining the related warnings and errors.
See comment above decide_logging_format for details.
- Made decide_logging_format a member function of THD, since it needs
to access private members of THD and since its purpose is to update
the state of a THD object.
- Added DBUG_ENTER, to be able to trace when set_stmt_unsafe is called.
sql/sql_class.cc:
- Moved logic for determining unsafe warnings away from THD::binlog_query
(and into decide_logging_format()). Now, it works like this:
1. decide_logging_format detects that the current statement shall
produce a warning, if it ever makes it to the binlog
2. decide_logging_format sets a flag of THD::binlog_warning_flags.
3. THD::binlog_query reads the flag. If the flag is set, it generates
a warning.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_class.h:
- Added THD::binlog_warning_flags (see sql_class.cc for explanation).
- Made decide_logging_format() and reset_for_next_command() member
functions of THD (instead of standalone functions). This was needed
for two reasons: (1) the functions need to access the private member
THD::binlog_warning_flags; (2) the purpose of these functions is to
update the staet of a THD object, so from an object-oriented point
of view they should be member functions.
- Encapsulated current_stmt_binlog_row_based, so it is now private and
can only be accessed from a member function. Also changed the
data type to an enumeration instead of a bool.
- Removed MYSQL_QUERY_TYPE, because it was equivalent to
STMT_QUERY_TYPE anyways.
- When reset_current_stmt_binlog_row_based was called from the
ndb_binlog thread, it would behave as a no-op. This special
case has been removed, and the behavior of
reset_current_stmt_binlog_row_based does not depend on which thread
calls it any more. The special case did not serve any purpose,
since the ndb binlog thread did not take the
current_stmt_binlog_row_based flag into account anyways.
sql/sql_delete.cc:
- Moved logic for setting row format for DELETE..LIMIT away from
mysql_prepare_delete.
(Instead, we mark the statement as unsafe at parse time (sql_yacc.yy)
and rely on decide_logging_format() (sql_class.cc) to set row format.)
This is part of the fix for BUG#45831.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_insert.cc:
- Removed unnecessary calls to thd->lex->set_stmt_unsafe() and
thd->set_current_stmt_binlog_row_based_if_mixed() from
handle_delayed_insert(). The calls are unnecessary because they
have already been made; they were made in the constructor of
the `di' object.
- Since decide_logging_format() is now a member function of THD, code
that calls decide_logging_format() had to be updated.
- Added DBUG_ENTER call, to be able to trace when set_stmt_unsafe is
called.
- Moved call to set_stmt_unsafe() for INSERT..SELECT..LIMIT away from
mysql_insert_select_prepare() (and into decide_logging_format).
This is part of the fix for BUG#45831.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_lex.h:
- Added the flag BINLOG_STMT_FLAG_ROW_INJECTION to enum_binlog_stmt_flag.
This was necessary so that a statement can identify itself as a row
injection.
- Added appropriate setter and getter functions for the new flag.
- Added or clarified some comments.
- Added DBUG_ENTER()
sql/sql_load.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_parse.cc:
- Made mysql_reset_thd_for_next_command() clear thd->binlog_warning_flags.
- Since thd->binlog_warning_flags is private, it must be set in a
member function of THD. Hence, moved the body of
mysql_reset_thd_for_next_command() to the new member function
THD::reset_thd_for_next_command(), and made
mysql_reset_thd_for_next_command() call
THD::reset_thd_for_next_command().
- Removed confusing comment.
- Use member function to read current_stmt_binlog_row_based.
sql/sql_repl.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_table.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_udf.cc:
Use member function to read current_stmt_binlog_row_based.
sql/sql_update.cc:
Moved logic for setting row format for UPDATE..LIMIT away from
mysql_prepare_update.
(Instead, we mark the statement as unsafe at parse time (sql_yacc.yy)
and rely on decide_logging_format() (sql_class.cc) to set row format.)
This is part of the fix for BUG#45831.
sql/sql_yacc.yy:
Made INSERT DELAYED, INSERT..SELECT..LIMIT, UPDATE..LIMIT, and
DELETE..LIMIT mark themselves as unsafe at parse time (instead
of at execution time).
This is part of the fixes BUG#45831 and BUG#45825.
storage/example/ha_example.cc:
Made exampledb accept inserts. This was needed by the new test case
rpl_ndb_binlog_format_errors, because it needs an engine that
is statement-only (and accepts inserts).
storage/example/ha_example.h:
Made exampledb a statement-only engine instead of a row-only engine.
No existing test relied exampledb's row-only capabilities. The new
test case rpl_ndb_binlog_format_errors needs an engine that is
statement-only.
storage/innobase/handler/ha_innodb.cc:
- Changed error error code and message given by innodb when
binlog_format=STATEMENT and transaction isolation level is
READ COMMITTED or READ UNCOMMITTED.
- While I was here, also simplified the condition for
checking when to give the error.
2009-07-14 21:31:19 +02:00
|
|
|
return HA_BINLOG_STMT_CAPABLE;
|
2004-04-01 01:02:42 -08:00
|
|
|
}
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
This is a bitmap of flags that indicates how the storage engine
|
2004-05-11 15:59:20 -07:00
|
|
|
implements indexes. The current index flags are documented in
|
2006-12-18 14:28:15 -08:00
|
|
|
handler.h. If you do not implement indexes, just return zero here.
|
2004-07-08 15:45:25 +03:00
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
@details
|
|
|
|
part is the key part to check. First key part is 0.
|
2023-08-14 20:56:27 +01:00
|
|
|
If all_parts is set, MariaDB wants to know the flags for the combined
|
2006-12-18 14:28:15 -08:00
|
|
|
index, up to and including 'part'.
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
ulong index_flags(uint inx, uint part, bool all_parts) const override
|
2004-04-01 01:02:42 -08:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2004-06-23 21:26:34 +02:00
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @brief
|
|
|
|
unireg.cc will call max_supported_record_length(), max_supported_keys(),
|
|
|
|
max_supported_key_parts(), uint max_supported_key_length()
|
|
|
|
to make sure that the storage engine can handle the data it is about to
|
2023-08-14 20:56:27 +01:00
|
|
|
send. Return *real* limits of your storage engine here; MariaDB will do
|
2006-12-18 14:28:15 -08:00
|
|
|
min(your_limits, MySQL_limits) automatically.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
uint max_supported_record_length() const override { return HA_MAX_REC_LENGTH; }
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
unireg.cc will call this to make sure that the storage engine can handle
|
|
|
|
the data it is about to send. Return *real* limits of your storage engine
|
2023-08-14 20:56:27 +01:00
|
|
|
here; MariaDB will do min(your_limits, MySQL_limits) automatically.
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
@details
|
|
|
|
There is no need to implement ..._key_... methods if your engine doesn't
|
|
|
|
support indexes.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
uint max_supported_keys() const override { return 0; }
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
unireg.cc will call this to make sure that the storage engine can handle
|
|
|
|
the data it is about to send. Return *real* limits of your storage engine
|
2023-08-14 20:56:27 +01:00
|
|
|
here; MariaDB will do min(your_limits, MySQL_limits) automatically.
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
@details
|
|
|
|
There is no need to implement ..._key_... methods if your engine doesn't
|
|
|
|
support indexes.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
uint max_supported_key_parts() const override { return 0; }
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
unireg.cc will call this to make sure that the storage engine can handle
|
|
|
|
the data it is about to send. Return *real* limits of your storage engine
|
2023-08-14 20:56:27 +01:00
|
|
|
here; MariaDB will do min(your_limits, MySQL_limits) automatically.
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
@details
|
|
|
|
There is no need to implement ..._key_... methods if your engine doesn't
|
|
|
|
support indexes.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
uint max_supported_key_length() const override { return 0; }
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
Called in test_quick_select to determine cost of table scan
|
2004-04-01 01:02:42 -08:00
|
|
|
*/
|
2024-07-05 12:45:07 +02:00
|
|
|
virtual IO_AND_CPU_COST scan_time() override
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
{
|
|
|
|
IO_AND_CPU_COST cost;
|
|
|
|
/* 0 blocks, 0.001 ms / row */
|
2022-09-30 17:10:37 +03:00
|
|
|
cost.io= (double) (stats.records+stats.deleted) * DISK_READ_COST;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.cpu= 0;
|
|
|
|
return cost;
|
|
|
|
}
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
This method will never be called if you do not implement indexes.
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
2024-07-05 12:45:07 +02:00
|
|
|
IO_AND_CPU_COST keyread_time(uint, ulong, ha_rows rows,
|
|
|
|
ulonglong blocks) override
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
{
|
|
|
|
IO_AND_CPU_COST cost;
|
2022-09-30 17:10:37 +03:00
|
|
|
cost.io= blocks * DISK_READ_COST;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.cpu= (double) rows * 0.001;
|
|
|
|
return cost;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief
|
|
|
|
Cost of fetching 'rows' records through rnd_pos()
|
|
|
|
*/
|
2024-07-05 12:45:07 +02:00
|
|
|
IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
{
|
|
|
|
IO_AND_CPU_COST cost;
|
|
|
|
/* 0 blocks, 0.001 ms / row */
|
|
|
|
cost.io= 0;
|
2022-09-30 17:10:37 +03:00
|
|
|
cost.cpu= (double) rows * DISK_READ_COST;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
return cost;
|
|
|
|
}
|
2004-04-01 01:02:42 -08:00
|
|
|
|
2004-06-23 21:26:34 +02:00
|
|
|
/*
|
2006-12-18 14:28:15 -08:00
|
|
|
Everything below are methods that we implement in ha_example.cc.
|
2004-06-23 21:26:34 +02:00
|
|
|
|
|
|
|
Most of these methods are not obligatory, skip them and
|
2023-08-14 20:56:27 +01:00
|
|
|
MariaDB will treat them as not implemented
|
2004-05-11 15:59:20 -07:00
|
|
|
*/
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc; it's a required method.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int open(const char *name, int mode, uint test_if_locked) override; // required
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc; it's a required method.
|
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int close(void) override; // required
|
2004-06-23 21:26:34 +02:00
|
|
|
|
2006-12-18 14:28:15 -08:00
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int write_row(const uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int update_row(const uchar *old_data, const uchar *new_data) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int delete_row(const uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2007-08-13 16:11:25 +03:00
|
|
|
int index_read_map(uchar *buf, const uchar *key,
|
2024-06-12 09:46:26 -04:00
|
|
|
key_part_map keypart_map, enum ha_rkey_function find_flag) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int index_next(uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int index_prev(uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int index_first(uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
We implement this in ha_example.cc. It's not an obligatory method;
|
2023-08-14 20:56:27 +01:00
|
|
|
skip it and and MariaDB will treat it as not implemented.
|
2006-12-18 14:28:15 -08:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int index_last(uchar *buf) override;
|
2006-12-18 14:28:15 -08:00
|
|
|
|
|
|
|
/** @brief
|
|
|
|
Unlike index_init(), rnd_init() can be called two consecutive times
|
|
|
|
without rnd_end() in between (it only makes sense if scan=1). In this
|
|
|
|
case, the second call should prepare for the new table scan (e.g if
|
|
|
|
rnd_init() allocates the cursor, the second call should position the
|
|
|
|
cursor to the start of the table; no need to deallocate and allocate
|
|
|
|
it again. This is a required method.
|
2004-06-23 21:26:34 +02:00
|
|
|
*/
|
2024-06-12 09:46:26 -04:00
|
|
|
int rnd_init(bool scan) override; //required
|
|
|
|
int rnd_end() override;
|
|
|
|
int rnd_next(uchar *buf) override; ///< required
|
|
|
|
int rnd_pos(uchar *buf, uchar *pos) override; ///< required
|
|
|
|
void position(const uchar *record) override; ///< required
|
|
|
|
int info(uint) override; ///< required
|
|
|
|
int extra(enum ha_extra_function operation) override;
|
|
|
|
int external_lock(THD *thd, int lock_type) override; ///< required
|
|
|
|
int delete_all_rows(void) override;
|
2020-02-27 19:12:27 +02:00
|
|
|
ha_rows records_in_range(uint inx, const key_range *min_key,
|
2024-06-12 09:46:26 -04:00
|
|
|
const key_range *max_key, page_range *pages) override;
|
|
|
|
int delete_table(const char *from) override;
|
2004-06-23 21:26:34 +02:00
|
|
|
int create(const char *name, TABLE *form,
|
2024-06-12 09:46:26 -04:00
|
|
|
HA_CREATE_INFO *create_info) override; ///< required
|
2014-03-02 15:02:13 +01:00
|
|
|
enum_alter_inplace_result
|
|
|
|
check_if_supported_inplace_alter(TABLE* altered_table,
|
2024-06-12 09:46:26 -04:00
|
|
|
Alter_inplace_info* ha_alter_info) override;
|
2004-04-01 01:02:42 -08:00
|
|
|
|
|
|
|
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
|
2024-06-12 09:46:26 -04:00
|
|
|
enum thr_lock_type lock_type) override; ///< required
|
2006-12-18 17:16:40 -08:00
|
|
|
};
|