2012-02-17 12:19:38 +01:00
|
|
|
/*
|
2020-09-23 12:27:56 +03:00
|
|
|
Copyright (c) 2010, 2020, MariaDB
|
2012-02-17 12:19:38 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
Update FSF address
This commit is based on the work of Michal Schorm, rebased on the
earliest MariaDB version.
Th command line used to generate this diff was:
find ./ -type f \
-exec sed -i -e 's/Foundation, Inc., 59 Temple Place, Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/Foundation, Inc. 59 Temple Place.* Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/MA.*.....-1307.*USA/MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/Foundation, Inc., 59 Temple/Foundation, Inc., 51 Franklin/g' {} \; \
-exec sed -i -e 's/Place, Suite 330, Boston, MA.*02111-1307.*USA/Street, Fifth Floor, Boston, MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/MA.*.....-1307/MA 02110-1335/g' {} \;
2019-05-10 20:49:46 +03:00
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
2012-02-17 12:19:38 +01:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/**
|
|
|
|
@file
|
|
|
|
|
|
|
|
@brief
|
2010-04-25 12:23:52 +04:00
|
|
|
Semi-join subquery optimizations code
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef USE_PRAGMA_IMPLEMENTATION
|
|
|
|
#pragma implementation // gcc: Class implementation
|
|
|
|
#endif
|
|
|
|
|
2017-06-18 06:42:16 +03:00
|
|
|
#include "mariadb.h"
|
2012-01-13 15:50:02 +01:00
|
|
|
#include "sql_base.h"
|
2019-05-27 19:08:00 -07:00
|
|
|
#include "sql_const.h"
|
2010-02-16 00:53:06 +03:00
|
|
|
#include "sql_select.h"
|
2022-07-11 16:57:37 -07:00
|
|
|
#include "sql_update.h" // class Sql_cmd_update
|
|
|
|
#include "sql_delete.h" // class Sql_cmd_delete
|
2012-01-13 15:50:02 +01:00
|
|
|
#include "filesort.h"
|
2010-02-16 00:53:06 +03:00
|
|
|
#include "opt_subselect.h"
|
2011-04-25 17:22:25 +02:00
|
|
|
#include "sql_test.h"
|
2010-02-16 00:53:06 +03:00
|
|
|
#include <my_bit.h>
|
2019-02-13 11:22:16 +05:30
|
|
|
#include "opt_trace.h"
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
#include "optimizer_defaults.h"
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2010-05-27 16:14:25 +04:00
|
|
|
/*
|
|
|
|
This file contains optimizations for semi-join subqueries.
|
|
|
|
|
|
|
|
Contents
|
|
|
|
--------
|
|
|
|
1. What is a semi-join subquery
|
|
|
|
2. General idea about semi-join execution
|
|
|
|
2.1 Correlated vs uncorrelated semi-joins
|
|
|
|
2.2 Mergeable vs non-mergeable semi-joins
|
|
|
|
3. Code-level view of semi-join processing
|
|
|
|
3.1 Conversion and pre-optimization data structures
|
|
|
|
3.1.1 Merged semi-join TABLE_LIST object
|
|
|
|
3.1.2 Non-merged semi-join data structure
|
|
|
|
3.2 Semi-joins and join optimization
|
2010-07-10 20:51:12 +03:00
|
|
|
3.2.1 Non-merged semi-joins and join optimization
|
|
|
|
3.2.2 Merged semi-joins and join optimization
|
2010-05-27 16:14:25 +04:00
|
|
|
3.3 Semi-joins and query execution
|
|
|
|
|
|
|
|
1. What is a semi-join subquery
|
|
|
|
-------------------------------
|
|
|
|
We use this definition of semi-join:
|
|
|
|
|
|
|
|
outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that
|
|
|
|
exist inner_tbl.row, for which
|
|
|
|
cond(outer_tbl.row,inner_tbl.row)
|
|
|
|
is satisfied}
|
|
|
|
|
|
|
|
That is, semi-join operation is similar to inner join operation, with
|
|
|
|
exception that we don't care how many matches a row from outer_tbl has in
|
|
|
|
inner_tbl.
|
|
|
|
|
2010-06-08 18:22:31 +04:00
|
|
|
In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of
|
|
|
|
the WHERE/ON clause.
|
2010-05-27 16:14:25 +04:00
|
|
|
|
|
|
|
2. General idea about semi-join execution
|
|
|
|
-----------------------------------------
|
2010-06-08 18:22:31 +04:00
|
|
|
We can execute semi-join in a way similar to inner join, with exception that
|
|
|
|
we need to somehow ensure that we do not generate record combinations that
|
2010-05-27 16:14:25 +04:00
|
|
|
differ only in rows of inner tables.
|
|
|
|
There are a number of different ways to achieve this property, implemented by
|
|
|
|
a number of semi-join execution strategies.
|
|
|
|
Some strategies can handle any semi-joins, others can be applied only to
|
|
|
|
semi-joins that have certain properties that are described below:
|
|
|
|
|
|
|
|
2.1 Correlated vs uncorrelated semi-joins
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Uncorrelated semi-joins are special in the respect that they allow to
|
|
|
|
- execute the subquery (possible as it's uncorrelated)
|
|
|
|
- somehow make sure that generated set does not have duplicates
|
|
|
|
- perform an inner join with outer tables.
|
|
|
|
|
|
|
|
or, rephrasing in SQL form:
|
|
|
|
|
|
|
|
SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond)
|
|
|
|
->
|
|
|
|
SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond)
|
|
|
|
|
|
|
|
2.2 Mergeable vs non-mergeable semi-joins
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Semi-join operation has some degree of commutability with inner join
|
|
|
|
operation: we can join subquery's tables with outside table(s) and eliminate
|
|
|
|
duplicate record combination after that:
|
|
|
|
|
|
|
|
ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) ->
|
|
|
|
|
|
|
|
|
+-------------------------------+
|
|
|
|
v
|
|
|
|
ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*)
|
|
|
|
|
|
|
|
In order for this to work, subquery's top-level operation must be join, and
|
|
|
|
not grouping or ordering with limit (grouping or ordering with limit are not
|
|
|
|
commutative with duplicate removal). In other words, the conversion is
|
|
|
|
possible when the subquery doesn't have GROUP BY clause, any aggregate
|
|
|
|
functions*, or ORDER BY ... LIMIT clause.
|
|
|
|
|
|
|
|
Definitions:
|
|
|
|
- Subquery whose top-level operation is a join is called *mergeable semi-join*
|
|
|
|
- All other kinds of semi-join subqueries are considered non-mergeable.
|
|
|
|
|
|
|
|
*- this requirement is actually too strong, but its exceptions are too
|
|
|
|
complicated to be considered here.
|
|
|
|
|
|
|
|
3. Code-level view of semi-join processing
|
|
|
|
------------------------------------------
|
|
|
|
|
2010-07-10 20:51:12 +03:00
|
|
|
3.1 Conversion and pre-optimization data structures
|
|
|
|
---------------------------------------------------
|
2010-05-27 16:14:25 +04:00
|
|
|
* When doing JOIN::prepare for the subquery, we detect that it can be
|
|
|
|
converted into a semi-join and register it in parent_join->sj_subselects
|
|
|
|
|
|
|
|
* At the start of parent_join->optimize(), the predicate is converted into
|
|
|
|
a semi-join node. A semi-join node is a TABLE_LIST object that is linked
|
|
|
|
somewhere in parent_join->join_list (either it is just present there, or
|
|
|
|
it is a descendant of some of its members).
|
|
|
|
|
|
|
|
There are two kinds of semi-joins:
|
|
|
|
- Merged semi-joins
|
|
|
|
- Non-merged semi-joins
|
|
|
|
|
|
|
|
3.1.1 Merged semi-join TABLE_LIST object
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Merged semi-join object is a TABLE_LIST that contains a sub-join of
|
|
|
|
subquery tables and the semi-join ON expression (in this respect it is
|
2010-07-10 20:51:12 +03:00
|
|
|
very similar to nested outer join representation)
|
2010-05-27 16:14:25 +04:00
|
|
|
Merged semi-join represents this SQL:
|
|
|
|
|
|
|
|
... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr
|
|
|
|
|
|
|
|
Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set.
|
|
|
|
|
|
|
|
3.1.2 Non-merged semi-join data structure
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery
|
|
|
|
that produces rows. It is similar to a base table and represents this SQL:
|
|
|
|
|
|
|
|
... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr
|
|
|
|
|
|
|
|
Subquery items that were converted into semi-joins are removed from the WHERE
|
|
|
|
clause. (They do remain in PS-saved WHERE clause, and they replace themselves
|
|
|
|
with Item_int(1) on subsequent re-executions).
|
|
|
|
|
2010-07-10 20:51:12 +03:00
|
|
|
3.2 Semi-joins and join optimization
|
|
|
|
------------------------------------
|
|
|
|
|
|
|
|
3.2.1 Non-merged semi-joins and join optimization
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
For join optimization purposes, non-merged semi-join nests are similar to
|
2012-08-21 22:24:34 +04:00
|
|
|
base tables. Each such nest is represented by one JOIN_TAB, which has
|
|
|
|
two possible access strategies:
|
2010-07-10 20:51:12 +03:00
|
|
|
- full table scan (representing SJ-Materialization-Scan strategy)
|
|
|
|
- eq_ref-like table lookup (representing SJ-Materialization-Lookup)
|
|
|
|
|
|
|
|
Unlike regular base tables, non-merged semi-joins have:
|
|
|
|
- non-zero JOIN_TAB::startup_cost, and
|
|
|
|
- join_tab->table->is_filled_at_execution()==TRUE, which means one
|
2012-08-21 22:24:34 +04:00
|
|
|
cannot do const table detection, range analysis or other dataset-dependent
|
|
|
|
optimizations.
|
|
|
|
Instead, get_delayed_table_estimates() will run optimization for the
|
|
|
|
subquery and produce an E(materialized table size).
|
2010-07-10 20:51:12 +03:00
|
|
|
|
|
|
|
3.2.2 Merged semi-joins and join optimization
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
- optimize_semijoin_nests() does pre-optimization
|
|
|
|
- during join optimization, the join has one JOIN_TAB (or is it POSITION?)
|
2022-06-06 22:21:22 +03:00
|
|
|
array, and suffix-based detection is used, see optimize_semi_joins()
|
2010-07-10 20:51:12 +03:00
|
|
|
- after join optimization is done, get_best_combination() switches
|
|
|
|
the data-structure to prefix-based, multiple JOIN_TAB ranges format.
|
2010-05-27 16:14:25 +04:00
|
|
|
|
|
|
|
3.3 Semi-joins and query execution
|
|
|
|
----------------------------------
|
|
|
|
* Join executor has hooks for all semi-join strategies.
|
2010-07-10 20:51:12 +03:00
|
|
|
TODO elaborate.
|
|
|
|
|
2010-05-27 16:14:25 +04:00
|
|
|
*/
|
|
|
|
|
2012-03-26 13:47:00 +04:00
|
|
|
/*
|
|
|
|
EqualityPropagationAndSjmNests
|
|
|
|
******************************
|
|
|
|
|
|
|
|
Equalities are used for:
|
|
|
|
P1. Equality propagation
|
|
|
|
P2. Equality substitution [for a certain join order]
|
|
|
|
|
|
|
|
The equality propagation is not affected by SJM nests. In fact, it is done
|
|
|
|
before we determine the execution plan, i.e. before we even know we will use
|
|
|
|
SJM-nests for execution.
|
|
|
|
|
|
|
|
The equality substitution is affected.
|
|
|
|
|
|
|
|
Substitution without SJMs
|
|
|
|
=========================
|
|
|
|
When one doesn't have SJM nests, tables have a strict join order:
|
|
|
|
|
|
|
|
--------------------------------->
|
|
|
|
t1 -- t2 -- t3 -- t4 --- t5
|
|
|
|
|
|
|
|
|
|
|
|
? ^
|
|
|
|
\
|
|
|
|
--(part-of-WHERE)
|
|
|
|
|
|
|
|
|
|
|
|
Parts of WHERE/ON and ref. expressions are attached at some point along the axis.
|
|
|
|
Expression is allowed to refer to a table column if the table is to the left of
|
|
|
|
the attachment point. For any given expression, we have a goal:
|
|
|
|
|
|
|
|
"Move leftmost allowed attachment point as much as possible to the left"
|
|
|
|
|
|
|
|
Substitution with SJMs - task setting
|
|
|
|
=====================================
|
|
|
|
|
|
|
|
When SJM nests are present, there is no global strict table ordering anymore:
|
|
|
|
|
|
|
|
|
|
|
|
--------------------------------->
|
|
|
|
|
|
|
|
ot1 -- ot2 --- sjm -- ot4 --- ot5
|
|
|
|
|
|
|
|
|
| Main execution
|
|
|
|
- - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
| Materialization
|
|
|
|
it1 -- it2 --/
|
|
|
|
|
|
|
|
|
|
|
|
Besides that, we must take into account that
|
|
|
|
- values for outer table columns, otN.col, are inaccessible at
|
|
|
|
materialization step (SJM-RULE)
|
|
|
|
- values for inner table columns, itN.col, are inaccessible at Main execution
|
|
|
|
step, except for SJ-Materialization-Scan and columns that are in the
|
|
|
|
subquery's select list. (SJM-RULE)
|
|
|
|
|
|
|
|
Substitution with SJMs - solution
|
|
|
|
=================================
|
|
|
|
|
|
|
|
First, we introduce global strict table ordering like this:
|
|
|
|
|
|
|
|
ot1 - ot2 --\ /--- ot3 -- ot5
|
|
|
|
\--- it1 --- it2 --/
|
|
|
|
|
|
|
|
Now, let's see how to meet (SJM-RULE).
|
|
|
|
|
|
|
|
SJ-Materialization is only applicable for uncorrelated subqueries. From this, it
|
|
|
|
follows that any multiple equality will either
|
|
|
|
1. include only columns of outer tables, or
|
|
|
|
2. include only columns of inner tables, or
|
|
|
|
3. include columns of inner and outer tables, joined together through one
|
|
|
|
of IN-equalities.
|
|
|
|
|
|
|
|
Cases #1 and #2 can be handled in the same way as with regular inner joins.
|
|
|
|
|
|
|
|
Case #3 requires special handling, so that we don't construct violations of
|
|
|
|
(SJM-RULE). Let's consider possible ways to build violations.
|
|
|
|
|
|
|
|
Equality propagation starts with the clause in this form
|
|
|
|
|
|
|
|
top_query_where AND subquery_where AND in_equalities
|
|
|
|
|
|
|
|
First, it builds multi-equalities. It can also build a mixed multi-equality
|
|
|
|
|
|
|
|
multiple-equal(ot1.col, ot2.col, ... it1.col, itN.col)
|
|
|
|
|
|
|
|
Multi-equalities are pushed down the OR-clauses in top_query_where and in
|
|
|
|
subquery_where, so it's possible that clauses like this one are built:
|
|
|
|
|
|
|
|
subquery_cond OR (multiple-equal(it1.col, ot1.col,...) AND ...)
|
|
|
|
^^^^^^^^^^^^^ \
|
|
|
|
| this must be evaluated
|
|
|
|
\- can only be evaluated at the main phase.
|
|
|
|
at the materialization phase
|
|
|
|
|
|
|
|
Finally, equality substitution is started. It does two operations:
|
|
|
|
|
|
|
|
|
|
|
|
1. Field reference substitution
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
(In the code, this is Item_field::replace_equal_field)
|
|
|
|
|
|
|
|
This is a process of replacing each reference to "tblX.col"
|
|
|
|
with the first element of the multi-equality. (REF-SUBST-ORIG)
|
|
|
|
|
|
|
|
This behaviour can cause problems with Semi-join nests. Suppose, we have a
|
|
|
|
condition:
|
|
|
|
|
|
|
|
func(it1.col, it2.col)
|
|
|
|
|
|
|
|
and a multi-equality(ot1.col, it1.col). Then, reference to "it1.col" will be
|
|
|
|
replaced with "ot1.col", constructing a condition
|
|
|
|
|
|
|
|
func(ot1.col, it2.col)
|
|
|
|
|
|
|
|
which will be a violation of (SJM-RULE).
|
|
|
|
|
|
|
|
In order to avoid this, (REF-SUBST-ORIG) is amended as follows:
|
|
|
|
|
|
|
|
- references to tables "itX.col" that are inner wrt some SJM nest, are
|
|
|
|
replaced with references to the first inner table from the same SJM nest.
|
|
|
|
|
|
|
|
- references to top-level tables "otX.col" are replaced with references to
|
|
|
|
the first element of the multi-equality, no matter if that first element is
|
|
|
|
a column of a top-level table or of table from some SJM nest.
|
|
|
|
(REF-SUBST-SJM)
|
|
|
|
|
|
|
|
The case where the first element is a table from an SJM nest $SJM is ok,
|
|
|
|
because it can be proven that $SJM uses SJ-Materialization-Scan, and
|
|
|
|
"unpacks" correct column values to the first element during the main
|
|
|
|
execution phase.
|
|
|
|
|
|
|
|
2. Item_equal elimination
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
(In the code: eliminate_item_equal) This is a process of taking
|
|
|
|
|
|
|
|
multiple-equal(a,b,c,d,e)
|
|
|
|
|
|
|
|
and replacing it with an equivalent expression which is an AND of pair-wise
|
|
|
|
equalities:
|
|
|
|
|
|
|
|
a=b AND a=c AND ...
|
|
|
|
|
|
|
|
The equalities are picked such that for any given join prefix (t1,t2...) the
|
|
|
|
subset of equalities that can be evaluated gives the most restrictive
|
|
|
|
filtering.
|
|
|
|
|
|
|
|
Without SJM nests, it is sufficient to compare every multi-equality member
|
|
|
|
with the first one:
|
|
|
|
|
|
|
|
elem1=elem2 AND elem1=elem3 AND elem1=elem4 ...
|
|
|
|
|
|
|
|
When SJM nests are present, we should take care not to construct equalities
|
|
|
|
that violate the (SJM-RULE). This is achieved by generating separate sets of
|
|
|
|
equalities for top-level tables and for inner tables. That is, for the join
|
|
|
|
order
|
|
|
|
|
|
|
|
ot1 - ot2 --\ /--- ot3 -- ot5
|
|
|
|
\--- it1 --- it2 --/
|
|
|
|
|
|
|
|
we will generate
|
|
|
|
ot1.col=ot2.col
|
|
|
|
ot1.col=ot3.col
|
|
|
|
ot1.col=ot5.col
|
|
|
|
it2.col=it1.col
|
|
|
|
|
|
|
|
|
|
|
|
2.1 The problem with Item_equals and ORs
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
As has been mentioned above, multiple equalities are pushed down into OR
|
|
|
|
clauses, possibly building clauses like this:
|
|
|
|
|
|
|
|
func(it.col2) OR multiple-equal(it1.col1, it1.col2, ot1.col) (1)
|
|
|
|
|
|
|
|
where the first part of the clause has references to inner tables, while the
|
|
|
|
second has references to the top-level tables, which is a violation of
|
|
|
|
(SJM-RULE).
|
|
|
|
|
|
|
|
AND-clauses of this kind do not create problems, because make_cond_for_table()
|
|
|
|
will take them apart. OR-clauses will not be split. It is possible to
|
|
|
|
split-out the part that's dependent on the inner table:
|
|
|
|
|
|
|
|
func(it.col2) OR it1.col1=it1.col2
|
|
|
|
|
|
|
|
but this is a less-restrictive condition than condition (1). Current execution
|
|
|
|
scheme will still try to generate the "remainder" condition:
|
|
|
|
|
|
|
|
func(it.col2) OR it1.col1=ot1.col
|
|
|
|
|
|
|
|
which is a violation of (SJM-RULE).
|
|
|
|
|
|
|
|
QQ: "ot1.col=it1.col" is checked at the upper level. Why was it not removed
|
|
|
|
here?
|
|
|
|
AA: because has a proper subset of conditions that are found on this level.
|
|
|
|
consider a join order of ot, sjm(it)
|
|
|
|
and a condition
|
|
|
|
ot.col=it.col AND ( ot.col=it.col='foo' OR it.col2='bar')
|
|
|
|
|
|
|
|
we will produce:
|
|
|
|
table ot: nothing
|
|
|
|
table it: ot.col=it.col AND (ot.col='foo' OR it.col2='bar')
|
|
|
|
^^^^ ^^^^^^^^^^^^^^^^
|
|
|
|
| \ the problem is that
|
|
|
|
| this part of the condition didn't
|
|
|
|
| receive a substitution
|
|
|
|
|
|
|
|
|
+--- it was correct to subst, 'ot' is
|
|
|
|
the left-most.
|
|
|
|
|
|
|
|
|
|
|
|
Does it make sense to push "inner=outer" down into ORs?
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
Yes. Consider the query:
|
|
|
|
|
|
|
|
select * from ot
|
|
|
|
where ot.col in (select it.col from it where (it.col='foo' OR it.col='bar'))
|
|
|
|
|
|
|
|
here, it may be useful to infer that
|
|
|
|
|
|
|
|
(ot.col='foo' OR ot.col='bar') (CASE-FOR-SUBST)
|
|
|
|
|
|
|
|
and attach that condition to the table 'ot'.
|
|
|
|
|
|
|
|
Possible solutions for Item_equals and ORs
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
Solution #1
|
|
|
|
~~~~~~~~~~~
|
|
|
|
Let make_cond_for_table() analyze the OR clauses it has produced and
|
|
|
|
discard them if they violate (SJM-RULE). This solution would allow to handle
|
|
|
|
cases like (CASE-FOR-SUBST) at the expense of making semantics of
|
|
|
|
make_cond_for_table() complicated.
|
|
|
|
|
|
|
|
Solution #2
|
|
|
|
~~~~~~~~~~~
|
|
|
|
Before the equality propagation phase, none of the OR clauses violate the
|
|
|
|
(SJM-RULE). This way, if we remember which tables the original equality
|
|
|
|
referred to, we can only generate equalities that refer to the outer (or inner)
|
|
|
|
tables. Note that this will disallow handling of cases like (CASE-FOR-SUBST).
|
|
|
|
|
|
|
|
Currently, solution #2 is implemented.
|
|
|
|
*/
|
|
|
|
|
2023-04-26 15:27:01 +04:00
|
|
|
/* File-local column identifier whose literal name is "weedout_key". */
static const Lex_ident_column weedout_key= "weedout_key"_Lex_ident_column;
|
2010-05-27 16:14:25 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
static
|
2019-02-13 11:22:16 +05:30
|
|
|
bool subquery_types_allow_materialization(THD *thd, Item_in_subselect *in_subs);
|
2013-11-13 12:43:39 +01:00
|
|
|
static bool replace_where_subcondition(JOIN *, Item **, Item *, Item *, bool);
|
2011-07-16 23:57:43 -07:00
|
|
|
static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
|
|
|
|
void *arg);
|
2018-07-25 21:17:50 +05:30
|
|
|
static void reset_equality_number_for_subq_conds(Item * cond);
|
2010-02-16 00:53:06 +03:00
|
|
|
static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred);
|
2010-04-06 00:16:45 +04:00
|
|
|
static bool convert_subq_to_jtbm(JOIN *parent_join,
|
|
|
|
Item_in_subselect *subq_pred, bool *remove);
|
2010-02-16 00:53:06 +03:00
|
|
|
static TABLE_LIST *alloc_join_nest(THD *thd);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likehood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calclation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost nto account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Allmost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make lables more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrivals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcomming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into considering
cost_for_index_read() if there where no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
static uint get_tmp_table_rec_length(Ref_ptr_array p_list, uint elements,
|
|
|
|
bool *blobs_used);
|
2010-02-16 00:53:06 +03:00
|
|
|
bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables);
|
|
|
|
static SJ_MATERIALIZATION_INFO *
|
|
|
|
at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
|
|
|
|
uint idx, bool *loose_scan);
|
|
|
|
static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm,
|
|
|
|
Item_in_subselect *subq_pred);
|
2017-11-14 07:47:58 +02:00
|
|
|
static bool remove_sj_conds(THD *thd, Item **tree);
|
2010-02-16 00:53:06 +03:00
|
|
|
static bool is_cond_sj_in_equality(Item *item);
|
|
|
|
static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab);
|
|
|
|
static Item *remove_additional_cond(Item* conds);
|
|
|
|
static void remove_subq_pushed_predicates(JOIN *join, Item **where);
|
|
|
|
|
2011-03-27 03:45:16 +04:00
|
|
|
enum_nested_loop_state
|
|
|
|
end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-10-01 00:10:03 +04:00
|
|
|
/*
|
|
|
|
Check if Materialization strategy is allowed for given subquery predicate.
|
|
|
|
|
|
|
|
@param thd Thread handle
|
|
|
|
@param in_subs The subquery predicate
|
|
|
|
@param child_select The select inside predicate (the function will
|
|
|
|
check it is the only one)
|
|
|
|
|
|
|
|
@return TRUE - Materialization is applicable
|
|
|
|
FALSE - Otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool is_materialization_applicable(THD *thd, Item_in_subselect *in_subs,
|
|
|
|
st_select_lex *child_select)
|
|
|
|
{
|
|
|
|
st_select_lex_unit* parent_unit= child_select->master_unit();
|
|
|
|
/*
|
|
|
|
Check if the subquery predicate can be executed via materialization.
|
|
|
|
The required conditions are:
|
|
|
|
0. The materialization optimizer switch was set.
|
|
|
|
1. Subquery is a single SELECT (not a UNION).
|
|
|
|
TODO: this is a limitation that can be fixed
|
|
|
|
2. Subquery is not a table-less query. In this case there is no
|
|
|
|
point in materializing.
|
|
|
|
2A The upper query is not a table-less SELECT ... FROM DUAL. We
|
|
|
|
can't do materialization for SELECT .. FROM DUAL because it
|
|
|
|
does not call setup_subquery_materialization(). We could make
|
|
|
|
SELECT ... FROM DUAL call that function but that doesn't seem
|
|
|
|
to be the case that is worth handling.
|
|
|
|
3. Either the subquery predicate is a top-level predicate, or at
|
|
|
|
least one partial match strategy is enabled. If no partial match
|
|
|
|
strategy is enabled, then materialization cannot be used for
|
|
|
|
non-top-level queries because it cannot handle NULLs correctly.
|
|
|
|
4. Subquery is non-correlated
|
|
|
|
TODO:
|
|
|
|
This condition is too restrictive (limitation). It can be extended to:
|
|
|
|
(Subquery is non-correlated ||
|
|
|
|
Subquery is correlated to any query outer to IN predicate ||
|
|
|
|
(Subquery is correlated to the immediate outer query &&
|
|
|
|
Subquery !contains {GROUP BY, ORDER BY [LIMIT],
|
|
|
|
aggregate functions}) && subquery predicate is not under "NOT IN"))
|
2016-05-24 21:29:52 +03:00
|
|
|
5. Subquery does not contain recursive references
|
2011-10-01 00:10:03 +04:00
|
|
|
|
|
|
|
A note about prepared statements: we want the if-branch to be taken on
|
|
|
|
PREPARE and each EXECUTE. The rewrites are only done once, but we need
|
|
|
|
select_lex->sj_subselects list to be populated for every EXECUTE.
|
|
|
|
|
|
|
|
*/
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && // 0
|
|
|
|
!child_select->is_part_of_union() && // 1
|
|
|
|
parent_unit->first_select()->leaf_tables.elements && // 2
|
2018-05-22 19:08:39 +02:00
|
|
|
child_select->outer_select() &&
|
2019-05-28 11:25:45 +03:00
|
|
|
child_select->outer_select()->table_list.first && // 2A
|
2019-02-13 11:22:16 +05:30
|
|
|
subquery_types_allow_materialization(thd, in_subs) &&
|
2011-10-01 00:10:03 +04:00
|
|
|
(in_subs->is_top_level_item() || //3
|
|
|
|
optimizer_flag(thd,
|
|
|
|
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || //3
|
|
|
|
optimizer_flag(thd,
|
|
|
|
OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) && //3
|
2016-05-24 21:29:52 +03:00
|
|
|
!in_subs->is_correlated && //4
|
|
|
|
!in_subs->with_recursive_reference) //5
|
2011-10-01 00:10:03 +04:00
|
|
|
{
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2022-07-11 16:57:37 -07:00
|
|
|
/**
|
|
|
|
@brief Check whether an IN subquery must be excluded from conversion to SJ
|
|
|
|
|
|
|
|
@param thd global context the processed statement
|
|
|
|
@returns true if the IN subquery must be excluded from conversion to SJ
|
|
|
|
|
|
|
|
@note
|
|
|
|
Currently a top level IN subquery of an delete statement is not converted
|
|
|
|
to SJ if the statement contains ORDER BY ... LIMIT or contains RETURNING.
|
|
|
|
|
|
|
|
@todo
|
|
|
|
The disjunctive members
|
|
|
|
!((Sql_cmd_update *) cmd)->is_multitable()
|
|
|
|
!((Sql_cmd_delete *) cmd)->is_multitable()
|
|
|
|
will be removed when conversions of IN predicands to semi-joins are
|
|
|
|
fully supported for single-table UPDATE/DELETE statements.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool SELECT_LEX::is_sj_conversion_prohibited(THD *thd)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(master_unit()->item->substype() == Item_subselect::IN_SUBS);
|
|
|
|
|
|
|
|
SELECT_LEX *outer_sl= outer_select();
|
|
|
|
if (outer_sl->outer_select())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Sql_cmd *cmd= thd->lex->m_sql_cmd;
|
|
|
|
|
|
|
|
switch (thd->lex->sql_command) {
|
|
|
|
case SQLCOM_UPDATE:
|
|
|
|
return
|
2023-01-09 22:39:39 -08:00
|
|
|
!((Sql_cmd_update *) cmd)->is_multitable() &&
|
2022-07-11 16:57:37 -07:00
|
|
|
((Sql_cmd_update *) cmd)->processing_as_multitable_update_prohibited(thd);
|
|
|
|
case SQLCOM_DELETE:
|
|
|
|
return
|
2023-01-09 22:39:39 -08:00
|
|
|
!((Sql_cmd_delete *) cmd)->is_multitable() &&
|
2022-07-11 16:57:37 -07:00
|
|
|
((Sql_cmd_delete *) cmd)->processing_as_multitable_delete_prohibited(thd);
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-01 00:10:03 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them
|
|
|
|
|
2010-05-10 19:28:19 +04:00
|
|
|
SYNOPSIS
|
|
|
|
check_and_do_in_subquery_rewrites()
|
|
|
|
join Subquery's join
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
DESCRIPTION
|
|
|
|
Check if we need to do
|
2010-05-10 19:28:19 +04:00
|
|
|
- subquery -> mergeable semi-join rewrite
|
2010-02-16 00:53:06 +03:00
|
|
|
- if the subquery can be handled with materialization
|
|
|
|
- 'substitution' rewrite for table-less subqueries like "(select 1)"
|
2010-05-10 19:28:19 +04:00
|
|
|
- IN->EXISTS rewrite
|
|
|
|
and, depending on the rewrite, either do it, or record it to be done at a
|
|
|
|
later phase.
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
RETURN
|
2010-05-10 19:28:19 +04:00
|
|
|
0 - OK
|
|
|
|
Other - Some sort of query error
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
int check_and_do_in_subquery_rewrites(JOIN *join)
|
|
|
|
{
|
|
|
|
THD *thd=join->thd;
|
|
|
|
st_select_lex *select_lex= join->select_lex;
|
2010-09-05 18:43:47 +03:00
|
|
|
st_select_lex_unit* parent_unit= select_lex->master_unit();
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_ENTER("check_and_do_in_subquery_rewrites");
|
2011-11-24 15:12:10 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
IN/ALL/ANY rewrites are not applicable for so called fake select
|
|
|
|
(this select exists only to filter results of union if it is needed).
|
|
|
|
*/
|
|
|
|
if (select_lex == select_lex->master_unit()->fake_select_lex)
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
If
|
|
|
|
1) this join is inside a subquery (of any type except FROM-clause
|
|
|
|
subquery) and
|
|
|
|
2) we aren't just normalizing a VIEW
|
|
|
|
|
|
|
|
Then perform early unconditional subquery transformations:
|
|
|
|
- Convert subquery predicate into semi-join, or
|
|
|
|
- Mark the subquery for execution using materialization, or
|
|
|
|
- Perform IN->EXISTS transformation, or
|
|
|
|
- Perform more/less ALL/ANY -> MIN/MAX rewrite
|
|
|
|
- Substitute trivial scalar-context subquery with its value
|
|
|
|
|
|
|
|
TODO: for PS, make the whole block execute only on the first execution
|
|
|
|
*/
|
|
|
|
Item_subselect *subselect;
|
2012-03-26 12:33:49 +02:00
|
|
|
if (!thd->lex->is_view_context_analysis() && // (1)
|
|
|
|
(subselect= parent_unit->item)) // (2)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
Item_in_subselect *in_subs= NULL;
|
2011-05-04 18:08:58 +03:00
|
|
|
Item_allany_subselect *allany_subs= NULL;
|
2019-05-04 21:33:39 +03:00
|
|
|
Item_subselect::subs_type substype= subselect->substype();
|
|
|
|
switch (substype) {
|
2011-05-04 18:08:58 +03:00
|
|
|
case Item_subselect::IN_SUBS:
|
2020-06-30 15:20:11 +02:00
|
|
|
in_subs= subselect->get_IN_subquery();
|
2011-05-04 18:08:58 +03:00
|
|
|
break;
|
|
|
|
case Item_subselect::ALL_SUBS:
|
|
|
|
case Item_subselect::ANY_SUBS:
|
2020-06-30 15:20:11 +02:00
|
|
|
DBUG_ASSERT(subselect->get_IN_subquery());
|
2011-05-04 18:08:58 +03:00
|
|
|
allany_subs= (Item_allany_subselect *)subselect;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-05-04 21:33:39 +03:00
|
|
|
/*
|
|
|
|
Try removing "ORDER BY" or even "ORDER BY ... LIMIT" from certain kinds
|
|
|
|
of subqueries. The removal might enable further transformations.
|
|
|
|
*/
|
|
|
|
if (substype == Item_subselect::IN_SUBS ||
|
|
|
|
substype == Item_subselect::EXISTS_SUBS ||
|
|
|
|
substype == Item_subselect::ANY_SUBS ||
|
|
|
|
substype == Item_subselect::ALL_SUBS)
|
|
|
|
{
|
|
|
|
// (1) - ORDER BY without LIMIT can be removed from IN/EXISTS subqueries
|
|
|
|
// (2) - for EXISTS, can also remove "ORDER BY ... LIMIT n",
|
|
|
|
// but cannot remove "ORDER BY ... LIMIT n OFFSET m"
|
2020-12-19 13:59:37 +02:00
|
|
|
if (!select_lex->limit_params.select_limit || // (1)
|
2019-05-04 21:33:39 +03:00
|
|
|
(substype == Item_subselect::EXISTS_SUBS && // (2)
|
2020-12-19 13:59:37 +02:00
|
|
|
!select_lex->limit_params.offset_limit)) // (2)
|
2019-05-04 21:33:39 +03:00
|
|
|
{
|
|
|
|
select_lex->join->order= 0;
|
|
|
|
select_lex->join->skip_sort_order= 1;
|
|
|
|
}
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/* Resolve expressions and perform semantic analysis for IN query */
|
|
|
|
if (in_subs != NULL)
|
|
|
|
/*
|
|
|
|
TODO: Add the condition below to this if statement when we have proper
|
|
|
|
support for is_correlated handling for materialized semijoins.
|
|
|
|
If we were to add this condition now, the fix_fields() call in
|
|
|
|
convert_subq_to_sj() would force the flag is_correlated to be set
|
|
|
|
erroneously for prepared queries.
|
|
|
|
|
|
|
|
thd->stmt_arena->state != Query_arena::PREPARED)
|
|
|
|
*/
|
|
|
|
{
|
2015-04-23 19:11:06 +02:00
|
|
|
SELECT_LEX *current= thd->lex->current_select;
|
|
|
|
thd->lex->current_select= current->return_after_parsing();
|
|
|
|
char const *save_where= thd->where;
|
|
|
|
thd->where= "IN/ALL/ANY subquery";
|
|
|
|
|
2020-06-30 15:20:11 +02:00
|
|
|
Item **left= in_subs->left_exp_ptr();
|
|
|
|
bool failure= (*left)->fix_fields_if_needed(thd, left);
|
2015-04-23 19:11:06 +02:00
|
|
|
thd->lex->current_select= current;
|
|
|
|
thd->where= save_where;
|
|
|
|
if (failure)
|
|
|
|
DBUG_RETURN(-1); /* purecov: deadcode */
|
|
|
|
|
2020-06-30 15:20:11 +02:00
|
|
|
// fix_field above can rewrite left expression
|
|
|
|
uint ncols= (*left)->cols();
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Check if the left and right expressions have the same # of
|
|
|
|
columns, i.e. we don't have a case like
|
|
|
|
(oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
|
|
|
|
|
|
|
|
TODO why do we have this duplicated in IN->EXISTS transformers?
|
|
|
|
psergey-todo: fix these: grep for duplicated_subselect_card_check
|
|
|
|
*/
|
2020-06-30 15:20:11 +02:00
|
|
|
if (select_lex->item_list.elements != ncols)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2020-06-30 15:20:11 +02:00
|
|
|
my_error(ER_OPERAND_COLUMNS, MYF(0), ncols);
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(-1);
|
|
|
|
}
|
2023-10-13 12:17:25 +03:00
|
|
|
|
2023-11-08 12:59:00 +01:00
|
|
|
uint cols_num= in_subs->left_exp()->cols();
|
2023-10-13 12:17:25 +03:00
|
|
|
for (uint i= 0; i < cols_num; i++)
|
|
|
|
{
|
|
|
|
if (select_lex->ref_pointer_array[i]->
|
2023-11-08 12:59:00 +01:00
|
|
|
check_cols(in_subs->left_exp()->element_index(i)->cols()))
|
2023-10-13 12:17:25 +03:00
|
|
|
DBUG_RETURN(-1);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
2010-09-05 18:43:47 +03:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
|
|
|
|
/*
|
|
|
|
Check if we're in subquery that is a candidate for flattening into a
|
|
|
|
semi-join (which is done in flatten_subqueries()). The
|
|
|
|
requirements are:
|
|
|
|
1. Subquery predicate is an IN/=ANY subq predicate
|
|
|
|
2. Subquery is a single SELECT (not a UNION)
|
|
|
|
3. Subquery does not have GROUP BY or ORDER BY
|
|
|
|
4. Subquery does not use aggregate functions or HAVING
|
|
|
|
5. Subquery predicate is at the AND-top-level of ON/WHERE clause
|
2022-07-11 16:57:37 -07:00
|
|
|
6. We are not in a subquery of a single-table UPDATE/DELETE that
|
|
|
|
does not allow conversion to multi-table UPDATE/DELETE
|
2010-02-16 00:53:06 +03:00
|
|
|
7. We're not in a table-less subquery like "SELECT 1"
|
|
|
|
8. No execution method was already chosen (by a prepared statement)
|
|
|
|
9. Parent select is not a table-less select
|
|
|
|
10. Neither parent nor child select have STRAIGHT_JOIN option.
|
2013-02-26 01:20:17 +02:00
|
|
|
11. It is first optimisation (the subquery could be moved from ON
|
2023-01-12 22:31:18 +02:00
|
|
|
clause during first optimisation and then be considered for SJ
|
|
|
|
on the second when it is too late)
|
2023-02-06 16:23:17 +03:00
|
|
|
|
|
|
|
There are also other requirements which cannot be checked at this phase,
|
|
|
|
yet. They are checked later in convert_join_subqueries_to_semijoins(),
|
|
|
|
look for calls to block_conversion_to_sj().
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
|
|
|
|
in_subs && // 1
|
|
|
|
!select_lex->is_part_of_union() && // 2
|
|
|
|
!select_lex->group_list.elements && !join->order && // 3
|
|
|
|
!join->having && !select_lex->with_sum_func && // 4
|
2011-08-29 19:57:41 +04:00
|
|
|
in_subs->emb_on_expr_nest && // 5
|
2022-07-11 16:57:37 -07:00
|
|
|
!select_lex->is_sj_conversion_prohibited(thd) && // 6
|
2011-05-16 22:39:43 -07:00
|
|
|
parent_unit->first_select()->leaf_tables.elements && // 7
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
!in_subs->has_strategy() && // 8
|
2019-03-14 17:41:35 -07:00
|
|
|
select_lex->outer_select()->table_list.first && // 9
|
2010-02-16 00:53:06 +03:00
|
|
|
!((join->select_options | // 10
|
|
|
|
select_lex->outer_select()->join->select_options) // 10
|
2013-02-26 01:20:17 +02:00
|
|
|
& SELECT_STRAIGHT_JOIN) && // 10
|
2023-02-06 16:23:17 +03:00
|
|
|
select_lex->first_cond_optimization) // 11
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
|
|
|
|
|
2021-12-09 20:13:35 +03:00
|
|
|
//(void)subquery_types_allow_materialization(thd, in_subs);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2010-05-23 23:13:18 +04:00
|
|
|
in_subs->is_flattenable_semijoin= TRUE;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/* Register the subquery for further processing in flatten_subqueries() */
|
2011-07-16 23:57:43 -07:00
|
|
|
if (!in_subs->is_registered_semijoin)
|
|
|
|
{
|
|
|
|
Query_arena *arena, backup;
|
|
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
2015-08-24 14:42:07 +03:00
|
|
|
select_lex->outer_select()->sj_subselects.push_back(in_subs,
|
|
|
|
thd->mem_root);
|
2011-07-16 23:57:43 -07:00
|
|
|
if (arena)
|
|
|
|
thd->restore_active_arena(arena, &backup);
|
|
|
|
in_subs->is_registered_semijoin= TRUE;
|
2021-11-19 12:17:14 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Print the transformation into trace. Do it when we've just set
|
|
|
|
is_registered_semijoin=TRUE above, and also do it when we've already
|
|
|
|
had it set.
|
|
|
|
*/
|
|
|
|
if (in_subs->is_registered_semijoin)
|
|
|
|
{
|
2019-02-18 17:11:20 +05:30
|
|
|
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
|
|
|
|
select_lex->select_number,
|
2019-02-13 11:22:16 +05:30
|
|
|
"IN (SELECT)", "semijoin");
|
2019-02-18 17:11:20 +05:30
|
|
|
trace_transform.add("chosen", true);
|
2011-07-16 23:57:43 -07:00
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-05-26 15:01:26 +04:00
|
|
|
DBUG_PRINT("info", ("Subquery can't be converted to merged semi-join"));
|
2010-10-05 16:00:31 +03:00
|
|
|
/* Test if the user has set a legal combination of optimizer switches. */
|
2020-07-02 19:03:39 +05:30
|
|
|
DBUG_ASSERT(optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS |
|
|
|
|
OPTIMIZER_SWITCH_MATERIALIZATION));
|
2015-04-23 19:04:11 +02:00
|
|
|
/*
|
|
|
|
Transform each subquery predicate according to its overloaded
|
|
|
|
transformer.
|
|
|
|
*/
|
|
|
|
if (subselect->select_transformer(join))
|
|
|
|
DBUG_RETURN(-1);
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
2011-05-23 10:56:05 +03:00
|
|
|
If the subquery predicate is IN/=ANY, analyse and set all possible
|
|
|
|
subquery execution strategies based on optimizer switches and syntactic
|
|
|
|
properties.
|
|
|
|
*/
|
2011-11-21 16:56:32 +02:00
|
|
|
if (in_subs && !in_subs->has_strategy())
|
2010-09-30 18:32:44 +03:00
|
|
|
{
|
2022-07-11 16:57:37 -07:00
|
|
|
if (!select_lex->is_sj_conversion_prohibited(thd) &&
|
|
|
|
is_materialization_applicable(thd, in_subs, select_lex))
|
2011-10-01 00:10:03 +04:00
|
|
|
{
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subs->add_strategy(SUBS_MATERIALIZATION);
|
2011-05-25 19:31:13 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
If the subquery is an AND-part of WHERE register for being processed
|
|
|
|
with jtbm strategy
|
|
|
|
*/
|
2011-08-29 19:57:41 +04:00
|
|
|
if (in_subs->emb_on_expr_nest == NO_JOIN_NEST &&
|
2011-05-25 19:31:13 +04:00
|
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN))
|
|
|
|
{
|
|
|
|
in_subs->is_flattenable_semijoin= FALSE;
|
2011-07-16 23:57:43 -07:00
|
|
|
if (!in_subs->is_registered_semijoin)
|
|
|
|
{
|
|
|
|
Query_arena *arena, backup;
|
|
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
2015-08-24 14:42:07 +03:00
|
|
|
select_lex->outer_select()->sj_subselects.push_back(in_subs,
|
|
|
|
thd->mem_root);
|
2011-07-16 23:57:43 -07:00
|
|
|
if (arena)
|
|
|
|
thd->restore_active_arena(arena, &backup);
|
|
|
|
in_subs->is_registered_semijoin= TRUE;
|
|
|
|
}
|
2011-05-25 19:31:13 +04:00
|
|
|
}
|
2010-09-30 18:32:44 +03:00
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-05-23 10:56:05 +03:00
|
|
|
/*
|
|
|
|
IN-TO-EXISTS is the only universal strategy. Choose it if the user
|
|
|
|
allowed it via an optimizer switch, or if materialization is not
|
|
|
|
possible.
|
|
|
|
*/
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_IN_TO_EXISTS) ||
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
!in_subs->has_strategy())
|
|
|
|
in_subs->add_strategy(SUBS_IN_TO_EXISTS);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2011-05-04 18:08:58 +03:00
|
|
|
/* Check if max/min optimization applicable */
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
if (allany_subs && !allany_subs->is_set_strategy())
|
|
|
|
{
|
|
|
|
uchar strategy= (allany_subs->is_maxmin_applicable(join) ?
|
|
|
|
(SUBS_MAXMIN_INJECTED | SUBS_MAXMIN_ENGINE) :
|
|
|
|
SUBS_IN_TO_EXISTS);
|
|
|
|
allany_subs->add_strategy(strategy);
|
|
|
|
}
|
2011-05-04 18:08:58 +03:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief Check if subquery's compared types allow materialization.
|
|
|
|
|
|
|
|
@param in_subs Subquery predicate, updated as follows:
|
|
|
|
types_allow_materialization TRUE if subquery materialization is allowed.
|
|
|
|
sjm_scan_allowed If types_allow_materialization is TRUE,
|
|
|
|
indicates whether it is possible to use subquery
|
|
|
|
materialization and scan the materialized table.
|
|
|
|
|
|
|
|
@retval TRUE If subquery types allow materialization.
|
|
|
|
@retval FALSE Otherwise.
|
|
|
|
|
|
|
|
@details
|
|
|
|
This is a temporary fix for BUG#36752.
|
|
|
|
|
|
|
|
There are two subquery materialization strategies:
|
|
|
|
|
|
|
|
1. Materialize and do index lookups in the materialized table. See
|
|
|
|
BUG#36752 for description of restrictions we need to put on the
|
|
|
|
compared expressions.
|
|
|
|
|
|
|
|
2. Materialize and then do a full scan of the materialized table. At the
|
|
|
|
moment, this strategy's applicability criteria are even stricter than
|
|
|
|
in #1.
|
|
|
|
|
|
|
|
This is so because of the following: consider an uncorrelated subquery
|
|
|
|
|
|
|
|
...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...)
|
|
|
|
|
|
|
|
and a join order that could be used to do sjm-materialization:
|
|
|
|
|
|
|
|
SJM-Scan(it1, it1), ot1, ot2
|
|
|
|
|
|
|
|
IN-equalities will be parts of conditions attached to the outer tables:
|
|
|
|
|
|
|
|
ot1: ot1.col1 = ie1 AND ... (C1)
|
|
|
|
ot2: ot1.col2 = ie2 AND ... (C2)
|
|
|
|
|
|
|
|
besides those there may be additional references to ie1 and ie2
|
|
|
|
generated by equality propagation. The problem with evaluating C1 and
|
|
|
|
C2 is that ie{1,2} refer to subquery tables' columns, while we only have
|
|
|
|
current value of materialization temptable. Our solution is to
|
|
|
|
* require that all ie{N} are table column references. This allows
|
|
|
|
to copy the values of materialization temptable columns to the
|
|
|
|
original table's columns (see setup_sj_materialization for more
|
|
|
|
details)
|
|
|
|
* require that compared columns have exactly the same type. This is
|
|
|
|
a temporary measure to avoid BUG#36752-type problems.
|
2018-07-25 14:20:16 +05:30
|
|
|
|
|
|
|
JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan expects that for Semi Join Materialization
|
|
|
|
Scan all the items in the select list of the IN Subquery are of the type Item::FIELD_ITEM.
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
static
|
2019-02-13 11:22:16 +05:30
|
|
|
bool subquery_types_allow_materialization(THD* thd, Item_in_subselect *in_subs)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2020-06-30 15:20:11 +02:00
|
|
|
Item *left_exp= in_subs->left_exp();
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_ENTER("subquery_types_allow_materialization");
|
|
|
|
|
2020-08-14 19:51:10 +03:00
|
|
|
DBUG_ASSERT(left_exp->fixed());
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
List_iterator<Item> it(in_subs->unit->first_select()->item_list);
|
|
|
|
uint elements= in_subs->unit->first_select()->item_list.elements;
|
2019-02-13 11:22:16 +05:30
|
|
|
const char* cause= NULL;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
in_subs->types_allow_materialization= FALSE; // Assign default values
|
|
|
|
in_subs->sjm_scan_allowed= FALSE;
|
2019-02-13 11:22:16 +05:30
|
|
|
|
2019-02-18 17:11:20 +05:30
|
|
|
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
|
2019-02-13 11:22:16 +05:30
|
|
|
in_subs->get_select_lex()->select_number,
|
|
|
|
"IN (SELECT)", "materialization");
|
2019-12-13 17:30:37 +02:00
|
|
|
|
2019-12-04 20:04:45 +05:30
|
|
|
/*
|
|
|
|
The checks here must be kept in sync with the one in
|
|
|
|
Item_func_in::in_predicate_to_in_subs_transformer().
|
|
|
|
*/
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
bool all_are_fields= TRUE;
|
2016-02-08 22:53:40 +02:00
|
|
|
uint32 total_key_length = 0;
|
2020-11-27 22:06:54 +05:30
|
|
|
bool converted_from_in_predicate= in_subs->converted_from_in_predicate;
|
2010-02-16 00:53:06 +03:00
|
|
|
for (uint i= 0; i < elements; i++)
|
|
|
|
{
|
2020-06-30 15:20:11 +02:00
|
|
|
Item *outer= left_exp->element_index(i);
|
2010-02-16 00:53:06 +03:00
|
|
|
Item *inner= it++;
|
|
|
|
all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM &&
|
|
|
|
inner->real_item()->type() == Item::FIELD_ITEM);
|
2016-02-08 22:53:40 +02:00
|
|
|
total_key_length += inner->max_length;
|
2020-11-27 22:06:54 +05:30
|
|
|
if (!inner->
|
|
|
|
type_handler()->
|
|
|
|
subquery_type_allows_materialization(inner,
|
|
|
|
outer,
|
|
|
|
converted_from_in_predicate))
|
2019-02-13 11:22:16 +05:30
|
|
|
{
|
2022-01-20 15:49:01 +02:00
|
|
|
if (unlikely(trace_transform.trace_started()))
|
|
|
|
trace_transform.
|
|
|
|
add("possible", false).
|
|
|
|
add("cause", "types mismatch");
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(FALSE);
|
2019-02-13 11:22:16 +05:30
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-05-28 05:11:32 +03:00
|
|
|
|
2016-02-08 22:53:40 +02:00
|
|
|
/*
|
|
|
|
Make sure that create_tmp_table will not fail due to too long keys.
|
|
|
|
See MDEV-7122. This check is performed inside create_tmp_table also and
|
|
|
|
we must do it so that we know the table has keys created.
|
2018-03-06 19:59:57 +05:30
|
|
|
Make sure that the length of the key for the temp_table is atleast
|
|
|
|
greater than 0.
|
2016-02-08 22:53:40 +02:00
|
|
|
*/
|
2019-02-13 11:22:16 +05:30
|
|
|
if (!total_key_length)
|
|
|
|
cause= "zero length key for materialized table";
|
|
|
|
else if (total_key_length > tmp_table_max_key_length())
|
|
|
|
cause= "length of key greater than allowed key length for materialized tables";
|
|
|
|
else if (elements > tmp_table_max_key_parts())
|
|
|
|
cause= "#keyparts greater than allowed key parts for materialized tables";
|
|
|
|
else
|
|
|
|
{
|
|
|
|
in_subs->types_allow_materialization= TRUE;
|
|
|
|
in_subs->sjm_scan_allowed= all_are_fields;
|
2022-01-20 15:49:01 +02:00
|
|
|
if (unlikely(trace_transform.trace_started()))
|
|
|
|
trace_transform.
|
|
|
|
add("sjm_scan_allowed", all_are_fields).
|
|
|
|
add("possible", true);
|
2019-02-13 11:22:16 +05:30
|
|
|
DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
2019-02-18 17:11:20 +05:30
|
|
|
trace_transform.add("possible", false).add("cause", cause);
|
2019-02-13 11:22:16 +05:30
|
|
|
DBUG_RETURN(FALSE);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-05-04 18:08:58 +03:00
|
|
|
/**
|
|
|
|
Apply max min optimization of all/any subselect
|
|
|
|
*/
|
|
|
|
|
2011-05-12 00:14:15 +03:00
|
|
|
bool JOIN::transform_max_min_subquery()
|
2011-05-04 18:08:58 +03:00
|
|
|
{
|
2011-05-12 00:14:15 +03:00
|
|
|
DBUG_ENTER("JOIN::transform_max_min_subquery");
|
|
|
|
Item_subselect *subselect= unit->item;
|
2011-05-04 18:08:58 +03:00
|
|
|
if (!subselect || (subselect->substype() != Item_subselect::ALL_SUBS &&
|
|
|
|
subselect->substype() != Item_subselect::ANY_SUBS))
|
|
|
|
DBUG_RETURN(0);
|
2011-05-12 00:14:15 +03:00
|
|
|
DBUG_RETURN(((Item_allany_subselect *) subselect)->
|
|
|
|
transform_into_max_min(this));
|
2011-05-04 18:08:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-05-23 23:13:18 +04:00
|
|
|
/*
|
|
|
|
Finalize IN->EXISTS conversion in case we couldn't use materialization.
|
|
|
|
|
|
|
|
DESCRIPTION Invoke the IN->EXISTS converter
|
|
|
|
Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE - Ok
|
|
|
|
TRUE - Fatal error
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item)
|
2010-04-06 00:16:45 +04:00
|
|
|
{
|
|
|
|
DBUG_ENTER("make_in_exists_conversion");
|
|
|
|
JOIN *child_join= item->unit->first_select()->join;
|
2011-05-25 19:31:13 +04:00
|
|
|
bool res;
|
2011-04-05 14:33:15 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
We're going to finalize IN->EXISTS conversion.
|
|
|
|
Normally, IN->EXISTS conversion takes place inside the
|
2020-08-14 19:51:10 +03:00
|
|
|
Item_subselect::fix_fields() call, where item_subselect->fixed()==FALSE (as
|
2011-04-05 14:33:15 +04:00
|
|
|
fix_fields() haven't finished yet) and item_subselect->changed==FALSE (as
|
|
|
|
the conversion haven't been finalized)
|
|
|
|
|
|
|
|
At the end of Item_subselect::fix_fields() we had to set fixed=TRUE,
|
|
|
|
changed=TRUE (the only other option would have been to return error).
|
|
|
|
|
|
|
|
So, now we have to set these back for the duration of select_transformer()
|
|
|
|
call.
|
|
|
|
*/
|
2010-04-06 00:16:45 +04:00
|
|
|
item->changed= 0;
|
2020-09-02 03:13:32 +03:00
|
|
|
item->base_flags|= item_base_t::FIXED;
|
2010-04-06 00:16:45 +04:00
|
|
|
|
|
|
|
SELECT_LEX *save_select_lex= thd->lex->current_select;
|
|
|
|
thd->lex->current_select= item->unit->first_select();
|
|
|
|
|
|
|
|
res= item->select_transformer(child_join);
|
|
|
|
|
|
|
|
thd->lex->current_select= save_select_lex;
|
|
|
|
|
2011-05-25 19:31:13 +04:00
|
|
|
if (res)
|
2010-04-06 00:16:45 +04:00
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
|
|
|
|
item->changed= 1;
|
2020-08-02 12:31:14 +03:00
|
|
|
DBUG_ASSERT(item->fixed());
|
2010-04-06 00:16:45 +04:00
|
|
|
|
|
|
|
Item *substitute= item->substitution;
|
2020-08-14 19:51:10 +03:00
|
|
|
bool do_fix_fields= !item->substitution->fixed();
|
2010-04-06 00:16:45 +04:00
|
|
|
/*
|
2010-05-23 23:13:18 +04:00
|
|
|
The Item_subselect has already been wrapped with Item_in_optimizer, so we
|
|
|
|
should search for item->optimizer, not 'item'.
|
2010-04-06 00:16:45 +04:00
|
|
|
*/
|
2010-05-23 23:13:18 +04:00
|
|
|
Item *replace_me= item->optimizer;
|
|
|
|
DBUG_ASSERT(replace_me==substitute);
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2011-03-29 14:33:59 +04:00
|
|
|
Item **tree= (item->emb_on_expr_nest == NO_JOIN_NEST)?
|
2010-05-23 23:13:18 +04:00
|
|
|
&join->conds : &(item->emb_on_expr_nest->on_expr);
|
2010-04-06 00:16:45 +04:00
|
|
|
if (replace_where_subcondition(join, tree, replace_me, substitute,
|
|
|
|
do_fix_fields))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
item->substitution= NULL;
|
|
|
|
|
2011-04-05 14:33:15 +04:00
|
|
|
/*
|
|
|
|
If this is a prepared statement, repeat the above operation for
|
|
|
|
prep_where (or prep_on_expr).
|
|
|
|
*/
|
2010-04-06 00:16:45 +04:00
|
|
|
if (!thd->stmt_arena->is_conventional())
|
|
|
|
{
|
2011-03-29 14:33:59 +04:00
|
|
|
tree= (item->emb_on_expr_nest == (TABLE_LIST*)NO_JOIN_NEST)?
|
2010-04-06 00:16:45 +04:00
|
|
|
&join->select_lex->prep_where :
|
|
|
|
&(item->emb_on_expr_nest->prep_on_expr);
|
|
|
|
|
|
|
|
if (replace_where_subcondition(join, tree, replace_me, substitute,
|
|
|
|
FALSE))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
2010-04-25 12:23:52 +04:00
|
|
|
|
|
|
|
|
2011-05-09 11:20:51 +01:00
|
|
|
/*
  Tell whether a join list contains an outer join at any nesting level.

  Walks join_list in order; for an element that has a nested join, the
  nested join's own list is examined (recursively) before the element's
  outer_join attribute is tested.

  RETURN
    TRUE  - some table/nest in the list, or below it, has outer_join set
    FALSE - otherwise
*/
bool check_for_outer_joins(List<TABLE_LIST> *join_list)
{
  List_iterator<TABLE_LIST> tables_it(*join_list);
  while (TABLE_LIST *tbl= tables_it++)
  {
    NESTED_JOIN *nest= tbl->nested_join;
    if (nest && check_for_outer_joins(&nest->join_list))
      return TRUE;

    if (tbl->outer_join)
      return TRUE;
  }
  return FALSE;
}
|
|
|
|
|
|
|
|
|
2017-05-09 00:41:45 -07:00
|
|
|
/*
  Block semi-join conversion for the IN subquery predicate 'to_find',
  if it is present in the list traversed by 'li'.

  If to_find is an Item_in_optimizer function, the IN subselect it wraps
  is looked up instead. Items that are not IN subselects are ignored.
  The iterator is rewound first; the scan stops at the first list element
  that is the very same object as to_find, on which
  block_conversion_to_sj() is called.
*/
void find_and_block_conversion_to_sj(Item *to_find,
                                     List_iterator_fast<Item_in_subselect> &li)
{
  /* Look through the Item_in_optimizer wrapper, if there is one */
  const bool is_in_optimizer=
    to_find->type() == Item::FUNC_ITEM &&
    static_cast<Item_func *>(to_find)->functype() ==
      Item_func::IN_OPTIMIZER_FUNC;
  if (is_in_optimizer)
    to_find= static_cast<Item_in_optimizer *>(to_find)->
               get_wrapped_in_subselect_item();

  /* Only IN subqueries can be semi-join candidates */
  if (to_find->type() != Item::SUBSELECT_ITEM)
    return;
  if (static_cast<Item_subselect *>(to_find)->substype() !=
      Item_subselect::IN_SUBS)
    return;

  li.rewind();
  for (Item_in_subselect *candidate= li++; candidate; candidate= li++)
  {
    if (candidate != to_find)
      continue;
    candidate->block_conversion_to_sj();
    return;
  }
}
|
|
|
|
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Convert semi-join subquery predicates into semi-join join nests
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
convert_join_subqueries_to_semijoins()
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
|
|
|
|
Convert candidate subquery predicates into semi-join join nests. This
|
|
|
|
transformation is performed once in query lifetime and is irreversible.
|
|
|
|
|
|
|
|
Conversion of one subquery predicate
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
We start with a join that has a semi-join subquery:
|
|
|
|
|
|
|
|
SELECT ...
|
|
|
|
FROM ot, ...
|
|
|
|
WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
|
|
|
|
|
|
|
|
and convert it into a semi-join nest:
|
|
|
|
|
|
|
|
SELECT ...
|
|
|
|
FROM ot SEMI JOIN (it1 ... itN), ...
|
|
|
|
WHERE outer_where AND subq_where AND oe=ie
|
|
|
|
|
|
|
|
that is, in order to do the conversion, we need to
|
|
|
|
|
|
|
|
* Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
|
|
|
|
query's FROM structure.
|
|
|
|
* Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
|
|
|
|
the subquery predicate was in an ON expression)
|
|
|
|
* Remove the subquery predicate from the parent query's WHERE
|
|
|
|
|
|
|
|
Considerations when converting many predicates
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
A join may have at most MAX_TABLES tables. This may prevent us from
|
|
|
|
flattening all subqueries when the total number of tables in parent and
|
|
|
|
child selects exceeds MAX_TABLES.
|
|
|
|
We deal with this problem by flattening children's subqueries first and
|
|
|
|
then using a heuristic rule to determine each subquery predicate's
|
|
|
|
"priority".
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE OK
|
|
|
|
TRUE Error
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool convert_join_subqueries_to_semijoins(JOIN *join)
|
|
|
|
{
|
|
|
|
Query_arena *arena, backup;
|
2011-07-16 23:57:43 -07:00
|
|
|
Item_in_subselect *in_subq;
|
2010-02-16 00:53:06 +03:00
|
|
|
THD *thd= join->thd;
|
|
|
|
DBUG_ENTER("convert_join_subqueries_to_semijoins");
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
if (join->select_lex->sj_subselects.is_empty())
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
List_iterator_fast<Item_in_subselect> li(join->select_lex->sj_subselects);
|
|
|
|
|
|
|
|
while ((in_subq= li++))
|
2010-05-26 13:18:18 -07:00
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
SELECT_LEX *subq_sel= in_subq->get_select_lex();
|
2010-05-26 13:18:18 -07:00
|
|
|
if (subq_sel->handle_derived(thd->lex, DT_MERGE))
|
|
|
|
DBUG_RETURN(TRUE);
|
2017-09-02 23:19:20 +02:00
|
|
|
if (subq_sel->join->transform_in_predicates_into_in_subq(thd))
|
2017-08-29 02:32:39 +02:00
|
|
|
DBUG_RETURN(TRUE);
|
2010-05-26 13:18:18 -07:00
|
|
|
subq_sel->update_used_tables();
|
|
|
|
}
|
|
|
|
|
2017-05-09 00:41:45 -07:00
|
|
|
/*
|
|
|
|
Check all candidates to semi-join conversion that occur
|
|
|
|
in ON expressions of outer join. Set the flag blocking
|
|
|
|
this conversion for them.
|
|
|
|
*/
|
|
|
|
TABLE_LIST *tbl;
|
|
|
|
List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
|
|
|
|
while ((tbl= ti++))
|
|
|
|
{
|
|
|
|
TABLE_LIST *embedded;
|
|
|
|
TABLE_LIST *embedding= tbl;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
embedded= embedding;
|
2017-05-17 14:29:13 -07:00
|
|
|
bool block_conversion_to_sj= false;
|
|
|
|
if (embedded->on_expr)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Conversion of an IN subquery predicate into semi-join
|
|
|
|
is blocked now if the predicate occurs:
|
|
|
|
- in the ON expression of an outer join
|
|
|
|
- in the ON expression of an inner join embedded directly
|
|
|
|
or indirectly in the inner nest of an outer join
|
|
|
|
*/
|
|
|
|
for (TABLE_LIST *tl= embedded; tl; tl= tl->embedding)
|
|
|
|
{
|
|
|
|
if (tl->outer_join)
|
|
|
|
{
|
|
|
|
block_conversion_to_sj= true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (block_conversion_to_sj)
|
2017-05-09 00:41:45 -07:00
|
|
|
{
|
|
|
|
Item *cond= embedded->on_expr;
|
|
|
|
if (!cond)
|
|
|
|
;
|
|
|
|
else if (cond->type() != Item::COND_ITEM)
|
|
|
|
find_and_block_conversion_to_sj(cond, li);
|
|
|
|
else if (((Item_cond*) cond)->functype() ==
|
|
|
|
Item_func::COND_AND_FUNC)
|
|
|
|
{
|
|
|
|
Item *item;
|
|
|
|
List_iterator<Item> it(*(((Item_cond*) cond)->argument_list()));
|
|
|
|
while ((item= it++))
|
|
|
|
{
|
|
|
|
find_and_block_conversion_to_sj(item, li);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
embedding= embedded->embedding;
|
|
|
|
}
|
|
|
|
while (embedding &&
|
|
|
|
embedding->nested_join->join_list.head() == embedded);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Block conversion to semi-joins for those candidates that
|
|
|
|
are encountered in the WHERE condition of the multi-table view
|
|
|
|
with CHECK OPTION if this view is used in UPDATE/DELETE.
|
|
|
|
(This limitation can be, probably, easily lifted.)
|
|
|
|
*/
|
|
|
|
li.rewind();
|
|
|
|
while ((in_subq= li++))
|
|
|
|
{
|
|
|
|
if (in_subq->emb_on_expr_nest != NO_JOIN_NEST &&
|
|
|
|
in_subq->emb_on_expr_nest->effective_with_check)
|
|
|
|
{
|
|
|
|
in_subq->block_conversion_to_sj();
|
|
|
|
}
|
|
|
|
}
|
2017-05-17 16:16:54 -07:00
|
|
|
|
2023-02-06 16:23:17 +03:00
|
|
|
/*
|
|
|
|
Compute join->not_usable_rowid_map.
|
|
|
|
The idea is:
|
|
|
|
- DuplicateWeedout strategy requires that one is able to get the rowid
|
|
|
|
(call h->position()) for tables in the parent select. Obtained Rowid
|
|
|
|
values must be stable across table scans.
|
|
|
|
= Rowids are typically available. The only known exception is federatedx
|
|
|
|
tables.
|
|
|
|
- The optimizer requires that DuplicateWeedout strategy is always
|
|
|
|
applicable. It is the only strategy that is applicable for any join
|
|
|
|
order. The optimizer is not prepared for the situation where it has
|
|
|
|
constructed a join order and then it turns out that there's no semi-join
|
|
|
|
strategy that can be used for it.
|
|
|
|
|
|
|
|
Because of the above, we will not use semi-joins if the parent select has
|
|
|
|
tables which do not support rowids.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
List_iterator_fast<TABLE_LIST> li(join->select_lex->leaf_tables);
|
|
|
|
TABLE_LIST *tbl;
|
|
|
|
while ((tbl = li++))
|
|
|
|
{
|
|
|
|
TABLE *table= tbl->table;
|
|
|
|
if (table && table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
|
|
|
|
join->not_usable_rowid_map|= table->map;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (join->select_options & SELECT_STRAIGHT_JOIN ||
|
|
|
|
join->not_usable_rowid_map != 0)
|
2017-05-17 16:16:54 -07:00
|
|
|
{
|
|
|
|
/* Block conversion to semijoins for all candidates */
|
|
|
|
li.rewind();
|
|
|
|
while ((in_subq= li++))
|
|
|
|
{
|
|
|
|
in_subq->block_conversion_to_sj();
|
|
|
|
}
|
|
|
|
}
|
2017-05-09 00:41:45 -07:00
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
li.rewind();
|
2010-02-16 00:53:06 +03:00
|
|
|
/* First, convert child join's subqueries. We proceed bottom-up here */
|
2011-07-16 23:57:43 -07:00
|
|
|
while ((in_subq= li++))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
st_select_lex *child_select= in_subq->get_select_lex();
|
2010-02-16 00:53:06 +03:00
|
|
|
JOIN *child_join= child_select->join;
|
2011-03-27 03:45:16 +04:00
|
|
|
child_join->outer_tables = child_join->table_count;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
child_select->where contains only the WHERE predicate of the
|
|
|
|
subquery itself here. We may be selecting from a VIEW, which has its
|
|
|
|
own predicate. The combined predicates are available in child_join->conds,
|
|
|
|
which was built by setup_conds() doing prepare_where() for all views.
|
|
|
|
*/
|
|
|
|
child_select->where= child_join->conds;
|
|
|
|
|
|
|
|
if (convert_join_subqueries_to_semijoins(child_join))
|
|
|
|
DBUG_RETURN(TRUE);
|
2017-05-09 00:41:45 -07:00
|
|
|
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq->sj_convert_priority=
|
2017-05-17 15:42:36 +03:00
|
|
|
MY_TEST(in_subq->do_not_convert_to_sj) * MAX_TABLES * 2 +
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq->is_correlated * MAX_TABLES + child_join->outer_tables;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Temporary measure: disable semi-joins when they are together with outer
|
|
|
|
// joins.
|
2011-06-28 00:51:26 +04:00
|
|
|
#if 0
|
2011-05-09 11:20:51 +01:00
|
|
|
if (check_for_outer_joins(join->join_list))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq= join->select_lex->sj_subselects.head();
|
2011-05-09 11:20:51 +01:00
|
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
goto skip_conversion;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-06-28 00:51:26 +04:00
|
|
|
#endif
|
2010-02-16 00:53:06 +03:00
|
|
|
//dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
|
|
|
|
/*
|
|
|
|
2. Pick which subqueries to convert:
|
|
|
|
sort the subquery array
|
|
|
|
- prefer correlated subqueries over uncorrelated;
|
|
|
|
- prefer subqueries that have greater number of outer tables;
|
|
|
|
*/
|
2011-07-16 23:57:43 -07:00
|
|
|
bubble_sort<Item_in_subselect>(&join->select_lex->sj_subselects,
|
|
|
|
subq_sj_candidate_cmp, NULL);
|
2010-02-16 00:53:06 +03:00
|
|
|
// #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
|
|
|
|
/* Replace all subqueries to be flattened with Item_int(1) */
|
|
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
li.rewind();
|
|
|
|
while ((in_subq= li++))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2010-04-06 00:16:45 +04:00
|
|
|
bool remove_item= TRUE;
|
2021-12-09 20:13:35 +03:00
|
|
|
subquery_types_allow_materialization(thd, in_subq);
|
2011-06-28 00:51:26 +04:00
|
|
|
|
|
|
|
/* Stop processing if we've reached a subquery that's attached to the ON clause */
|
2017-05-09 00:41:45 -07:00
|
|
|
if (in_subq->do_not_convert_to_sj)
|
2019-02-13 11:22:16 +05:30
|
|
|
{
|
2019-02-18 17:11:20 +05:30
|
|
|
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
|
2019-02-13 11:22:16 +05:30
|
|
|
in_subq->get_select_lex()->select_number,
|
|
|
|
"IN (SELECT)", "semijoin");
|
2022-01-20 15:49:01 +02:00
|
|
|
if (unlikely(trace_transform.trace_started()))
|
|
|
|
trace_transform.
|
|
|
|
add("converted_to_semi_join", false).
|
|
|
|
add("cause", "subquery attached to the ON clause");
|
2011-06-28 00:51:26 +04:00
|
|
|
break;
|
2019-02-13 11:22:16 +05:30
|
|
|
}
|
2011-06-28 00:51:26 +04:00
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
if (in_subq->is_flattenable_semijoin)
|
2010-04-06 00:16:45 +04:00
|
|
|
{
|
2019-02-18 17:11:20 +05:30
|
|
|
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
|
2019-02-13 11:22:16 +05:30
|
|
|
in_subq->get_select_lex()->select_number,
|
|
|
|
"IN (SELECT)", "semijoin");
|
2011-03-27 03:45:16 +04:00
|
|
|
if (join->table_count +
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq->unit->first_select()->join->table_count >= MAX_TABLES)
|
2019-02-13 11:22:16 +05:30
|
|
|
{
|
2022-01-20 15:49:01 +02:00
|
|
|
if (unlikely(trace_transform.trace_started()))
|
|
|
|
trace_transform.
|
|
|
|
add("converted_to_semi_join", false).
|
|
|
|
add("cause", "table in parent join now exceeds MAX_TABLES");
|
2011-03-22 00:39:27 +03:00
|
|
|
break;
|
2019-02-13 11:22:16 +05:30
|
|
|
}
|
2011-07-16 23:57:43 -07:00
|
|
|
if (convert_subq_to_sj(join, in_subq))
|
2011-10-04 02:20:06 +04:00
|
|
|
goto restore_arena_and_fail;
|
2019-02-18 17:11:20 +05:30
|
|
|
trace_transform.add("converted_to_semi_join", true);
|
2010-04-06 00:16:45 +04:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-03-27 03:45:16 +04:00
|
|
|
if (join->table_count + 1 >= MAX_TABLES)
|
2011-03-22 00:39:27 +03:00
|
|
|
break;
|
2011-07-16 23:57:43 -07:00
|
|
|
if (convert_subq_to_jtbm(join, in_subq, &remove_item))
|
2011-10-04 02:20:06 +04:00
|
|
|
goto restore_arena_and_fail;
|
2010-04-06 00:16:45 +04:00
|
|
|
}
|
|
|
|
if (remove_item)
|
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
|
|
|
|
&join->conds : &(in_subq->emb_on_expr_nest->on_expr);
|
|
|
|
Item *replace_me= in_subq->original_item();
|
2015-08-11 11:18:38 +04:00
|
|
|
if (replace_where_subcondition(join, tree, replace_me,
|
2015-08-20 15:24:13 +03:00
|
|
|
new (thd->mem_root) Item_int(thd, 1),
|
2010-04-06 00:16:45 +04:00
|
|
|
FALSE))
|
2011-10-04 02:20:06 +04:00
|
|
|
goto restore_arena_and_fail;
|
2010-04-06 00:16:45 +04:00
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-06-28 00:51:26 +04:00
|
|
|
//skip_conversion:
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't
|
|
|
|
convert:
|
|
|
|
*/
|
2011-07-16 23:57:43 -07:00
|
|
|
while (in_subq)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
JOIN *child_join= in_subq->unit->first_select()->join;
|
|
|
|
in_subq->changed= 0;
|
2020-09-02 03:13:32 +03:00
|
|
|
in_subq->base_flags|= item_base_t::FIXED;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
SELECT_LEX *save_select_lex= thd->lex->current_select;
|
2011-07-16 23:57:43 -07:00
|
|
|
thd->lex->current_select= in_subq->unit->first_select();
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
bool res= in_subq->select_transformer(child_join);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
thd->lex->current_select= save_select_lex;
|
|
|
|
|
2010-12-15 12:54:25 +02:00
|
|
|
if (res)
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq->changed= 1;
|
2020-08-02 12:31:14 +03:00
|
|
|
DBUG_ASSERT(in_subq->fixed());
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
Item *substitute= in_subq->substitution;
|
2020-08-14 19:51:10 +03:00
|
|
|
bool do_fix_fields= !in_subq->substitution->fixed();
|
2011-07-16 23:57:43 -07:00
|
|
|
Item **tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
|
|
|
|
&join->conds : &(in_subq->emb_on_expr_nest->on_expr);
|
|
|
|
Item *replace_me= in_subq->original_item();
|
2010-04-06 00:16:45 +04:00
|
|
|
if (replace_where_subcondition(join, tree, replace_me, substitute,
|
2010-02-16 00:53:06 +03:00
|
|
|
do_fix_fields))
|
|
|
|
DBUG_RETURN(TRUE);
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq->substitution= NULL;
|
2011-04-05 14:33:15 +04:00
|
|
|
/*
|
|
|
|
If this is a prepared statement, repeat the above operation for
|
|
|
|
prep_where (or prep_on_expr). Subquery-to-semijoin conversion is
|
|
|
|
done once for prepared statement.
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
if (!thd->stmt_arena->is_conventional())
|
|
|
|
{
|
2011-07-16 23:57:43 -07:00
|
|
|
tree= (in_subq->emb_on_expr_nest == NO_JOIN_NEST)?
|
2010-02-16 00:53:06 +03:00
|
|
|
&join->select_lex->prep_where :
|
2011-07-16 23:57:43 -07:00
|
|
|
&(in_subq->emb_on_expr_nest->prep_on_expr);
|
2011-09-17 23:53:50 +04:00
|
|
|
/*
|
|
|
|
prep_on_expr/ prep_where may be NULL in some cases.
|
|
|
|
If that is the case, do nothing - simplify_joins() will copy
|
|
|
|
ON/WHERE expression into prep_on_expr/prep_where.
|
|
|
|
*/
|
|
|
|
if (*tree && replace_where_subcondition(join, tree, replace_me, substitute,
|
2010-02-16 00:53:06 +03:00
|
|
|
FALSE))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
2010-09-17 13:17:27 +03:00
|
|
|
/*
|
|
|
|
Revert to the IN->EXISTS strategy in the rare case when the subquery could
|
2010-09-30 18:32:44 +03:00
|
|
|
not be flattened.
|
2010-09-17 13:17:27 +03:00
|
|
|
*/
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subq->reset_strategy(SUBS_IN_TO_EXISTS);
|
2011-10-01 00:10:03 +04:00
|
|
|
if (is_materialization_applicable(thd, in_subq,
|
|
|
|
in_subq->unit->first_select()))
|
|
|
|
{
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subq->add_strategy(SUBS_MATERIALIZATION);
|
2011-10-01 00:10:03 +04:00
|
|
|
}
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
in_subq= li++;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (arena)
|
|
|
|
thd->restore_active_arena(arena, &backup);
|
2011-07-16 23:57:43 -07:00
|
|
|
join->select_lex->sj_subselects.empty();
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(FALSE);
|
2011-10-04 02:20:06 +04:00
|
|
|
|
|
|
|
restore_arena_and_fail:
|
|
|
|
if (arena)
|
|
|
|
thd->restore_active_arena(arena, &backup);
|
|
|
|
DBUG_RETURN(TRUE);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2010-05-23 23:13:18 +04:00
|
|
|
/*
|
|
|
|
Get #output_rows and scan_time estimates for a "delayed" table.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
get_delayed_table_estimates()
|
|
|
|
table IN Table to get estimates for
|
|
|
|
out_rows OUT E(#rows in the table)
|
|
|
|
scan_time OUT E(scan_time).
|
|
|
|
startup_cost OUT cost to populate the table.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Get #output_rows and scan_time estimates for a "delayed" table. By
|
|
|
|
"delayed" here we mean that the table is filled at the start of query
|
|
|
|
execution. This means that the optimizer can't use table statistics to
|
|
|
|
get #rows estimate for it, it has to call this function instead.
|
|
|
|
|
|
|
|
This function is expected to make different actions depending on the nature
|
|
|
|
of the table. At the moment there are two kinds of delayed tables:
|
|
|
|
table functions (JSON_TABLE) and non-flattenable semi-joins.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void get_delayed_table_estimates(TABLE *table,
|
|
|
|
ha_rows *out_rows,
|
|
|
|
double *scan_time,
|
|
|
|
double *startup_cost)
|
2010-04-06 00:16:45 +04:00
|
|
|
{
|
2010-05-23 23:13:18 +04:00
|
|
|
Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
|
2021-03-17 09:03:45 +04:00
|
|
|
Table_function_json_table *table_function=
|
|
|
|
table->pos_in_table_list->table_function;
|
2022-04-05 20:12:29 +03:00
|
|
|
handler *file= table->file;
|
2021-03-17 09:03:45 +04:00
|
|
|
|
|
|
|
if (table_function)
|
|
|
|
{
|
|
|
|
table_function->get_estimates(out_rows, scan_time, startup_cost);
|
|
|
|
return;
|
|
|
|
}
|
2011-05-09 10:35:55 +01:00
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
DBUG_ASSERT(item->engine->engine_type() ==
|
|
|
|
subselect_engine::HASH_SJ_ENGINE);
|
|
|
|
|
|
|
|
subselect_hash_sj_engine *hash_sj_engine=
|
|
|
|
((subselect_hash_sj_engine*)item->engine);
|
2011-03-22 13:09:55 +03:00
|
|
|
|
2011-05-26 15:01:26 +04:00
|
|
|
*out_rows= (ha_rows)item->jtbm_record_count;
|
|
|
|
*startup_cost= item->jtbm_read_time;
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
/* Calculate cost of scanning the temptable */
|
2019-05-27 19:08:00 -07:00
|
|
|
double data_size= COST_MULT(item->jtbm_record_count,
|
|
|
|
hash_sj_engine->tmp_table->s->reclength);
|
2022-04-05 20:12:29 +03:00
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into considering
cost_for_index_read() if there where no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
/* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
|
2022-09-30 17:10:37 +03:00
|
|
|
*scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST *
|
|
|
|
table->file->DISK_READ_RATIO) +
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
*out_rows * file->ROW_COPY_COST);
|
2020-02-28 12:59:30 +02:00
|
|
|
}
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2010-04-25 12:23:52 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/**
|
|
|
|
@brief Replaces an expression destructively inside the expression tree of
|
|
|
|
the WHERE clase.
|
|
|
|
|
2013-11-11 17:28:14 +02:00
|
|
|
@note We substitute AND/OR structure because it was copied by
|
|
|
|
copy_andor_structure and some changes could be done in the copy but
|
|
|
|
should be left permanent, also there could be several layers of AND over
|
|
|
|
AND and OR over OR because ::fix_field() possibly is not called.
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
@param join The top-level query.
|
|
|
|
@param old_cond The expression to be replaced.
|
|
|
|
@param new_cond The expression to be substituted.
|
|
|
|
@param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
|
|
|
|
the new expression.
|
|
|
|
@return <code>true</code> if there was an error, <code>false</code> if
|
|
|
|
successful.
|
|
|
|
*/
|
2010-04-25 12:23:52 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
static bool replace_where_subcondition(JOIN *join, Item **expr,
|
|
|
|
Item *old_cond, Item *new_cond,
|
|
|
|
bool do_fix_fields)
|
|
|
|
{
|
|
|
|
if (*expr == old_cond)
|
|
|
|
{
|
|
|
|
*expr= new_cond;
|
|
|
|
if (do_fix_fields)
|
|
|
|
new_cond->fix_fields(join->thd, expr);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((*expr)->type() == Item::COND_ITEM)
|
|
|
|
{
|
|
|
|
List_iterator<Item> li(*((Item_cond*)(*expr))->argument_list());
|
|
|
|
Item *item;
|
|
|
|
while ((item= li++))
|
|
|
|
{
|
2013-11-11 17:28:14 +02:00
|
|
|
if (item == old_cond)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
li.replace(new_cond);
|
|
|
|
if (do_fix_fields)
|
|
|
|
new_cond->fix_fields(join->thd, li.ref());
|
|
|
|
return FALSE;
|
|
|
|
}
|
2013-11-11 17:28:14 +02:00
|
|
|
else if (item->type() == Item::COND_ITEM)
|
|
|
|
{
|
|
|
|
replace_where_subcondition(join, li.ref(),
|
|
|
|
old_cond, new_cond,
|
|
|
|
do_fix_fields);
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
2011-11-02 22:05:08 +04:00
|
|
|
/*
|
|
|
|
We can come to here when
|
|
|
|
- we're doing replace operations on both on_expr and prep_on_expr
|
|
|
|
- on_expr is the same as prep_on_expr, or they share a sub-tree
|
|
|
|
(so, when we do replace in on_expr, we replace in prep_on_expr, too,
|
|
|
|
and when we try doing a replace in prep_on_expr, the item we wanted
|
|
|
|
to replace there has already been replaced)
|
|
|
|
*/
|
|
|
|
return FALSE;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2011-07-16 23:57:43 -07:00
|
|
|
/*
  Three-way comparison of two semi-join candidates by sj_convert_priority.

  Returns -1 when el1 has the higher priority, 0 when both priorities are
  equal and 1 otherwise, i.e. the ordering is by descending priority.
  'arg' is not used.
*/
static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2,
                                 void *arg)
{
  if (el1->sj_convert_priority > el2->sj_convert_priority)
    return -1;
  if (el1->sj_convert_priority == el2->sj_convert_priority)
    return 0;
  return 1;
}
|
|
|
|
|
|
|
|
|
2018-07-25 21:17:50 +05:30
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
reset the value of the field in_eqaulity_no for all Item_func_eq
|
|
|
|
items in the where clause of the subquery.
|
|
|
|
|
|
|
|
Look for in_equality_no description in Item_func_eq class
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Lets have an example:
|
|
|
|
SELECT t1.a FROM t1 WHERE t1.a IN
|
|
|
|
(SELECT t2.a FROM t2 where t2.b IN
|
|
|
|
(select t3.b from t3 where t3.c=27 ))
|
|
|
|
|
|
|
|
So for such a query we have the parent, child and
|
|
|
|
grandchild select.
|
|
|
|
|
|
|
|
So for the equality t2.b = t3.b we set the value for in_equality_no to
|
|
|
|
0 according to its description. Wewe do the same for t1.a = t2.a.
|
|
|
|
But when we look at the child select (with the grandchild select merged),
|
|
|
|
the query would be
|
|
|
|
|
|
|
|
SELECT t1.a FROM t1 WHERE t1.a IN
|
|
|
|
(SELECT t2.a FROM t2 where t2.b = t3.b and t3.c=27)
|
|
|
|
|
|
|
|
and then when the child select is merged into the parent select the query
|
|
|
|
would look like
|
|
|
|
|
|
|
|
SELECT t1.a FROM t1, semi-join-nest(t2,t3)
|
|
|
|
WHERE t1.a =t2.a and t2.b = t3.b and t3.c=27
|
|
|
|
|
|
|
|
Still we would have in_equality_no set for t2.b = t3.b
|
|
|
|
though it does not take part in the semi-join equality for the parent select,
|
|
|
|
so we should reset its value to UINT_MAX.
|
|
|
|
|
|
|
|
@param cond WHERE clause of the subquery
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void reset_equality_number_for_subq_conds(Item * cond)
|
|
|
|
{
|
|
|
|
if (!cond)
|
|
|
|
return;
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
|
|
{
|
|
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
|
|
Item *item;
|
|
|
|
while ((item=li++))
|
|
|
|
{
|
|
|
|
if (item->type() == Item::FUNC_ITEM &&
|
|
|
|
((Item_func*)item)->functype()== Item_func::EQ_FUNC)
|
|
|
|
((Item_func_eq*)item)->in_equality_no= UINT_MAX;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (cond->type() == Item::FUNC_ITEM &&
|
|
|
|
((Item_func*)cond)->functype()== Item_func::EQ_FUNC)
|
|
|
|
((Item_func_eq*)cond)->in_equality_no= UINT_MAX;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Convert a subquery predicate into a TABLE_LIST semi-join nest
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
convert_subq_to_sj()
|
|
|
|
parent_join Parent join, the one that has subq_pred in its WHERE/ON
|
|
|
|
clause
|
|
|
|
subq_pred Subquery predicate to be converted
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Convert a subquery predicate into a TABLE_LIST semi-join nest. All the
|
|
|
|
prerequisites are already checked, so the conversion is always successfull.
|
|
|
|
|
|
|
|
Prepared Statements: the transformation is permanent:
|
|
|
|
- Changes in TABLE_LIST structures are naturally permanent
|
|
|
|
- Item tree changes are performed on statement MEM_ROOT:
|
|
|
|
= we activate statement MEM_ROOT
|
|
|
|
= this function is called before the first fix_prepare_information
|
|
|
|
call.
|
|
|
|
|
|
|
|
This is intended because the criteria for subquery-to-sj conversion remain
|
|
|
|
constant for the lifetime of the Prepared Statement.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE OK
|
|
|
|
TRUE Out of memory error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
|
|
|
|
{
|
|
|
|
SELECT_LEX *parent_lex= parent_join->select_lex;
|
|
|
|
TABLE_LIST *emb_tbl_nest= NULL;
|
2021-11-02 16:21:11 +04:00
|
|
|
TABLE_LIST *orig_tl;
|
2010-02-16 00:53:06 +03:00
|
|
|
List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
|
|
|
|
THD *thd= parent_join->thd;
|
2022-02-01 20:33:04 +01:00
|
|
|
SELECT_LEX *save_lex;
|
|
|
|
Item **left;
|
|
|
|
Item *left_exp;
|
|
|
|
Item *left_exp_orig;
|
|
|
|
|
|
|
|
uint ncols;
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_ENTER("convert_subq_to_sj");
|
|
|
|
|
|
|
|
/*
|
|
|
|
1. Find out where to put the predicate into.
|
|
|
|
Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
|
|
|
|
*/
|
2011-03-29 14:33:59 +04:00
|
|
|
if ((void*)subq_pred->emb_on_expr_nest != (void*)NO_JOIN_NEST)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-03-29 14:33:59 +04:00
|
|
|
if (subq_pred->emb_on_expr_nest->nested_join)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
We're dealing with
|
|
|
|
|
|
|
|
... [LEFT] JOIN ( ... ) ON (subquery AND whatever) ...
|
|
|
|
|
|
|
|
The sj-nest will be inserted into the brackets nest.
|
|
|
|
*/
|
2011-03-29 14:33:59 +04:00
|
|
|
emb_tbl_nest= subq_pred->emb_on_expr_nest;
|
2010-02-16 00:53:06 +03:00
|
|
|
emb_join_list= &emb_tbl_nest->nested_join->join_list;
|
|
|
|
}
|
2011-03-29 14:33:59 +04:00
|
|
|
else if (!subq_pred->emb_on_expr_nest->outer_join)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
We're dealing with
|
|
|
|
|
|
|
|
... INNER JOIN tblX ON (subquery AND whatever) ...
|
|
|
|
|
|
|
|
The sj-nest will be tblX's "sibling", i.e. another child of its
|
|
|
|
parent. This is ok because tblX is joined as an inner join.
|
|
|
|
*/
|
2011-03-29 14:33:59 +04:00
|
|
|
emb_tbl_nest= subq_pred->emb_on_expr_nest->embedding;
|
2010-02-16 00:53:06 +03:00
|
|
|
if (emb_tbl_nest)
|
|
|
|
emb_join_list= &emb_tbl_nest->nested_join->join_list;
|
|
|
|
}
|
2011-03-29 14:33:59 +04:00
|
|
|
else if (!subq_pred->emb_on_expr_nest->nested_join)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-03-29 14:33:59 +04:00
|
|
|
TABLE_LIST *outer_tbl= subq_pred->emb_on_expr_nest;
|
2010-02-16 00:53:06 +03:00
|
|
|
TABLE_LIST *wrap_nest;
|
2023-04-26 15:27:01 +04:00
|
|
|
const Lex_ident_table sj_wrap_name= "(sj-wrap)"_Lex_ident_table;
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
We're dealing with
|
|
|
|
|
|
|
|
... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
|
|
|
|
|
|
|
|
we'll need to convert it into:
|
|
|
|
|
|
|
|
... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
|
|
|
|
| |
|
|
|
|
|<----- wrap_nest ---->|
|
|
|
|
|
|
|
|
Q: other subqueries may be pointing to this element. What to do?
|
|
|
|
A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest.
|
|
|
|
But we'll need to fix other pointers.
|
|
|
|
A2: Another way: have TABLE_LIST::next_ptr so the following
|
|
|
|
subqueries know the table has been nested.
|
|
|
|
A3: changes in the TABLE_LIST::outer_join will make everything work
|
|
|
|
automatically.
|
|
|
|
*/
|
2015-08-24 14:42:07 +03:00
|
|
|
if (!(wrap_nest= alloc_join_nest(thd)))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
wrap_nest->embedding= outer_tbl->embedding;
|
|
|
|
wrap_nest->join_list= outer_tbl->join_list;
|
2018-01-07 18:03:44 +02:00
|
|
|
wrap_nest->alias= sj_wrap_name;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
wrap_nest->nested_join->join_list.empty();
|
2015-08-24 14:42:07 +03:00
|
|
|
wrap_nest->nested_join->join_list.push_back(outer_tbl, thd->mem_root);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
outer_tbl->embedding= wrap_nest;
|
|
|
|
outer_tbl->join_list= &wrap_nest->nested_join->join_list;
|
|
|
|
|
|
|
|
/*
|
|
|
|
wrap_nest will take place of outer_tbl, so move the outer join flag
|
|
|
|
and on_expr
|
|
|
|
*/
|
|
|
|
wrap_nest->outer_join= outer_tbl->outer_join;
|
|
|
|
outer_tbl->outer_join= 0;
|
|
|
|
|
|
|
|
wrap_nest->on_expr= outer_tbl->on_expr;
|
|
|
|
outer_tbl->on_expr= NULL;
|
|
|
|
|
|
|
|
List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
|
|
|
|
TABLE_LIST *tbl;
|
|
|
|
while ((tbl= li++))
|
|
|
|
{
|
|
|
|
if (tbl == outer_tbl)
|
|
|
|
{
|
|
|
|
li.replace(wrap_nest);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
Ok now wrap_nest 'contains' outer_tbl and we're ready to add the
|
|
|
|
semi-join nest into it
|
|
|
|
*/
|
|
|
|
emb_join_list= &wrap_nest->nested_join->join_list;
|
|
|
|
emb_tbl_nest= wrap_nest;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
TABLE_LIST *sj_nest;
|
|
|
|
NESTED_JOIN *nested_join;
|
2023-04-26 15:27:01 +04:00
|
|
|
const Lex_ident_table sj_nest_name= "(sj-nest)"_Lex_ident_table;
|
2015-08-24 14:42:07 +03:00
|
|
|
if (!(sj_nest= alloc_join_nest(thd)))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
nested_join= sj_nest->nested_join;
|
|
|
|
|
|
|
|
sj_nest->join_list= emb_join_list;
|
|
|
|
sj_nest->embedding= emb_tbl_nest;
|
2018-01-07 18:03:44 +02:00
|
|
|
sj_nest->alias= sj_nest_name;
|
2010-02-16 00:53:06 +03:00
|
|
|
sj_nest->sj_subq_pred= subq_pred;
|
2012-04-02 21:41:54 +04:00
|
|
|
sj_nest->original_subq_pred_used_tables= subq_pred->used_tables() |
|
2020-06-30 15:20:11 +02:00
|
|
|
subq_pred->left_exp()->used_tables();
|
2010-02-16 00:53:06 +03:00
|
|
|
/* Nests do not participate in those 'chains', so: */
|
|
|
|
/* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
|
2015-08-24 14:42:07 +03:00
|
|
|
emb_join_list->push_back(sj_nest, thd->mem_root);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
nested_join->used_tables and nested_join->not_null_tables are
|
|
|
|
initialized in simplify_joins().
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
2. Walk through subquery's top list and set 'embedding' to point to the
|
|
|
|
sj-nest.
|
|
|
|
*/
|
|
|
|
st_select_lex *subq_lex= subq_pred->unit->first_select();
|
2017-02-18 17:47:31 +01:00
|
|
|
DBUG_ASSERT(subq_lex->next_select() == NULL);
|
2010-02-16 00:53:06 +03:00
|
|
|
nested_join->join_list.empty();
|
|
|
|
List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
|
2010-05-26 13:18:18 -07:00
|
|
|
TABLE_LIST *tl;
|
2010-02-16 00:53:06 +03:00
|
|
|
while ((tl= li++))
|
|
|
|
{
|
|
|
|
tl->embedding= sj_nest;
|
|
|
|
tl->join_list= &nested_join->join_list;
|
2015-08-24 14:42:07 +03:00
|
|
|
nested_join->join_list.push_back(tl, thd->mem_root);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Reconnect the next_leaf chain.
|
|
|
|
TODO: Do we have to put subquery's tables at the end of the chain?
|
|
|
|
Inserting them at the beginning would be a bit faster.
|
|
|
|
NOTE: We actually insert them at the front! That's because the order is
|
|
|
|
reversed in this list.
|
|
|
|
*/
|
2014-08-15 17:35:07 +02:00
|
|
|
parent_lex->leaf_tables.append(&subq_lex->leaf_tables);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2013-03-29 19:27:06 +04:00
|
|
|
if (subq_lex->options & OPTION_SCHEMA_TABLE)
|
|
|
|
parent_lex->options |= OPTION_SCHEMA_TABLE;
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Same as above for next_local chain
|
|
|
|
(a theory: a next_local chain always starts with ::leaf_tables
|
|
|
|
because view's tables are inserted after the view)
|
|
|
|
*/
|
2012-03-18 23:58:20 +04:00
|
|
|
|
2021-11-02 16:21:11 +04:00
|
|
|
for (orig_tl= (TABLE_LIST*)(parent_lex->table_list.first);
|
|
|
|
orig_tl->next_local;
|
|
|
|
orig_tl= orig_tl->next_local)
|
2012-03-18 23:58:20 +04:00
|
|
|
{}
|
|
|
|
|
2021-11-02 16:21:11 +04:00
|
|
|
orig_tl->next_local= subq_lex->join->tables_list;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/* A theory: no need to re-connect the next_global chain */
|
|
|
|
|
|
|
|
/* 3. Remove the original subquery predicate from the WHERE/ON */
|
|
|
|
|
2017-11-28 06:25:14 +04:00
|
|
|
/*TODO: also reset the 'm_with_subquery' there. */
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-03-27 03:45:16 +04:00
|
|
|
/* n. Adjust the parent_join->table_count counter */
|
|
|
|
uint table_no= parent_join->table_count;
|
2010-02-16 00:53:06 +03:00
|
|
|
/* n. Walk through child's tables and adjust table->map */
|
2010-05-26 13:18:18 -07:00
|
|
|
List_iterator_fast<TABLE_LIST> si(subq_lex->leaf_tables);
|
|
|
|
while ((tl= si++))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-12-07 01:03:00 +04:00
|
|
|
tl->set_tablenr(table_no);
|
2011-07-09 13:47:41 +04:00
|
|
|
if (tl->is_jtbm())
|
2016-07-27 17:01:45 +03:00
|
|
|
{
|
2011-12-07 01:03:00 +04:00
|
|
|
tl->jtbm_table_no= table_no;
|
2016-07-27 17:01:45 +03:00
|
|
|
Item *dummy= tl->jtbm_subselect;
|
2017-11-09 14:05:53 +04:00
|
|
|
tl->jtbm_subselect->fix_after_pullout(parent_lex, &dummy, true);
|
2016-07-27 17:01:45 +03:00
|
|
|
DBUG_ASSERT(dummy == tl->jtbm_subselect);
|
|
|
|
}
|
2021-03-17 09:03:45 +04:00
|
|
|
else if (tl->table_function)
|
|
|
|
{
|
|
|
|
tl->table_function->fix_after_pullout(tl, parent_lex, true);
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
SELECT_LEX *old_sl= tl->select_lex;
|
|
|
|
tl->select_lex= parent_join->select_lex;
|
|
|
|
for (TABLE_LIST *emb= tl->embedding;
|
|
|
|
emb && emb->select_lex == old_sl;
|
|
|
|
emb= emb->embedding)
|
|
|
|
emb->select_lex= parent_join->select_lex;
|
2010-05-26 13:18:18 -07:00
|
|
|
table_no++;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-03-27 03:45:16 +04:00
|
|
|
parent_join->table_count += subq_lex->join->table_count;
|
2011-06-30 20:49:11 +04:00
|
|
|
//parent_join->table_count += subq_lex->leaf_tables.elements;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
Put the subquery's WHERE into semi-join's sj_on_expr
|
|
|
|
Add the subquery-induced equalities too.
|
|
|
|
*/
|
2022-02-01 20:33:04 +01:00
|
|
|
save_lex= thd->lex->current_select;
|
2021-11-02 16:21:11 +04:00
|
|
|
table_map subq_pred_used_tables;
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
thd->lex->current_select=subq_lex;
|
2022-02-01 20:33:04 +01:00
|
|
|
left= subq_pred->left_exp_ptr();
|
2020-06-30 15:20:11 +02:00
|
|
|
if ((*left)->fix_fields_if_needed(thd, left))
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2022-02-01 20:33:04 +01:00
|
|
|
left_exp= *left;
|
|
|
|
left_exp_orig= subq_pred->left_exp_orig();
|
2010-02-16 00:53:06 +03:00
|
|
|
thd->lex->current_select=save_lex;
|
|
|
|
|
2021-11-02 16:21:11 +04:00
|
|
|
subq_pred_used_tables= subq_pred->used_tables();
|
2015-05-13 16:17:22 +02:00
|
|
|
sj_nest->nested_join->sj_corr_tables= subq_pred_used_tables;
|
|
|
|
sj_nest->nested_join->sj_depends_on= subq_pred_used_tables |
|
2020-06-30 15:20:11 +02:00
|
|
|
left_exp->used_tables();
|
2010-10-18 12:55:26 +04:00
|
|
|
sj_nest->sj_on_expr= subq_lex->join->conds;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
Create the IN-equalities and inject them into semi-join's ON expression.
|
|
|
|
Additionally, for LooseScan strategy
|
|
|
|
- Record the number of IN-equalities.
|
|
|
|
- Create list of pointers to (oe1, ..., ieN). We'll need the list to
|
|
|
|
see which of the expressions are bound and which are not (for those
|
|
|
|
we'll produce a distinct stream of (ie_i1,...ie_ik).
|
|
|
|
|
|
|
|
(TODO: can we just create a list of pointers and hope the expressions
|
|
|
|
will not substitute themselves on fix_fields()? or we need to wrap
|
|
|
|
them into Item_direct_view_refs and store pointers to those. The
|
|
|
|
pointers to Item_direct_view_refs are guaranteed to be stable as
|
|
|
|
Item_direct_view_refs doesn't substitute itself with anything in
|
|
|
|
Item_direct_view_ref::fix_fields.
|
|
|
|
*/
|
2022-02-01 20:33:04 +01:00
|
|
|
ncols= sj_nest->sj_in_exprs= left_exp->cols();
|
2010-02-16 00:53:06 +03:00
|
|
|
sj_nest->nested_join->sj_outer_expr_list.empty();
|
2018-07-25 21:17:50 +05:30
|
|
|
reset_equality_number_for_subq_conds(sj_nest->sj_on_expr);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2020-06-30 15:20:11 +02:00
|
|
|
if (ncols == 1)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2017-02-18 17:47:31 +01:00
|
|
|
/* add left = select_list_element */
|
2020-06-30 15:20:11 +02:00
|
|
|
nested_join->sj_outer_expr_list.push_back(left,
|
2015-08-24 14:42:07 +03:00
|
|
|
thd->mem_root);
|
2015-08-05 11:57:35 +02:00
|
|
|
/*
|
|
|
|
Create Item_func_eq. Note that
|
|
|
|
1. this is done on the statement, not execution, arena
|
|
|
|
2. if it's a PS then this happens only once - on the first execution.
|
|
|
|
On following re-executions, the item will be fix_field-ed normally.
|
|
|
|
3. Thus it should be created as if it was fix_field'ed, in particular
|
|
|
|
all pointers to items in the execution arena should be protected
|
|
|
|
with thd->change_item_tree
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
Item_func_eq *item_eq=
|
2020-06-30 15:20:11 +02:00
|
|
|
new (thd->mem_root) Item_func_eq(thd, left_exp_orig,
|
2015-08-24 14:42:07 +03:00
|
|
|
subq_lex->ref_pointer_array[0]);
|
2017-02-18 17:47:31 +01:00
|
|
|
if (!item_eq)
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2020-06-30 15:20:11 +02:00
|
|
|
if (left_exp_orig != left_exp)
|
|
|
|
thd->change_item_tree(item_eq->arguments(), left_exp);
|
2010-02-16 00:53:06 +03:00
|
|
|
item_eq->in_equality_no= 0;
|
2015-08-11 11:18:38 +04:00
|
|
|
sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2020-06-30 15:20:11 +02:00
|
|
|
else if (left_exp->type() == Item::ROW_ITEM)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2017-02-18 17:47:31 +01:00
|
|
|
/*
|
|
|
|
disassemple left expression and add
|
|
|
|
left1 = select_list_element1 and left2 = select_list_element2 ...
|
|
|
|
*/
|
2020-06-30 15:20:11 +02:00
|
|
|
for (uint i= 0; i < ncols; i++)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2020-06-30 15:20:11 +02:00
|
|
|
nested_join->sj_outer_expr_list.push_back(left_exp->addr(i),
|
2015-08-24 14:42:07 +03:00
|
|
|
thd->mem_root);
|
2017-02-18 17:47:31 +01:00
|
|
|
Item_func_eq *item_eq=
|
2015-08-24 14:42:07 +03:00
|
|
|
new (thd->mem_root)
|
2020-06-30 15:20:11 +02:00
|
|
|
Item_func_eq(thd, left_exp_orig->element_index(i),
|
2015-08-24 14:42:07 +03:00
|
|
|
subq_lex->ref_pointer_array[i]);
|
2017-02-18 17:47:31 +01:00
|
|
|
if (!item_eq)
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2020-08-14 19:51:10 +03:00
|
|
|
DBUG_ASSERT(left_exp->element_index(i)->fixed());
|
2020-06-30 15:20:11 +02:00
|
|
|
if (left_exp_orig->element_index(i) !=
|
|
|
|
left_exp->element_index(i))
|
2017-02-18 17:47:31 +01:00
|
|
|
thd->change_item_tree(item_eq->arguments(),
|
2020-06-30 15:20:11 +02:00
|
|
|
left_exp->element_index(i));
|
2010-02-16 00:53:06 +03:00
|
|
|
item_eq->in_equality_no= i;
|
2015-08-11 11:18:38 +04:00
|
|
|
sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
2017-02-18 17:47:31 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
add row operation
|
|
|
|
left = (select_list_element1, select_list_element2, ...)
|
|
|
|
*/
|
2017-10-22 13:03:41 +02:00
|
|
|
Item_row *row= new (thd->mem_root) Item_row(thd, subq_lex->pre_fix);
|
2017-02-18 17:47:31 +01:00
|
|
|
/* fix fields on subquery was call so they should be the same */
|
|
|
|
if (!row)
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2020-06-30 15:20:11 +02:00
|
|
|
DBUG_ASSERT(ncols == row->cols());
|
|
|
|
nested_join->sj_outer_expr_list.push_back(left);
|
2017-02-18 17:47:31 +01:00
|
|
|
Item_func_eq *item_eq=
|
2020-06-30 15:20:11 +02:00
|
|
|
new (thd->mem_root) Item_func_eq(thd, left_exp_orig, row);
|
2017-02-18 17:47:31 +01:00
|
|
|
if (!item_eq)
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2017-02-18 17:47:31 +01:00
|
|
|
for (uint i= 0; i < row->cols(); i++)
|
|
|
|
{
|
|
|
|
if (row->element_index(i) != subq_lex->ref_pointer_array[i])
|
|
|
|
thd->change_item_tree(row->addr(i), subq_lex->ref_pointer_array[i]);
|
|
|
|
}
|
|
|
|
item_eq->in_equality_no= 0;
|
2017-10-22 13:03:41 +02:00
|
|
|
sj_nest->sj_on_expr= and_items(thd, sj_nest->sj_on_expr, item_eq);
|
2017-02-18 17:47:31 +01:00
|
|
|
}
|
2015-04-21 15:41:01 +03:00
|
|
|
/*
|
|
|
|
Fix the created equality and AND
|
|
|
|
|
|
|
|
Note that fix_fields() can actually fail in a meaningful way here. One
|
|
|
|
example is when the IN-equality is not valid, because it compares columns
|
|
|
|
with incompatible collations. (One can argue it would be more appropriate
|
|
|
|
to check for this at name resolution stage, but as a legacy of IN->EXISTS
|
|
|
|
we have in here).
|
|
|
|
*/
|
2018-06-05 10:25:39 +04:00
|
|
|
if (sj_nest->sj_on_expr->fix_fields_if_needed(thd, &sj_nest->sj_on_expr))
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
Walk through sj nest's WHERE and ON expressions and call
|
|
|
|
item->fix_table_changes() for all items.
|
|
|
|
*/
|
2017-11-08 15:47:49 +01:00
|
|
|
sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr,
|
|
|
|
TRUE);
|
2010-02-16 00:53:06 +03:00
|
|
|
fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list);
|
|
|
|
|
|
|
|
|
|
|
|
/* Unlink the child select_lex so it doesn't show up in EXPLAIN: */
|
|
|
|
subq_lex->master_unit()->exclude_level();
|
|
|
|
|
|
|
|
DBUG_EXECUTE("where",
|
|
|
|
print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
|
|
|
|
|
|
|
|
/* Inject sj_on_expr into the parent's WHERE or ON */
|
|
|
|
if (emb_tbl_nest)
|
|
|
|
{
|
2015-08-11 11:18:38 +04:00
|
|
|
emb_tbl_nest->on_expr= and_items(thd, emb_tbl_nest->on_expr,
|
2010-02-16 00:53:06 +03:00
|
|
|
sj_nest->sj_on_expr);
|
2011-07-15 02:58:34 +04:00
|
|
|
emb_tbl_nest->on_expr->top_level_item();
|
2018-06-05 10:25:39 +04:00
|
|
|
if (emb_tbl_nest->on_expr->fix_fields_if_needed(thd,
|
|
|
|
&emb_tbl_nest->on_expr))
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Inject into the WHERE */
|
2015-08-11 11:18:38 +04:00
|
|
|
parent_join->conds= and_items(thd, parent_join->conds, sj_nest->sj_on_expr);
|
2011-07-15 02:58:34 +04:00
|
|
|
parent_join->conds->top_level_item();
|
2011-03-02 08:10:38 +02:00
|
|
|
/*
|
|
|
|
fix_fields must update the properties (e.g. st_select_lex::cond_count of
|
|
|
|
the correct select_lex.
|
|
|
|
*/
|
2011-03-01 14:16:28 +02:00
|
|
|
save_lex= thd->lex->current_select;
|
|
|
|
thd->lex->current_select=parent_join->select_lex;
|
2018-06-05 10:25:39 +04:00
|
|
|
if (parent_join->conds->fix_fields_if_needed(thd, &parent_join->conds))
|
2021-11-02 16:21:11 +04:00
|
|
|
goto restore_tl_and_exit;
|
|
|
|
|
2011-03-01 14:16:28 +02:00
|
|
|
thd->lex->current_select=save_lex;
|
2010-02-16 00:53:06 +03:00
|
|
|
parent_join->select_lex->where= parent_join->conds;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (subq_lex->ftfunc_list->elements)
|
|
|
|
{
|
|
|
|
Item_func_match *ifm;
|
|
|
|
List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
|
|
|
|
while ((ifm= li++))
|
2015-08-24 14:42:07 +03:00
|
|
|
parent_lex->ftfunc_list->push_front(ifm, thd->mem_root);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2021-11-02 16:21:11 +04:00
|
|
|
// The subqueries were replaced for Item_int(1) earlier
|
|
|
|
subq_pred->reset_strategy(SUBS_SEMI_JOIN); // for subsequent executions
|
|
|
|
|
2011-08-27 09:47:21 +04:00
|
|
|
parent_lex->have_merged_subqueries= TRUE;
|
2017-11-14 07:47:58 +02:00
|
|
|
/* Fatal error may have been set to by fix_after_pullout() */
|
|
|
|
DBUG_RETURN(thd->is_fatal_error);
|
2021-11-02 16:21:11 +04:00
|
|
|
|
|
|
|
restore_tl_and_exit:
|
|
|
|
orig_tl->next_local= NULL;
|
|
|
|
DBUG_RETURN(TRUE);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2011-03-22 00:39:27 +03:00
|
|
|
const int SUBQERY_TEMPTABLE_NAME_MAX_LEN= 20;
|
|
|
|
|
2018-01-07 18:03:44 +02:00
|
|
|
static void create_subquery_temptable_name(LEX_STRING *str, uint number)
|
2011-03-22 00:39:27 +03:00
|
|
|
{
|
2018-01-07 18:03:44 +02:00
|
|
|
char *to= str->str;
|
2011-03-22 00:39:27 +03:00
|
|
|
DBUG_ASSERT(number < 10000);
|
|
|
|
to= strmov(to, "<subquery");
|
|
|
|
to= int10_to_str((int) number, to, 10);
|
|
|
|
to[0]= '>';
|
|
|
|
to[1]= 0;
|
2018-01-07 18:03:44 +02:00
|
|
|
str->length= (size_t) (to - str->str)+1;
|
2011-03-22 00:39:27 +03:00
|
|
|
}
|
|
|
|
|
2011-03-22 16:46:39 +03:00
|
|
|
|
2010-05-10 19:28:19 +04:00
|
|
|
/*
|
|
|
|
Convert subquery predicate into non-mergeable semi-join nest.
|
|
|
|
|
|
|
|
TODO:
|
|
|
|
why does this do IN-EXISTS conversion? Can't we unify it with mergeable
|
|
|
|
semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless
|
|
|
|
fatal errors)
|
|
|
|
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE - Ok
|
|
|
|
TRUE - Fatal error
|
|
|
|
*/
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
static bool convert_subq_to_jtbm(JOIN *parent_join,
|
|
|
|
Item_in_subselect *subq_pred,
|
|
|
|
bool *remove_item)
|
|
|
|
{
|
|
|
|
SELECT_LEX *parent_lex= parent_join->select_lex;
|
|
|
|
List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
|
|
|
|
TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins
|
2010-04-25 12:23:52 +04:00
|
|
|
TABLE_LIST *tl;
|
2011-12-07 01:03:00 +04:00
|
|
|
bool optimization_delayed= TRUE;
|
2015-08-24 14:42:07 +03:00
|
|
|
TABLE_LIST *jtbm;
|
2018-01-07 18:03:44 +02:00
|
|
|
LEX_STRING tbl_alias;
|
2017-11-14 07:47:58 +02:00
|
|
|
THD *thd= parent_join->thd;
|
2015-08-24 14:42:07 +03:00
|
|
|
DBUG_ENTER("convert_subq_to_jtbm");
|
2011-05-25 19:31:13 +04:00
|
|
|
|
2015-08-24 14:42:07 +03:00
|
|
|
subq_pred->set_strategy(SUBS_MATERIALIZATION);
|
2011-05-25 19:31:13 +04:00
|
|
|
subq_pred->is_jtbm_merged= TRUE;
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
*remove_item= TRUE;
|
|
|
|
|
2018-01-07 18:03:44 +02:00
|
|
|
if (!(tbl_alias.str= (char*)thd->calloc(SUBQERY_TEMPTABLE_NAME_MAX_LEN)) ||
|
2017-11-14 07:47:58 +02:00
|
|
|
!(jtbm= alloc_join_nest(thd))) //todo: this is not a join nest!
|
2010-04-06 00:16:45 +04:00
|
|
|
{
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
jtbm->join_list= emb_join_list;
|
|
|
|
jtbm->embedding= emb_tbl_nest;
|
|
|
|
jtbm->jtbm_subselect= subq_pred;
|
|
|
|
jtbm->nested_join= NULL;
|
|
|
|
|
|
|
|
/* Nests do not participate in those 'chains', so: */
|
|
|
|
/* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/
|
2017-11-14 07:47:58 +02:00
|
|
|
emb_join_list->push_back(jtbm, thd->mem_root);
|
2010-04-06 00:16:45 +04:00
|
|
|
|
|
|
|
/*
|
2010-05-10 19:28:19 +04:00
|
|
|
Inject the jtbm table into TABLE_LIST::next_leaf list, so that
|
|
|
|
make_join_statistics() and co. can find it.
|
2010-04-06 00:16:45 +04:00
|
|
|
*/
|
2017-11-14 07:47:58 +02:00
|
|
|
parent_lex->leaf_tables.push_back(jtbm, thd->mem_root);
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2013-03-29 19:27:06 +04:00
|
|
|
if (subq_pred->unit->first_select()->options & OPTION_SCHEMA_TABLE)
|
|
|
|
parent_lex->options |= OPTION_SCHEMA_TABLE;
|
|
|
|
|
2010-04-06 00:16:45 +04:00
|
|
|
/*
|
2010-05-10 19:28:19 +04:00
|
|
|
Same as above for TABLE_LIST::next_local chain
|
2010-04-06 00:16:45 +04:00
|
|
|
(a theory: a next_local chain always starts with ::leaf_tables
|
|
|
|
because view's tables are inserted after the view)
|
|
|
|
*/
|
2012-03-18 23:58:20 +04:00
|
|
|
for (tl= (TABLE_LIST*)(parent_lex->table_list.first); tl->next_local; tl= tl->next_local)
|
2011-03-22 00:39:27 +03:00
|
|
|
{}
|
2010-04-06 00:16:45 +04:00
|
|
|
tl->next_local= jtbm;
|
|
|
|
|
|
|
|
/* A theory: no need to re-connect the next_global chain */
|
2011-12-07 01:03:00 +04:00
|
|
|
if (optimization_delayed)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(parent_join->table_count < MAX_TABLES);
|
|
|
|
|
|
|
|
jtbm->jtbm_table_no= parent_join->table_count;
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2018-01-07 18:03:44 +02:00
|
|
|
create_subquery_temptable_name(&tbl_alias,
|
2011-12-07 01:03:00 +04:00
|
|
|
subq_pred->unit->first_select()->select_number);
|
2018-01-07 18:03:44 +02:00
|
|
|
jtbm->alias.str= tbl_alias.str;
|
|
|
|
jtbm->alias.length= tbl_alias.length;
|
2011-12-07 01:03:00 +04:00
|
|
|
parent_join->table_count++;
|
2017-11-14 07:47:58 +02:00
|
|
|
DBUG_RETURN(thd->is_fatal_error);
|
2011-12-07 01:03:00 +04:00
|
|
|
}
|
2010-04-06 00:16:45 +04:00
|
|
|
subselect_hash_sj_engine *hash_sj_engine=
|
|
|
|
((subselect_hash_sj_engine*)subq_pred->engine);
|
|
|
|
jtbm->table= hash_sj_engine->tmp_table;
|
|
|
|
|
2011-03-27 03:45:16 +04:00
|
|
|
jtbm->table->tablenr= parent_join->table_count;
|
|
|
|
jtbm->table->map= table_map(1) << (parent_join->table_count);
|
2011-05-28 20:34:04 +04:00
|
|
|
jtbm->jtbm_table_no= jtbm->table->tablenr;
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2011-03-27 03:45:16 +04:00
|
|
|
parent_join->table_count++;
|
|
|
|
DBUG_ASSERT(parent_join->table_count < MAX_TABLES);
|
2010-04-06 00:16:45 +04:00
|
|
|
|
|
|
|
Item *conds= hash_sj_engine->semi_join_conds;
|
2017-11-08 15:47:49 +01:00
|
|
|
conds->fix_after_pullout(parent_lex, &conds, TRUE);
|
2010-04-06 00:16:45 +04:00
|
|
|
|
|
|
|
DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY););
|
|
|
|
|
2018-01-07 18:03:44 +02:00
|
|
|
create_subquery_temptable_name(&tbl_alias, hash_sj_engine->materialize_join->
|
2011-03-22 00:39:27 +03:00
|
|
|
select_lex->select_number);
|
2018-01-07 18:03:44 +02:00
|
|
|
jtbm->alias.str= tbl_alias.str;
|
|
|
|
jtbm->alias.length= tbl_alias.length;
|
2011-08-27 09:47:21 +04:00
|
|
|
|
|
|
|
parent_lex->have_merged_subqueries= TRUE;
|
2017-11-14 07:47:58 +02:00
|
|
|
|
2010-05-24 15:17:38 +04:00
|
|
|
/* Don't unlink the child subselect, as the subquery will be used. */
|
2010-04-06 00:16:45 +04:00
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
DBUG_RETURN(thd->is_fatal_error);
|
2010-04-06 00:16:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
  Allocate a TABLE_LIST element together with its NESTED_JOIN.

  Both structures come from a single thd->calloc() request: the TABLE_LIST
  is placed first and the NESTED_JOIN follows at the next aligned offset.
  TABLE_LIST::nested_join is set to point at the second part.

  RETURN
    The new TABLE_LIST, or NULL if the allocation failed
*/
static TABLE_LIST *alloc_join_nest(THD *thd)
{
  const size_t nest_offset= ALIGN_SIZE(sizeof(TABLE_LIST));
  void *mem= thd->calloc(nest_offset + sizeof(NESTED_JOIN));

  if (!mem)
    return NULL;

  TABLE_LIST *tbl= (TABLE_LIST*) mem;
  tbl->nested_join= (NESTED_JOIN*) ((uchar*) mem + nest_offset);
  return tbl;
}
|
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
/*
|
|
|
|
@Note thd->is_fatal_error can be set in case of OOM
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist)
|
|
|
|
{
|
|
|
|
List_iterator<TABLE_LIST> it(*tlist);
|
|
|
|
TABLE_LIST *table;
|
|
|
|
while ((table= it++))
|
|
|
|
{
|
|
|
|
if (table->on_expr)
|
2017-11-08 15:47:49 +01:00
|
|
|
table->on_expr->fix_after_pullout(new_parent, &table->on_expr, TRUE);
|
2010-02-16 00:53:06 +03:00
|
|
|
if (table->nested_join)
|
|
|
|
fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-07-05 21:22:13 +04:00
|
|
|
/*
  Set JOIN_TAB::emb_sj_nest for all base tables found in a table list.

  Nested joins are descended into recursively; every other element that has
  a TABLE attached gets join_tab->emb_sj_nest set to emb_sj_nest.
*/
static void set_emb_join_nest(List<TABLE_LIST> *tables, TABLE_LIST *emb_sj_nest)
{
  List_iterator<TABLE_LIST> it(*tables);

  for (TABLE_LIST *tbl= it++; tbl; tbl= it++)
  {
    /*
      Note: the nested_join check has to come first.
      derived-merged tables have tbl->table!=NULL &&
      tbl->table->reginfo==NULL.
    */
    if (tbl->nested_join)
    {
      set_emb_join_nest(&tbl->nested_join->join_list, emb_sj_nest);
      continue;
    }

    if (tbl->table)
      tbl->table->reginfo.join_tab->emb_sj_nest= emb_sj_nest;
  }
}
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Pull tables out of semi-join nests, if possible
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
pull_out_semijoin_tables()
|
|
|
|
join The join where to do the semi-join flattening
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Try to pull tables out of semi-join nests.
|
|
|
|
|
|
|
|
PRECONDITIONS
|
|
|
|
When this function is called, the join may have several semi-join nests
|
|
|
|
but it is guaranteed that one semi-join nest does not contain another.
|
|
|
|
|
|
|
|
ACTION
|
|
|
|
A table can be pulled out of the semi-join nest if
|
|
|
|
- It is a constant table, or
|
|
|
|
- It is accessed via eq_ref(outer_tables)
|
|
|
|
|
|
|
|
POSTCONDITIONS
|
|
|
|
* Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL
|
|
|
|
* Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing
|
|
|
|
to semi-join nest they are in.
|
|
|
|
* Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly
|
|
|
|
|
|
|
|
This operation is (and should be) performed at each PS execution since
|
|
|
|
tables may become/cease to be constant across PS reexecutions.
|
|
|
|
|
|
|
|
NOTE
|
|
|
|
Table pullout may make uncorrelated subquery correlated. Consider this
|
|
|
|
example:
|
|
|
|
|
|
|
|
... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
|
|
|
|
|
|
|
|
here table it1 can be pulled out (we have it1.primary_key=oe which gives
|
|
|
|
us functional dependency). Once it1 is pulled out, all references to it1
|
|
|
|
from p(it1, it2) become references to outside of the subquery and thus
|
|
|
|
make the subquery (i.e. its semi-join nest) correlated.
|
|
|
|
Making the subquery (i.e. its semi-join nest) correlated prevents us from
|
|
|
|
using Materialization or LooseScan to execute it.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 - OK
|
|
|
|
1 - Out of memory error
|
|
|
|
*/
|
|
|
|
|
|
|
|
int pull_out_semijoin_tables(JOIN *join)
|
|
|
|
{
|
|
|
|
TABLE_LIST *sj_nest;
|
|
|
|
DBUG_ENTER("pull_out_semijoin_tables");
|
|
|
|
List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
|
2019-08-25 11:03:19 +03:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/* Try pulling out of the each of the semi-joins */
|
|
|
|
while ((sj_nest= sj_list_it++))
|
|
|
|
{
|
|
|
|
List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
|
|
|
|
TABLE_LIST *tbl;
|
2019-08-25 11:03:19 +03:00
|
|
|
Json_writer_object trace_wrapper(join->thd);
|
|
|
|
Json_writer_object trace(join->thd, "semijoin_table_pullout");
|
|
|
|
Json_writer_array trace_arr(join->thd, "pulled_out_tables");
|
2011-07-05 21:22:13 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
Don't do table pull-out for nested joins (if we get nested joins here, it
|
|
|
|
means these are outer joins. It is theoretically possible to do pull-out
|
2020-03-04 18:30:08 +02:00
|
|
|
for some of the outer tables but we don't support this currently.
|
2011-07-05 21:22:13 +04:00
|
|
|
*/
|
|
|
|
bool have_join_nest_children= FALSE;
|
|
|
|
|
|
|
|
set_emb_join_nest(&sj_nest->nested_join->join_list, sj_nest);
|
|
|
|
|
|
|
|
while ((tbl= child_li++))
|
|
|
|
{
|
|
|
|
if (tbl->nested_join)
|
|
|
|
{
|
|
|
|
have_join_nest_children= TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
table_map pulled_tables= 0;
|
2013-11-21 11:19:01 +04:00
|
|
|
table_map dep_tables= 0;
|
2011-07-05 21:22:13 +04:00
|
|
|
if (have_join_nest_children)
|
|
|
|
goto skip;
|
|
|
|
|
2013-11-21 11:19:01 +04:00
|
|
|
/*
|
|
|
|
Calculate set of tables within this semi-join nest that have
|
|
|
|
other dependent tables
|
|
|
|
*/
|
|
|
|
child_li.rewind();
|
|
|
|
while ((tbl= child_li++))
|
|
|
|
{
|
|
|
|
TABLE *const table= tbl->table;
|
|
|
|
if (table &&
|
|
|
|
(table->reginfo.join_tab->dependent &
|
|
|
|
sj_nest->nested_join->used_tables))
|
|
|
|
dep_tables|= table->reginfo.join_tab->dependent;
|
|
|
|
}
|
|
|
|
|
2011-07-05 21:22:13 +04:00
|
|
|
/* Action #1: Mark the constant tables to be pulled out */
|
|
|
|
child_li.rewind();
|
2010-02-16 00:53:06 +03:00
|
|
|
while ((tbl= child_li++))
|
|
|
|
{
|
|
|
|
if (tbl->table)
|
|
|
|
{
|
|
|
|
tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
|
2010-03-14 20:54:12 +03:00
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
Do not pull out tables because they are constant. This operation has
|
|
|
|
a problem:
|
|
|
|
- Some constant tables may become/cease to be constant across PS
|
|
|
|
re-executions
|
|
|
|
- Contrary to our initial assumption, it turned out that table pullout
|
|
|
|
operation is not easily undoable.
|
|
|
|
|
|
|
|
The solution is to leave constant tables where they are. This will
|
|
|
|
affect only constant tables that are 1-row or empty, tables that are
|
|
|
|
constant because they are accessed via eq_ref(const) access will
|
|
|
|
still be pulled out as functionally-dependent.
|
|
|
|
|
|
|
|
This will cause us to miss the chance to flatten some of the
|
|
|
|
subqueries, but since const tables do not generate many duplicates,
|
|
|
|
it really doesn't matter that much whether they were pulled out or
|
|
|
|
not.
|
|
|
|
|
|
|
|
All of this was done as fix for BUG#43768.
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
if (tbl->table->map & join->const_table_map)
|
|
|
|
{
|
|
|
|
pulled_tables |= tbl->table->map;
|
|
|
|
DBUG_PRINT("info", ("Table %s pulled out (reason: constant)",
|
|
|
|
tbl->table->alias));
|
|
|
|
}
|
2010-03-14 20:54:12 +03:00
|
|
|
#endif
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Action #2: Find which tables we can pull out based on
|
|
|
|
update_ref_and_keys() data. Note that pulling one table out can allow
|
|
|
|
us to pull out some other tables too.
|
|
|
|
*/
|
|
|
|
bool pulled_a_table;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
pulled_a_table= FALSE;
|
|
|
|
child_li.rewind();
|
|
|
|
while ((tbl= child_li++))
|
|
|
|
{
|
2013-11-21 11:19:01 +04:00
|
|
|
if (tbl->table && !(pulled_tables & tbl->table->map) &&
|
|
|
|
!(dep_tables & tbl->table->map))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
if (find_eq_ref_candidate(tbl->table,
|
|
|
|
sj_nest->nested_join->used_tables &
|
|
|
|
~pulled_tables))
|
|
|
|
{
|
|
|
|
pulled_a_table= TRUE;
|
|
|
|
pulled_tables |= tbl->table->map;
|
|
|
|
DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)",
|
2019-08-25 11:03:19 +03:00
|
|
|
tbl->table->alias.c_ptr_safe()));
|
|
|
|
trace_arr.add(tbl->table->alias.c_ptr_safe());
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Pulling a table out of uncorrelated subquery in general makes
|
|
|
|
makes it correlated. See the NOTE to this funtion.
|
|
|
|
*/
|
|
|
|
sj_nest->sj_subq_pred->is_correlated= TRUE;
|
|
|
|
sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
|
|
|
|
sj_nest->nested_join->sj_depends_on|= tbl->table->map;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (pulled_a_table);
|
|
|
|
|
|
|
|
child_li.rewind();
|
2011-07-05 21:22:13 +04:00
|
|
|
skip:
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Action #3: Move the pulled out TABLE_LIST elements to the parents.
|
|
|
|
*/
|
|
|
|
table_map inner_tables= sj_nest->nested_join->used_tables &
|
|
|
|
~pulled_tables;
|
|
|
|
/* Record the bitmap of inner tables */
|
|
|
|
sj_nest->sj_inner_tables= inner_tables;
|
|
|
|
if (pulled_tables)
|
|
|
|
{
|
|
|
|
List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)?
|
|
|
|
(&sj_nest->embedding->nested_join->join_list):
|
|
|
|
(&join->select_lex->top_join_list);
|
|
|
|
Query_arena *arena, backup;
|
|
|
|
arena= join->thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
while ((tbl= child_li++))
|
|
|
|
{
|
|
|
|
if (tbl->table)
|
|
|
|
{
|
|
|
|
if (inner_tables & tbl->table->map)
|
|
|
|
{
|
|
|
|
/* This table is not pulled out */
|
|
|
|
tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* This table has been pulled out of the semi-join nest */
|
|
|
|
tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
|
|
|
|
/*
|
|
|
|
Pull the table up in the same way as simplify_joins() does:
|
|
|
|
update join_list and embedding pointers but keep next[_local]
|
|
|
|
pointers.
|
|
|
|
*/
|
|
|
|
child_li.remove();
|
2010-03-14 20:54:12 +03:00
|
|
|
sj_nest->nested_join->used_tables &= ~tbl->table->map;
|
2015-08-24 14:42:07 +03:00
|
|
|
upper_join_list->push_back(tbl, join->thd->mem_root);
|
2010-02-16 00:53:06 +03:00
|
|
|
tbl->join_list= upper_join_list;
|
|
|
|
tbl->embedding= sj_nest->embedding;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the sj-nest itself if we've removed everything from it */
|
|
|
|
if (!inner_tables)
|
|
|
|
{
|
|
|
|
List_iterator<TABLE_LIST> li(*upper_join_list);
|
|
|
|
/* Find the sj_nest in the list. */
|
|
|
|
while (sj_nest != li++) ;
|
|
|
|
li.remove();
|
|
|
|
/* Also remove it from the list of SJ-nests: */
|
|
|
|
sj_list_it.remove();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (arena)
|
|
|
|
join->thd->restore_active_arena(arena, &backup);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Optimize semi-join nests that could be run with sj-materialization
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
optimize_semijoin_nests()
|
|
|
|
join The join to optimize semi-join nests for
|
|
|
|
all_table_map Bitmap of all tables in the join
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Optimize each of the semi-join nests that can be run with
|
|
|
|
materialization. For each of the nests, we
|
|
|
|
- Generate the best join order for this "sub-join" and remember it;
|
|
|
|
- Remember the sub-join execution cost (it's part of materialization
|
|
|
|
cost);
|
|
|
|
- Calculate other costs that will be incurred if we decide
|
|
|
|
to use materialization strategy for this semi-join nest.
|
|
|
|
|
|
|
|
All obtained information is saved and will be used by the main join
|
|
|
|
optimization pass.
|
2011-10-01 00:10:03 +04:00
|
|
|
|
|
|
|
NOTES
|
|
|
|
Because of Join::reoptimize(), this function may be called multiple times.
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE Ok
|
|
|
|
TRUE Out of memory error
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
|
|
|
|
{
|
|
|
|
DBUG_ENTER("optimize_semijoin_nests");
|
2019-02-13 11:22:16 +05:30
|
|
|
THD *thd= join->thd;
|
2010-02-16 00:53:06 +03:00
|
|
|
List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
|
|
|
|
TABLE_LIST *sj_nest;
|
2019-02-18 17:11:20 +05:30
|
|
|
if (!join->select_lex->sj_nests.elements)
|
|
|
|
DBUG_RETURN(FALSE);
|
2019-02-13 11:22:16 +05:30
|
|
|
Json_writer_object wrapper(thd);
|
|
|
|
Json_writer_object trace_semijoin_nest(thd,
|
|
|
|
"execution_plan_for_potential_materialization");
|
|
|
|
Json_writer_array trace_steps_array(thd, "steps");
|
2010-03-14 20:54:12 +03:00
|
|
|
while ((sj_nest= sj_list_it++))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2010-03-14 20:54:12 +03:00
|
|
|
/* semi-join nests with only constant tables are not valid */
|
|
|
|
/// DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2010-03-14 20:54:12 +03:00
|
|
|
sj_nest->sj_mat_info= NULL;
|
|
|
|
/*
|
|
|
|
The statement may have been executed with 'semijoin=on' earlier.
|
|
|
|
We need to verify that 'semijoin=on' still holds.
|
|
|
|
*/
|
|
|
|
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) &&
|
|
|
|
optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION))
|
|
|
|
{
|
|
|
|
if ((sj_nest->sj_inner_tables & ~join->const_table_map) && /* not everything was pulled out */
|
2010-02-16 00:53:06 +03:00
|
|
|
!sj_nest->sj_subq_pred->is_correlated &&
|
|
|
|
sj_nest->sj_subq_pred->types_allow_materialization)
|
|
|
|
{
|
2022-06-13 17:45:37 +03:00
|
|
|
if (choose_plan(join, all_table_map &~join->const_table_map, sj_nest))
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
/*
|
|
|
|
The best plan to run the subquery is now in join->best_positions,
|
|
|
|
save it.
|
|
|
|
*/
|
2010-03-14 20:54:12 +03:00
|
|
|
uint n_tables= my_count_bits(sj_nest->sj_inner_tables & ~join->const_table_map);
|
2010-02-16 00:53:06 +03:00
|
|
|
SJ_MATERIALIZATION_INFO* sjm;
|
|
|
|
if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
|
|
|
|
!(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
|
|
|
|
n_tables)))
|
|
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
sjm->tables= n_tables;
|
|
|
|
sjm->is_used= FALSE;
|
|
|
|
double subjoin_out_rows, subjoin_read_time;
|
2011-05-26 15:01:26 +04:00
|
|
|
|
2021-10-06 12:31:19 +03:00
|
|
|
join->get_prefix_cost_and_fanout(n_tables,
|
2011-05-26 15:01:26 +04:00
|
|
|
&subjoin_read_time,
|
|
|
|
&subjoin_out_rows);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2022-09-30 17:10:37 +03:00
|
|
|
sjm->materialization_cost=subjoin_read_time;
|
2019-08-29 15:37:49 +03:00
|
|
|
sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows;
|
2011-07-11 17:13:16 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Adjust output cardinality estimates. If the subquery has form
|
|
|
|
|
|
|
|
... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
|
|
|
|
|
|
|
|
then the number of distinct output record combinations has an
|
|
|
|
upper bound of product of number of records matching the tables
|
|
|
|
that are used by the SELECT clause.
|
|
|
|
TODO:
|
|
|
|
We can get a more precise estimate if we
|
|
|
|
- use rec_per_key cardinality estimates. For simple cases like
|
|
|
|
"oe IN (SELECT t.key ...)" it is trivial.
|
|
|
|
- Functional dependencies between the tables in the semi-join
|
|
|
|
nest (the payoff is probably less here?)
|
2011-05-10 11:31:02 +01:00
|
|
|
|
|
|
|
See also get_post_group_estimate().
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
2011-07-11 17:13:16 +04:00
|
|
|
SELECT_LEX *subq_select= sj_nest->sj_subq_pred->unit->first_select();
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
|
|
|
|
{
|
|
|
|
JOIN_TAB *tab= join->best_positions[i].table;
|
|
|
|
join->map2table[tab->table->tablenr]= tab;
|
|
|
|
}
|
|
|
|
table_map map= 0;
|
2016-05-08 23:04:41 +03:00
|
|
|
for (uint i=0; i < subq_select->item_list.elements; i++)
|
|
|
|
map|= subq_select->ref_pointer_array[i]->used_tables();
|
2010-02-16 00:53:06 +03:00
|
|
|
map= map & ~PSEUDO_TABLE_BITS;
|
|
|
|
Table_map_iterator tm_it(map);
|
|
|
|
int tableno;
|
|
|
|
double rows= 1.0;
|
|
|
|
while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
|
2022-10-04 11:32:33 +03:00
|
|
|
{
|
|
|
|
ha_rows tbl_rows=join->map2table[tableno]->
|
|
|
|
table->opt_range_condition_rows;
|
|
|
|
|
|
|
|
rows= COST_MULT(rows, rows2double(tbl_rows));
|
|
|
|
}
|
2013-03-26 00:03:13 +02:00
|
|
|
sjm->rows= MY_MIN(sjm->rows, rows);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2016-04-28 21:59:23 +04:00
|
|
|
memcpy((uchar*) sjm->positions,
|
|
|
|
(uchar*) (join->best_positions + join->const_tables),
|
2010-02-16 00:53:06 +03:00
|
|
|
sizeof(POSITION) * n_tables);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Calculate temporary table parameters and usage costs
|
|
|
|
*/
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
bool blobs_used;
|
2011-07-11 17:13:16 +04:00
|
|
|
uint rowlen= get_tmp_table_rec_length(subq_select->ref_pointer_array,
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
subq_select->item_list.elements,
|
|
|
|
&blobs_used);
|
|
|
|
TMPTABLE_COSTS cost= get_tmp_table_costs(join->thd,
|
|
|
|
subjoin_out_rows, rowlen,
|
2022-06-16 13:12:01 +03:00
|
|
|
blobs_used, 1);
|
|
|
|
double scan_cost, total_cost;
|
|
|
|
double row_copy_cost= ROW_COPY_COST_THD(thd);
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Let materialization cost include the cost to write the data into the
|
2022-06-16 13:12:01 +03:00
|
|
|
temporary table. Note that sjm->materialization_cost already includes
|
|
|
|
row copy and compare costs of finding the original row.
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
2022-09-30 17:10:37 +03:00
|
|
|
sjm->materialization_cost+=subjoin_out_rows * cost.write + cost.create;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Set the cost to do a full scan of the temptable (will need this to
|
2022-06-16 13:12:01 +03:00
|
|
|
consider doing sjm-scan). See ha_scan_time() for the basics of
|
|
|
|
the calculations.
|
|
|
|
We don't need to check the where clause for each row, so no
|
|
|
|
WHERE_COST is needed.
|
|
|
|
*/
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
scan_cost= (rowlen * (double) sjm->rows) / cost.block_size;
|
2022-06-16 13:12:01 +03:00
|
|
|
total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost +
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
TABLE_SCAN_SETUP_COST_THD(thd) +
|
2022-06-16 13:12:01 +03:00
|
|
|
row_copy_cost * sjm->rows);
|
2022-09-30 17:10:37 +03:00
|
|
|
sjm->scan_cost=total_cost;
|
2022-06-16 13:12:01 +03:00
|
|
|
|
|
|
|
/* When reading a row, we have also to check the where clause */
|
2022-09-30 17:10:37 +03:00
|
|
|
sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd);
|
2010-02-16 00:53:06 +03:00
|
|
|
sj_nest->sj_mat_info= sjm;
|
|
|
|
DBUG_EXECUTE("opt", print_sjm(sjm););
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
2010-04-25 12:23:52 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Get estimated record length for semi-join materialization temptable
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
get_tmp_table_rec_length()
|
|
|
|
items IN subquery's select list.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Calculate estimated record length for semi-join materialization
|
|
|
|
temptable. It's an estimate because we don't follow every bit of
|
|
|
|
create_tmp_table()'s logic. This isn't necessary as the return value of
|
|
|
|
this function is used only for cost calculations.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
Length of the temptable record, in bytes
|
|
|
|
*/
|
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost of
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as we
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
static uint get_tmp_table_rec_length(Ref_ptr_array p_items, uint elements,
|
|
|
|
bool *blobs_used)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
uint len= 0;
|
|
|
|
Item *item;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
|
|
|
|
*blobs_used= 0;
|
2016-05-08 23:04:41 +03:00
|
|
|
for (uint i= 0; i < elements ; i++)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2016-05-08 23:04:41 +03:00
|
|
|
item = p_items[i];
|
2010-02-16 00:53:06 +03:00
|
|
|
switch (item->result_type()) {
|
|
|
|
case REAL_RESULT:
|
|
|
|
len += sizeof(double);
|
|
|
|
break;
|
|
|
|
case INT_RESULT:
|
|
|
|
if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
|
|
|
|
len += 8;
|
|
|
|
else
|
|
|
|
len += 4;
|
|
|
|
break;
|
|
|
|
case STRING_RESULT:
|
|
|
|
enum enum_field_types type;
|
|
|
|
/* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */
|
|
|
|
if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
|
|
|
|
type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
|
|
|
|
type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
|
|
|
|
len += 8;
|
|
|
|
else
|
|
|
|
len += item->max_length;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
if (item->max_length > MAX_FIELD_VARCHARLENGTH)
|
|
|
|
*blobs_used= 1;
|
2010-02-16 00:53:06 +03:00
|
|
|
break;
|
|
|
|
case DECIMAL_RESULT:
|
|
|
|
len += 10;
|
|
|
|
break;
|
|
|
|
case ROW_RESULT:
|
|
|
|
default:
|
|
|
|
DBUG_ASSERT(0); /* purecov: deadcode */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2010-12-15 12:54:25 +02:00
|
|
|
|
|
|
|
/**
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
The cost of a create, write and read into a unique hash/btree index on
|
|
|
|
a temporary table with 'row_count' rows each of size 'row_size'.
|
2010-12-15 12:54:25 +02:00
|
|
|
|
|
|
|
@param thd current query context
|
|
|
|
@param row_count number of rows in the temp table
|
|
|
|
@param row_size average size in bytes of the rows
|
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
@return The cost of using the temporary table
|
|
|
|
|
|
|
|
TODO:
|
|
|
|
This is an optimistic estimate. We are not taking into account:
|
|
|
|
- That we first write into a memory and then overflow to disk.
|
|
|
|
- If binary trees would be used for heap tables.
|
|
|
|
- The additional cost of writing a row to memory/disk and possible
|
|
|
|
index reorganization.
|
2010-12-15 12:54:25 +02:00
|
|
|
*/
|
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
TMPTABLE_COSTS
|
2022-06-16 13:12:01 +03:00
|
|
|
get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used,
|
|
|
|
bool add_copy_cost)
|
2010-12-15 12:54:25 +02:00
|
|
|
{
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
TMPTABLE_COSTS cost;
|
|
|
|
/* From heap_prepare_hp_create_info(), assuming one hash key used */
|
|
|
|
row_size+= sizeof(char*)*2;
|
|
|
|
row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*));
|
2010-12-15 12:54:25 +02:00
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
if (row_count > thd->variables.max_heap_table_size / (double) row_size ||
|
|
|
|
blobs_used)
|
|
|
|
{
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
double row_copy_cost= (add_copy_cost ?
|
|
|
|
tmp_table_optimizer_costs.row_copy_cost :
|
|
|
|
0);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
/* Disk based table */
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost *
|
|
|
|
tmp_table_optimizer_costs.disk_read_ratio) +
|
|
|
|
row_copy_cost);
|
|
|
|
cost.write= cost.lookup;
|
2022-06-16 13:12:01 +03:00
|
|
|
cost.create= DISK_TEMPTABLE_CREATE_COST;
|
|
|
|
cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in the OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated in the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engine's
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost;
|
|
|
|
cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters appear cheaper than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST, the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2022-06-16 13:12:01 +03:00
|
|
|
/* Values are as they are in heap.h */
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in the OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated in the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engine's
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
double row_copy_cost= (add_copy_cost ?
|
|
|
|
heap_optimizer_costs.row_copy_cost :
|
|
|
|
0);
|
2022-06-16 13:12:01 +03:00
|
|
|
cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in the OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated in the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engine's
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.write= cost.lookup;
|
2022-06-16 13:12:01 +03:00
|
|
|
cost.create= HEAP_TEMPTABLE_CREATE_COST;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
cost.block_size= 1;
|
|
|
|
cost.avg_io_cost= 0;
|
|
|
|
cost.cache_hit_ratio= 0;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
}
|
|
|
|
return cost;
|
2010-12-15 12:54:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
find_eq_ref_candidate()
|
|
|
|
table Table to be checked
|
|
|
|
sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't
|
|
|
|
count.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate
|
|
|
|
|
|
|
|
TODO
|
|
|
|
Check again if it is feasible to factor common parts with constant table
|
|
|
|
search
|
|
|
|
|
2010-05-10 19:28:19 +04:00
|
|
|
Also check if it's feasible to factor common parts with table elimination
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
RETURN
|
|
|
|
TRUE - There exists an eq_ref(outer-tables) candidate
|
|
|
|
FALSE - Otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
|
|
|
|
{
|
|
|
|
KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
|
|
|
|
|
|
|
|
if (keyuse)
|
|
|
|
{
|
2011-01-21 18:21:55 -08:00
|
|
|
do
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-01-04 21:59:41 -08:00
|
|
|
uint key= keyuse->key;
|
2010-02-16 00:53:06 +03:00
|
|
|
key_part_map bound_parts= 0;
|
2020-09-23 12:27:56 +03:00
|
|
|
if (!keyuse->is_for_hash_join() &&
|
|
|
|
(table->key_info[key].flags & HA_NOSAME))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2020-09-23 12:27:56 +03:00
|
|
|
KEY *keyinfo= table->key_info + key;
|
2010-02-16 00:53:06 +03:00
|
|
|
do /* For all equalities on all key parts */
|
|
|
|
{
|
2019-10-15 12:24:23 +02:00
|
|
|
/*
|
|
|
|
Check if this is "t.keypart = expr(outer_tables)
|
|
|
|
|
|
|
|
Don't allow variants that can produce duplicates:
|
|
|
|
- Dont allow "ref or null"
|
|
|
|
- the keyuse (that is, the operation) must be null-rejecting,
|
|
|
|
unless the other expression is non-NULLable.
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
if (!(keyuse->used_tables & sj_inner_tables) &&
|
2019-10-15 12:24:23 +02:00
|
|
|
!(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) &&
|
2020-08-02 12:31:14 +03:00
|
|
|
(keyuse->null_rejecting || !keyuse->val->maybe_null()))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
bound_parts |= 1 << keyuse->keypart;
|
|
|
|
}
|
|
|
|
keyuse++;
|
|
|
|
} while (keyuse->key == key && keyuse->table == table);
|
|
|
|
|
2013-06-15 18:32:08 +03:00
|
|
|
if (bound_parts == PREV_BITS(uint, keyinfo->user_defined_key_parts))
|
2010-02-16 00:53:06 +03:00
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
do
|
|
|
|
{
|
|
|
|
keyuse++;
|
2011-01-21 18:21:55 -08:00
|
|
|
} while (keyuse->key == key && keyuse->table == table);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-01-21 18:21:55 -08:00
|
|
|
} while (keyuse->table == table);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2010-04-25 12:23:52 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Do semi-join optimization step after we've added a new tab to join prefix
|
|
|
|
|
|
|
|
SYNOPSIS
|
2022-06-06 22:21:22 +03:00
|
|
|
optimize_semi_joins()
|
2010-02-16 00:53:06 +03:00
|
|
|
join The join we're optimizing
|
|
|
|
remaining_tables Tables not in the join prefix
|
|
|
|
new_join_tab Join tab we've just added to the join prefix
|
|
|
|
idx Index of this join tab (i.e. number of tables
|
|
|
|
in the prefix minus one)
|
|
|
|
current_record_count INOUT Estimate of #records in join prefix's output
|
|
|
|
current_read_time INOUT Cost to execute the join prefix
|
|
|
|
loose_scan_pos IN A POSITION with LooseScan plan to access
|
|
|
|
table new_join_tab
|
|
|
|
(produced by the last best_access_path call)
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Update semi-join optimization state after we've added another tab (table
|
|
|
|
and access method) to the join prefix.
|
|
|
|
|
|
|
|
The state is maintained in join->positions[#prefix_size]. Each of the
|
|
|
|
available strategies has its own state variables.
|
|
|
|
|
|
|
|
for each semi-join strategy
|
|
|
|
{
|
|
|
|
update strategy's state variables;
|
|
|
|
|
|
|
|
if (join prefix has all the tables that are needed to consider
|
|
|
|
using this strategy for the semi-join(s))
|
|
|
|
{
|
|
|
|
calculate cost of using the strategy
|
|
|
|
if ((this is the first strategy to handle the semi-join nest(s) ||
|
|
|
|
the cost is less than other strategies))
|
|
|
|
{
|
|
|
|
// Pick this strategy
|
|
|
|
pos->sj_strategy= ..
|
|
|
|
..
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Most of the new state is saved join->positions[idx] (and hence no undo
|
|
|
|
is necessary). Several members of class JOIN are updated also, these
|
|
|
|
changes can be rolled back with restore_prev_sj_state().
|
|
|
|
|
|
|
|
See setup_semijoin_dups_elimination() for a description of what kinds of
|
|
|
|
join prefixes each strategy can handle.
|
|
|
|
*/
|
|
|
|
|
2011-11-25 05:56:58 +04:00
|
|
|
/*
  Compare inner_tables with the inner tables of the nearest overlapping
  semi-join nest found in the join prefix

  SYNOPSIS
    is_multiple_semi_joins()
      join          Join being optimized (its const_table_map is used)
      prefix        Array of join prefix positions
      idx           Index of the last prefix position to look at
      inner_tables  Bitmap of semi-join inner tables to check

  DESCRIPTION
    Scan prefix[idx], prefix[idx-1], ... prefix[0]. The first position whose
    table is inside a semi-join nest that shares at least one inner table
    with inner_tables decides the result.

  RETURN
    TRUE  - inner_tables is not equal to that nest's inner tables with the
            constant tables excluded
    FALSE - they are equal, or no overlapping nest was found in the prefix
*/

bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map inner_tables)
{
  for (int pos_no= (int) idx; pos_no >= 0; pos_no--)
  {
    TABLE_LIST *sj_nest= prefix[pos_no].table->emb_sj_nest;

    /* Skip tables that are not inside a semi-join nest */
    if (!sj_nest)
      continue;

    /* Skip nests that have no inner table in common with inner_tables */
    if (!(inner_tables & sj_nest->sj_inner_tables))
      continue;

    table_map nest_non_const_tables= (sj_nest->sj_inner_tables &
                                      ~join->const_table_map);
    return inner_tables != nest_non_const_tables;
  }
  return FALSE;
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
|
2022-06-06 22:21:22 +03:00
|
|
|
void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
|
|
|
|
double *current_record_count,
|
|
|
|
double *current_read_time, POSITION *loose_scan_pos)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
POSITION *pos= join->positions + idx;
|
2011-11-23 04:25:52 +04:00
|
|
|
const JOIN_TAB *new_join_tab= pos->table;
|
2011-10-12 13:19:37 +04:00
|
|
|
|
2019-09-10 23:51:42 +03:00
|
|
|
#ifdef HAVE_valgrind
|
|
|
|
new (&pos->firstmatch_picker) Firstmatch_picker;
|
|
|
|
new (&pos->loosescan_picker) LooseScan_picker;
|
|
|
|
new (&pos->sjmat_picker) Sj_materialization_picker;
|
|
|
|
new (&pos->dups_weedout_picker) Duplicate_weedout_picker;
|
|
|
|
#endif
|
|
|
|
|
2019-08-25 11:03:19 +03:00
|
|
|
if (join->emb_sjm_nest || //(1)
|
|
|
|
!join->select_lex->have_merged_subqueries) //(2)
|
2011-10-12 13:19:37 +04:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
2019-08-25 11:03:19 +03:00
|
|
|
(1): We're performing optimization inside SJ-Materialization nest:
|
2011-11-23 04:25:52 +04:00
|
|
|
- there are no other semi-joins inside semi-join nests
|
|
|
|
- attempts to build semi-join strategies here will confuse
|
|
|
|
the optimizer, so bail out.
|
2019-08-25 11:03:19 +03:00
|
|
|
(2): Don't waste time on semi-join optimizations if we don't have any
|
|
|
|
semi-joins
|
2011-11-23 04:25:52 +04:00
|
|
|
*/
|
2011-11-25 05:56:58 +04:00
|
|
|
pos->sj_strategy= SJ_OPT_NONE;
|
2011-10-12 13:19:37 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-08-25 11:03:19 +03:00
|
|
|
Semi_join_strategy_picker *pickers[]=
|
|
|
|
{
|
|
|
|
&pos->firstmatch_picker,
|
|
|
|
&pos->loosescan_picker,
|
|
|
|
&pos->sjmat_picker,
|
|
|
|
&pos->dups_weedout_picker,
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
Json_writer_array trace_steps(join->thd, "semijoin_strategy_choice");
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
|
|
|
Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
|
|
|
|
LooseScan detector in best_access_path)
|
|
|
|
*/
|
|
|
|
remaining_tables &= ~new_join_tab->table->map;
|
2018-01-27 15:03:30 +02:00
|
|
|
table_map dups_producing_tables, UNINIT_VAR(prev_dups_producing_tables),
|
|
|
|
UNINIT_VAR(prev_sjm_lookup_tables);
|
2015-03-17 13:26:33 +03:00
|
|
|
|
|
|
|
if (idx == join->const_tables)
|
|
|
|
dups_producing_tables= 0;
|
|
|
|
else
|
|
|
|
dups_producing_tables= pos[-1].dups_producing_tables;
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
TABLE_LIST *emb_sj_nest;
|
|
|
|
if ((emb_sj_nest= new_join_tab->emb_sj_nest))
|
2015-03-17 13:26:33 +03:00
|
|
|
dups_producing_tables |= emb_sj_nest->sj_inner_tables;
|
2011-11-23 04:25:52 +04:00
|
|
|
|
2018-01-27 15:03:30 +02:00
|
|
|
Semi_join_strategy_picker **strategy, **prev_strategy= 0;
|
2010-02-16 00:53:06 +03:00
|
|
|
if (idx == join->const_tables)
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
/* First table, initialize pickers */
|
|
|
|
for (strategy= pickers; *strategy != NULL; strategy++)
|
|
|
|
(*strategy)->set_empty();
|
|
|
|
pos->inner_tables_handled_with_other_sjs= 0;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
for (strategy= pickers; *strategy != NULL; strategy++)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
(*strategy)->set_from_prev(pos - 1);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
pos->inner_tables_handled_with_other_sjs=
|
|
|
|
pos[-1].inner_tables_handled_with_other_sjs;
|
|
|
|
}
|
|
|
|
|
2022-06-03 13:24:18 +03:00
|
|
|
pos->prefix_cost= *current_read_time;
|
2011-11-23 04:25:52 +04:00
|
|
|
pos->prefix_record_count= *current_record_count;
|
|
|
|
|
|
|
|
{
|
|
|
|
pos->sj_strategy= SJ_OPT_NONE;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
for (strategy= pickers; *strategy != NULL; strategy++)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
table_map handled_fanout;
|
|
|
|
sj_strategy_enum sj_strategy;
|
|
|
|
double rec_count= *current_record_count;
|
|
|
|
double read_time= *current_read_time;
|
|
|
|
if ((*strategy)->check_qep(join, idx, remaining_tables,
|
|
|
|
new_join_tab,
|
|
|
|
&rec_count,
|
|
|
|
&read_time,
|
|
|
|
&handled_fanout,
|
|
|
|
&sj_strategy,
|
|
|
|
loose_scan_pos))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
/*
|
2011-11-23 04:25:52 +04:00
|
|
|
It's possible to use the strategy. Use it, if
|
|
|
|
- it removes semi-join fanout that was not removed before
|
|
|
|
- using it is cheaper than using something else,
|
|
|
|
and {if some other strategy has removed fanout
|
|
|
|
that this strategy is trying to remove, then it
|
|
|
|
did remove the fanout only for one semi-join}
|
|
|
|
This is to avoid a situation when
|
|
|
|
1. strategy X removes fanout for semijoin X,Y
|
|
|
|
2. using strategy Z is cheaper, but it only removes
|
|
|
|
fanout from semijoin X.
|
|
|
|
3. We have no clue what to do about fanout of semi-join Y.
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make lables more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrivals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcomming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into considering
cost_for_index_read() if there where no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
|
|
|
|
For the first iteration read_time will always be bigger than
|
|
|
|
*current_read_time (as the 'strategy' is an addition to the
|
|
|
|
chosen plan). If a strategy was picked
|
|
|
|
(dups_producing_tables & handled_fanout is true), then
|
|
|
|
*current_read_time is updated and the cost for the next
|
|
|
|
strategy can be smaller than *current_read_time.
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
2015-03-17 13:26:33 +03:00
|
|
|
if ((dups_producing_tables & handled_fanout) ||
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make lables more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrivals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
(read_time + COST_EPS < *current_read_time &&
|
2011-11-23 04:25:52 +04:00
|
|
|
!(handled_fanout & pos->inner_tables_handled_with_other_sjs)))
|
|
|
|
{
|
2017-10-06 17:52:35 +02:00
|
|
|
DBUG_ASSERT(pos->sj_strategy != sj_strategy);
|
|
|
|
/*
|
2020-12-20 21:07:38 +02:00
|
|
|
If the strategy chosen first time or
|
2017-10-06 17:52:35 +02:00
|
|
|
the strategy replaces a strategy which was used for exactly the same
|
|
|
|
tables
|
|
|
|
*/
|
|
|
|
if (pos->sj_strategy == SJ_OPT_NONE ||
|
|
|
|
handled_fanout ==
|
|
|
|
(prev_dups_producing_tables ^ dups_producing_tables))
|
|
|
|
{
|
|
|
|
prev_strategy= strategy;
|
|
|
|
if (pos->sj_strategy == SJ_OPT_NONE)
|
|
|
|
{
|
|
|
|
prev_dups_producing_tables= dups_producing_tables;
|
|
|
|
prev_sjm_lookup_tables= join->sjm_lookup_tables;
|
|
|
|
}
|
|
|
|
/* Mark strategy as used */
|
|
|
|
(*strategy)->mark_used();
|
|
|
|
pos->sj_strategy= sj_strategy;
|
|
|
|
if (sj_strategy == SJ_OPT_MATERIALIZE)
|
|
|
|
join->sjm_lookup_tables |= handled_fanout;
|
|
|
|
else
|
|
|
|
join->sjm_lookup_tables &= ~handled_fanout;
|
|
|
|
*current_read_time= read_time;
|
|
|
|
*current_record_count= rec_count;
|
|
|
|
dups_producing_tables &= ~handled_fanout;
|
2019-08-25 11:03:19 +03:00
|
|
|
|
2017-10-06 17:52:35 +02:00
|
|
|
//TODO: update bitmap of semi-joins that were handled together with
|
|
|
|
// others.
|
|
|
|
if (is_multiple_semi_joins(join, join->positions, idx,
|
|
|
|
handled_fanout))
|
|
|
|
pos->inner_tables_handled_with_other_sjs |= handled_fanout;
|
|
|
|
}
|
2013-02-07 21:46:02 -08:00
|
|
|
else
|
2017-10-06 17:52:35 +02:00
|
|
|
{
|
|
|
|
/* Conflict fall to most general variant */
|
|
|
|
(*prev_strategy)->set_empty();
|
|
|
|
dups_producing_tables= prev_dups_producing_tables;
|
|
|
|
join->sjm_lookup_tables= prev_sjm_lookup_tables;
|
|
|
|
// mark it 'none' to avoid loops
|
|
|
|
pos->sj_strategy= SJ_OPT_NONE;
|
|
|
|
// next skip to last;
|
|
|
|
strategy= pickers +
|
|
|
|
(sizeof(pickers)/sizeof(Semi_join_strategy_picker*) - 3);
|
|
|
|
continue;
|
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* We decided not to apply the strategy. */
|
|
|
|
(*strategy)->set_empty();
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
2019-08-25 11:03:19 +03:00
|
|
|
|
|
|
|
if (unlikely(join->thd->trace_started() && pos->sj_strategy != SJ_OPT_NONE))
|
|
|
|
{
|
|
|
|
Json_writer_object tr(join->thd);
|
|
|
|
const char *sname;
|
|
|
|
switch (pos->sj_strategy) {
|
|
|
|
case SJ_OPT_MATERIALIZE:
|
2019-09-12 19:07:56 +03:00
|
|
|
sname= "SJ-Materialization";
|
2019-08-25 11:03:19 +03:00
|
|
|
break;
|
|
|
|
case SJ_OPT_MATERIALIZE_SCAN:
|
2019-09-12 19:07:56 +03:00
|
|
|
sname= "SJ-Materialization-Scan";
|
2019-08-25 11:03:19 +03:00
|
|
|
break;
|
|
|
|
case SJ_OPT_FIRST_MATCH:
|
|
|
|
sname= "FirstMatch";
|
|
|
|
break;
|
|
|
|
case SJ_OPT_DUPS_WEEDOUT:
|
|
|
|
sname= "DuplicateWeedout";
|
|
|
|
break;
|
2019-09-02 00:06:31 +05:30
|
|
|
case SJ_OPT_LOOSE_SCAN:
|
2019-09-02 11:21:52 +05:30
|
|
|
sname= "LooseScan";
|
2019-09-02 00:06:31 +05:30
|
|
|
break;
|
2019-08-25 11:03:19 +03:00
|
|
|
default:
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
sname="Invalid";
|
|
|
|
}
|
|
|
|
tr.add("chosen_strategy", sname);
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2022-06-06 22:21:22 +03:00
|
|
|
update_sj_state(join, new_join_tab, idx, remaining_tables);
|
|
|
|
|
2022-06-03 13:24:18 +03:00
|
|
|
pos->prefix_cost= *current_read_time;
|
2022-06-06 22:21:22 +03:00
|
|
|
pos->prefix_record_count= *current_record_count;
|
|
|
|
pos->dups_producing_tables= dups_producing_tables;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Update JOIN's semi-join optimization state after the join tab new_tab
|
|
|
|
has been added into the join prefix.
|
|
|
|
|
|
|
|
@seealso restore_prev_sj_state() does the reverse actoion
|
|
|
|
*/
|
|
|
|
|
|
|
|
void update_sj_state(JOIN *join, const JOIN_TAB *new_tab,
|
|
|
|
uint idx, table_map remaining_tables)
|
|
|
|
{
|
2022-07-07 22:15:42 +03:00
|
|
|
DBUG_ASSERT(!join->emb_sjm_nest);
|
2022-06-06 22:21:22 +03:00
|
|
|
if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
|
|
|
|
|
|
|
|
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
|
|
|
|
if (!(remaining_tables &
|
2022-06-06 22:21:22 +03:00
|
|
|
emb_sj_nest->sj_inner_tables & ~new_tab->table->map))
|
2010-02-16 00:53:06 +03:00
|
|
|
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
|
|
|
|
}
|
2022-06-06 22:21:22 +03:00
|
|
|
#ifndef DBUG_OFF
|
|
|
|
join->dbug_verify_sj_inner_tables(idx + 1);
|
|
|
|
#endif
|
2011-11-23 04:25:52 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
Fix all warnings given by UBSAN
The easiest way to compile and test the server with UBSAN is to run:
./BUILD/compile-pentium64-ubsan
and then run mysql-test-run.
After this commit, one should be able to run this without any UBSAN
warnings. There is still a few compiler warnings that should be fixed
at some point, but these do not expose any real bugs.
The 'special' cases where we disable, suppress or circumvent UBSAN are:
- ref10 source (as here we intentionally do some shifts that UBSAN
complains about).
- x86 version of optimized int#korr() methods. UBSAN do not like unaligned
memory access of integers. Fixed by using byte_order_generic.h when
compiling with UBSAN
- We use smaller thread stack with ASAN and UBSAN, which forced me to
disable a few tests that prints the thread stack size.
- Verifying class types does not work for shared libraries. I added
suppression in mysql-test-run.pl for this case.
- Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is
safe to have overflows (two cases, in item_func.cc).
Things fixed:
- Don't left shift signed values
(byte_order_generic.h, mysqltest.c, item_sum.cc and many more)
- Don't assign non-existing values to enum variables.
- Ensure that bool and enum values are properly initialized in
constructors. This was needed as UBSAN checks that these types has
correct values when one copies an object.
(gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...)
- Ensure we do not call handler functions on unallocated objects or
deleted objects.
(events.cc, sql_acl.cc).
- Fixed bugs in Item_sp::Item_sp() where we did not call constructor
on Query_arena object.
- Fixed several cast of objects to an incompatible class!
(Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc,
sql_select.cc ...)
- Ensure we do not do integer arithmetic that causes over or underflows.
This includes also ++ and -- of integers.
(Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...)
- Added JSON_VALUE_UNITIALIZED to json_value_types and ensure that
value_type is initialized to this instead of to -1, which is not a valid
enum value for json_value_types.
- Ensure we do not call memcpy() when second argument could be null.
- Fixed that Item_func_str::make_empty_result() creates an empty string
instead of a null string (safer as it ensures we do not do arithmetic
on null strings).
Other things:
- Changed struct st_position to an OBJECT and added an initialization
function to it to ensure that we do not copy or use uninitialized
members. The change to a class was also motivated by the fact that we used "struct
st_position" and POSITION randomly through the code, which was
confusing.
- Notably big rewrite in sql_acl.cc to avoid using deleted objects.
- Changed in sql_partition to use '^' instead of '-'. This is safe as
the operator is either 0 or 0x8000000000000000ULL.
- Added check for select_nr < INT_MAX in JOIN::build_explain() to
avoid bug when get_select() could return NULL.
- Reordered elements in POSITION for better alignment.
- Changed sql_test.cc::print_plan() to use pointers instead of objects.
- Fixed bug in find_set() where we could execute '1 << -1'.
- Added variable have_sanitizer, used by mtr. (This variable was before
only in 10.5 and up). It can now have one of two values:
ASAN or UBSAN.
- Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked
it virtual. This was an effort to get UBSAN to work with loaded storage
engines. I kept the change as the new place is better.
- Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast
in tabutil.cpp.
- Added HAVE_REPLICATION around usage of rgi_slave, to get embedded
server to compile with UBSAN. (Patch from Marko).
- Added #ifdef for powerpc64 to avoid a bug in old gcc versions related
to integer arithmetic.
Changes that should not be needed but had to be done to suppress warnings
from UBSAN:
- Added static_cast<<uint16_t>> around shift to get rid of a LOT of
compiler warnings when using UBSAN.
- Had to change some '/' of 2 base integers to shift to get rid of
some compile time warnings.
Reviewed by:
- Json changes: Alexey Botchkov
- Charset changes in ctype-uca.c: Alexander Barkov
- InnoDB changes & Embedded server: Marko Mäkelä
- sql_acl.cc changes: Vicențiu Ciorbaru
- build_explain() changes: Sergey Petrunia
2021-04-18 15:29:13 +03:00
|
|
|
/*
  Initialize this picker from the SJ-Materialization picker state stored in
  the previous join position.

  When the previous position's picker was marked as used, this picker is
  reset with set_empty(). Otherwise the SJM-Scan tracking fields
  (sjm_scan_need_tables, sjm_scan_last_inner) are carried over from it.
  In both cases this picker itself ends up marked as not used.

  @param prev  Previous join position to take the picker state from
*/
void Sj_materialization_picker::set_from_prev(POSITION *prev)
{
  if (!prev->sjmat_picker.is_used)
  {
    /* Previous picker was not applied: inherit its pending SJM-Scan state */
    sjm_scan_need_tables= prev->sjmat_picker.sjm_scan_need_tables;
    sjm_scan_last_inner= prev->sjmat_picker.sjm_scan_last_inner;
  }
  else
  {
    /* Previous picker was applied: start from a clean state */
    set_empty();
  }

  is_used= FALSE;
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Sj_materialization_picker::check_qep(JOIN *join,
|
|
|
|
uint idx,
|
|
|
|
table_map remaining_tables,
|
|
|
|
const JOIN_TAB *new_join_tab,
|
|
|
|
double *record_count,
|
|
|
|
double *read_time,
|
|
|
|
table_map *handled_fanout,
|
|
|
|
sj_strategy_enum *strategy,
|
|
|
|
POSITION *loose_scan_pos)
|
|
|
|
{
|
2010-02-16 00:53:06 +03:00
|
|
|
bool sjm_scan;
|
|
|
|
SJ_MATERIALIZATION_INFO *mat_info;
|
2019-02-18 17:11:20 +05:30
|
|
|
THD *thd= join->thd;
|
2010-02-16 00:53:06 +03:00
|
|
|
if ((mat_info= at_sjmat_pos(join, remaining_tables,
|
|
|
|
new_join_tab, idx, &sjm_scan)))
|
|
|
|
{
|
|
|
|
if (sjm_scan)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
We can't evaluate this option yet. This is because we can't
|
|
|
|
account for fanout of sj-inner tables yet:
|
|
|
|
|
|
|
|
ntX SJM-SCAN(it1 ... itN) | ot1 ... otN |
|
|
|
|
^(1) ^(2)
|
|
|
|
|
|
|
|
we're now at position (1). SJM temptable in general has multiple
|
|
|
|
records, so at point (1) we'll get the fanout from sj-inner tables (ie
|
|
|
|
there will be multiple record combinations).
|
|
|
|
|
|
|
|
The final join result will not contain any semi-join produced
|
|
|
|
fanout, i.e. tables within SJM-SCAN(...) will not contribute to
|
|
|
|
the cardinality of the join output. Extra fanout produced by
|
|
|
|
SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN.
|
|
|
|
|
|
|
|
The simple way to model this is to remove SJM-SCAN(...) fanout once
|
|
|
|
we reach the point #2.
|
|
|
|
*/
|
2011-11-23 04:25:52 +04:00
|
|
|
sjm_scan_need_tables=
|
2010-02-16 00:53:06 +03:00
|
|
|
new_join_tab->emb_sj_nest->sj_inner_tables |
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
2011-11-23 04:25:52 +04:00
|
|
|
sjm_scan_last_inner= idx;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* This is SJ-Materialization with lookups */
|
2022-06-03 13:24:18 +03:00
|
|
|
double prefix_cost;
|
2010-02-16 00:53:06 +03:00
|
|
|
signed int first_tab= (int)idx - mat_info->tables;
|
2022-06-03 13:24:18 +03:00
|
|
|
double prefix_rec_count, mat_read_time;
|
2020-03-06 10:33:11 +02:00
|
|
|
Json_writer_object trace(join->thd);
|
|
|
|
trace.add("strategy", "SJ-Materialization");
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
if (first_tab < (int)join->const_tables)
|
|
|
|
{
|
2022-06-03 13:24:18 +03:00
|
|
|
prefix_cost= 0;
|
2010-02-16 00:53:06 +03:00
|
|
|
prefix_rec_count= 1.0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
prefix_cost= join->positions[first_tab].prefix_cost;
|
|
|
|
prefix_rec_count= join->positions[first_tab].prefix_record_count;
|
|
|
|
}
|
|
|
|
|
2019-05-27 19:08:00 -07:00
|
|
|
mat_read_time=
|
2022-06-03 13:24:18 +03:00
|
|
|
COST_ADD(prefix_cost,
|
2022-09-30 17:10:37 +03:00
|
|
|
COST_ADD(mat_info->materialization_cost,
|
2019-05-27 19:08:00 -07:00
|
|
|
COST_MULT(prefix_rec_count,
|
2022-09-30 17:10:37 +03:00
|
|
|
mat_info->lookup_cost)));
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
|
|
|
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
|
|
|
|
elements to join->positions as that makes it hard to return things
|
|
|
|
back when making one step back in join optimization. That's done
|
|
|
|
after the QEP has been chosen.
|
|
|
|
*/
|
|
|
|
*read_time= mat_read_time;
|
|
|
|
*record_count= prefix_rec_count;
|
|
|
|
*handled_fanout= new_join_tab->emb_sj_nest->sj_inner_tables;
|
|
|
|
*strategy= SJ_OPT_MATERIALIZE;
|
2020-03-06 10:33:11 +02:00
|
|
|
if (unlikely(trace.trace_started()))
|
2019-08-25 11:03:19 +03:00
|
|
|
{
|
2022-01-20 15:49:01 +02:00
|
|
|
trace.
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- One handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simply code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("rows", *record_count).
|
2022-01-20 15:49:01 +02:00
|
|
|
add("cost", *read_time);
|
2019-08-25 11:03:19 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return TRUE;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 4.A SJM-Scan second phase check */
|
2011-11-23 04:25:52 +04:00
|
|
|
if (sjm_scan_need_tables && /* Have SJM-Scan prefix */
|
|
|
|
!(sjm_scan_need_tables & remaining_tables))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2019-08-25 11:03:19 +03:00
|
|
|
Json_writer_object trace(join->thd);
|
|
|
|
trace.add("strategy", "SJ-Materialization-Scan");
|
2010-02-16 00:53:06 +03:00
|
|
|
TABLE_LIST *mat_nest=
|
2011-11-23 04:25:52 +04:00
|
|
|
join->positions[sjm_scan_last_inner].table->emb_sj_nest;
|
2010-02-16 00:53:06 +03:00
|
|
|
SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
|
|
|
|
|
|
|
|
double prefix_cost;
|
|
|
|
double prefix_rec_count;
|
2011-11-23 04:25:52 +04:00
|
|
|
int first_tab= sjm_scan_last_inner + 1 - mat_info->tables;
|
2010-02-16 00:53:06 +03:00
|
|
|
/* Get the prefix cost */
|
|
|
|
if (first_tab == (int)join->const_tables)
|
|
|
|
{
|
|
|
|
prefix_rec_count= 1.0;
|
|
|
|
prefix_cost= 0.0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2022-06-03 13:24:18 +03:00
|
|
|
prefix_cost= join->positions[first_tab - 1].prefix_cost;
|
2010-02-16 00:53:06 +03:00
|
|
|
prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Add materialization cost */
|
2019-05-27 19:08:00 -07:00
|
|
|
prefix_cost=
|
|
|
|
COST_ADD(prefix_cost,
|
2022-09-30 17:10:37 +03:00
|
|
|
COST_ADD(mat_info->materialization_cost,
|
2019-05-27 19:08:00 -07:00
|
|
|
COST_MULT(prefix_rec_count,
|
2022-09-30 17:10:37 +03:00
|
|
|
mat_info->scan_cost)));
|
2019-05-27 19:08:00 -07:00
|
|
|
prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
uint i;
|
|
|
|
table_map rem_tables= remaining_tables;
|
|
|
|
for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
|
|
|
|
rem_tables |= join->positions[i].table->table->map;
|
|
|
|
|
|
|
|
POSITION curpos, dummy;
|
|
|
|
/* Need to re-run best-access-path as prefix_rec_count has changed */
|
2011-11-23 04:25:52 +04:00
|
|
|
bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_temp_disable trace_semijoin_mat_scan(thd);
|
2010-02-16 00:53:06 +03:00
|
|
|
for (i= first_tab + mat_info->tables; i <= idx; i++)
|
|
|
|
{
|
2019-09-10 23:51:42 +03:00
|
|
|
best_access_path(join, join->positions[i].table, rem_tables,
|
|
|
|
join->positions, i,
|
2010-05-26 13:18:18 -07:00
|
|
|
disable_jbuf, prefix_rec_count, &curpos, &dummy);
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out);
|
2019-05-27 19:08:00 -07:00
|
|
|
prefix_cost= COST_ADD(prefix_cost, curpos.read_time);
|
2019-08-29 15:37:49 +03:00
|
|
|
//TODO: take into account join condition selectivity here
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
*strategy= SJ_OPT_MATERIALIZE_SCAN;
|
|
|
|
*read_time= prefix_cost;
|
2019-11-13 18:53:59 +03:00
|
|
|
/*
|
|
|
|
Note: the next line means we did not remove the subquery's fanout from
|
|
|
|
*record_count. It needs to be removed, as the join prefix is
|
|
|
|
|
|
|
|
ntX SJM-SCAN(it1 ... itN) | (ot1 ... otN) ...
|
|
|
|
|
|
|
|
here, the SJM-SCAN may have introduced subquery's fanout (duplicate rows,
|
|
|
|
rows that don't have matches in ot1_i). All this fanout is gone after
|
|
|
|
table otN (or earlier) but taking it into account is hard.
|
|
|
|
|
|
|
|
Some consolation here is that SJM-Scan strategy is applicable when the
|
|
|
|
subquery is smaller than tables otX. If the subquery has large cardinality,
|
|
|
|
we can greatly overestimate *record_count here, but it doesn't matter as
|
|
|
|
SJ-Materialization-Lookup is a better strategy anyway.
|
|
|
|
*/
|
|
|
|
*record_count= prefix_rec_count;
|
2011-11-23 04:25:52 +04:00
|
|
|
*handled_fanout= mat_nest->sj_inner_tables;
|
2020-03-06 10:33:11 +02:00
|
|
|
if (unlikely(trace.trace_started()))
|
2019-08-25 11:03:19 +03:00
|
|
|
{
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
trace.
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("rows", *record_count).
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
add("cost", *read_time);
|
2019-08-25 11:03:19 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Fix all warnings given by UBSAN
The easiest way to compile and test the server with UBSAN is to run:
./BUILD/compile-pentium64-ubsan
and then run mysql-test-run.
After this commit, one should be able to run this without any UBSAN
warnings. There is still a few compiler warnings that should be fixed
at some point, but these do not expose any real bugs.
The 'special' cases where we disable, suppress or circumvent UBSAN are:
- ref10 source (as here we intentionally do some shifts that UBSAN
complains about).
- x86 version of optimized int#korr() methods. UBSAN do not like unaligned
memory access of integers. Fixed by using byte_order_generic.h when
compiling with UBSAN
- We use smaller thread stack with ASAN and UBSAN, which forced me to
disable a few tests that prints the thread stack size.
- Verifying class types does not work for shared libraries. I added
suppression in mysql-test-run.pl for this case.
- Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is
safe to have overflows (two cases, in item_func.cc).
Things fixed:
- Don't left shift signed values
(byte_order_generic.h, mysqltest.c, item_sum.cc and many more)
- Don't assign non-existing values to enum variables.
- Ensure that bool and enum values are properly initialized in
constructors. This was needed as UBSAN checks that these types has
correct values when one copies an object.
(gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...)
- Ensure we do not call handler functions on unallocated objects or
deleted objects.
(events.cc, sql_acl.cc).
- Fixed bugs in Item_sp::Item_sp() where we did not call constructor
on Query_arena object.
- Fixed several cast of objects to an incompatible class!
(Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc,
sql_select.cc ...)
- Ensure we do not do integer arithmetic that causes over or underflows.
This includes also ++ and -- of integers.
(Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...)
- Added JSON_VALUE_UNITIALIZED to json_value_types and ensure that
value_type is initialized to this instead of to -1, which is not a valid
enum value for json_value_types.
- Ensure we do not call memcpy() when second argument could be null.
- Fixed that Item_func_str::make_empty_result() creates an empty string
instead of a null string (safer as it ensures we do not do arithmetic
on null strings).
Other things:
- Changed struct st_position to an OBJECT and added an initialization
function to it to ensure that we do not copy or use uninitialized
members. The change to a class was also motivated by our use of "struct
st_position" and POSITION randomly through the code, which was
confusing.
- Notably big rewrite in sql_acl.cc to avoid using deleted objects.
- Changed in sql_partition to use '^' instead of '-'. This is safe as
the operator is either 0 or 0x8000000000000000ULL.
- Added check for select_nr < INT_MAX in JOIN::build_explain() to
avoid bug when get_select() could return NULL.
- Reordered elements in POSITION for better alignment.
- Changed sql_test.cc::print_plan() to use pointers instead of objects.
- Fixed bug in find_set() where we could execute '1 << -1'.
- Added variable have_sanitizer, used by mtr. (This variable was before
only in 10.5 and up). It can now have one of two values:
ASAN or UBSAN.
- Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked
it virtual. This was an effort to get UBSAN to work with loaded storage
engines. I kept the change as the new place is better.
- Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast
in tabutil.cpp.
- Added HAVE_REPLICATION around usage of rgi_slave, to get embedded
server to compile with UBSAN. (Patch from Marko).
- Added #ifdef for powerpc64 to avoid a bug in old gcc versions related
to integer arithmetic.
Changes that should not be needed but had to be done to suppress warnings
from UBSAN:
- Added static_cast<<uint16_t>> around shift to get rid of a LOT of
compiler warnings when using UBSAN.
- Had to change some '/' of 2 base integers to shift to get rid of
some compile time warnings.
Reviewed by:
- Json changes: Alexey Botchkov
- Charset changes in ctype-uca.c: Alexander Barkov
- InnoDB changes & Embedded server: Marko Mäkelä
- sql_acl.cc changes: Vicențiu Ciorbaru
- build_explain() changes: Sergey Petrunia
2021-04-18 15:29:13 +03:00
|
|
|
/*
  Initialize this LooseScan picker from the picker stored in the previous
  join prefix position.

  @param prev  The preceding POSITION; its 'loosescan_picker' member is read.

  If the previous picker was not marked as used, its first_loosescan_table
  and loosescan_need_tables are carried over so this picker continues from
  the same state. If it was marked as used, this picker is reset with
  set_empty() instead. In both cases is_used is cleared for this picker.
*/
void LooseScan_picker::set_from_prev(POSITION *prev)
{
  if (!prev->loosescan_picker.is_used)
  {
    /* Previous picker was not used: inherit its in-progress state */
    first_loosescan_table= prev->loosescan_picker.first_loosescan_table;
    loosescan_need_tables= prev->loosescan_picker.loosescan_need_tables;
  }
  else
  {
    /* Previous picker was used: start over from an empty state */
    set_empty();
  }
  is_used= FALSE;
}
|
|
|
|
|
|
|
|
|
|
|
|
bool LooseScan_picker::check_qep(JOIN *join,
|
|
|
|
uint idx,
|
|
|
|
table_map remaining_tables,
|
|
|
|
const JOIN_TAB *new_join_tab,
|
|
|
|
double *record_count,
|
|
|
|
double *read_time,
|
|
|
|
table_map *handled_fanout,
|
|
|
|
sj_strategy_enum *strategy,
|
Fix all warnings given by UBSAN
The easiest way to compile and test the server with UBSAN is to run:
./BUILD/compile-pentium64-ubsan
and then run mysql-test-run.
After this commit, one should be able to run this without any UBSAN
warnings. There is still a few compiler warnings that should be fixed
at some point, but these do not expose any real bugs.
The 'special' cases where we disable, suppress or circumvent UBSAN are:
- ref10 source (as here we intentionally do some shifts that UBSAN
complains about).
- x86 version of optimized int#korr() methods. UBSAN do not like unaligned
memory access of integers. Fixed by using byte_order_generic.h when
compiling with UBSAN
- We use smaller thread stack with ASAN and UBSAN, which forced me to
disable a few tests that prints the thread stack size.
- Verifying class types does not work for shared libraries. I added
suppression in mysql-test-run.pl for this case.
- Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is
safe to have overflows (two cases, in item_func.cc).
Things fixed:
- Don't left shift signed values
(byte_order_generic.h, mysqltest.c, item_sum.cc and many more)
- Don't assign non-existing values to enum variables.
- Ensure that bool and enum values are properly initialized in
constructors. This was needed as UBSAN checks that these types has
correct values when one copies an object.
(gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...)
- Ensure we do not call handler functions on unallocated objects or
deleted objects.
(events.cc, sql_acl.cc).
- Fixed bugs in Item_sp::Item_sp() where we did not call constructor
on Query_arena object.
- Fixed several cast of objects to an incompatible class!
(Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc,
sql_select.cc ...)
- Ensure we do not do integer arithmetic that causes over or underflows.
This includes also ++ and -- of integers.
(Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...)
- Added JSON_VALUE_UNITIALIZED to json_value_types and ensure that
value_type is initialized to this instead of to -1, which is not a valid
enum value for json_value_types.
- Ensure we do not call memcpy() when second argument could be null.
- Fixed that Item_func_str::make_empty_result() creates an empty string
instead of a null string (safer as it ensures we do not do arithmetic
on null strings).
Other things:
- Changed struct st_position to an OBJECT and added an initialization
function to it to ensure that we do not copy or use uninitialized
members. The change to a class was also motivated by our use of "struct
st_position" and POSITION randomly through the code, which was
confusing.
- Notably big rewrite in sql_acl.cc to avoid using deleted objects.
- Changed in sql_partition to use '^' instead of '-'. This is safe as
the operator is either 0 or 0x8000000000000000ULL.
- Added check for select_nr < INT_MAX in JOIN::build_explain() to
avoid bug when get_select() could return NULL.
- Reordered elements in POSITION for better alignment.
- Changed sql_test.cc::print_plan() to use pointers instead of objects.
- Fixed bug in find_set() where we could execute '1 << -1'.
- Added variable have_sanitizer, used by mtr. (This variable was before
only in 10.5 and up). It can now have one of two values:
ASAN or UBSAN.
- Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked
it virtual. This was an effort to get UBSAN to work with loaded storage
engines. I kept the change as the new place is better.
- Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast
in tabutil.cpp.
- Added HAVE_REPLICATION around usage of rgi_slave, to get embedded
server to compile with UBSAN. (Patch from Marko).
- Added #ifdef for powerpc64 to avoid a bug in old gcc versions related
to integer arithmetic.
Changes that should not be needed but had to be done to suppress warnings
from UBSAN:
- Added static_cast<<uint16_t>> around shift to get rid of a LOT of
compiler warnings when using UBSAN.
- Had to change some '/' of 2 base integers to shift to get rid of
some compile time warnings.
Reviewed by:
- Json changes: Alexey Botchkov
- Charset changes in ctype-uca.c: Alexander Barkov
- InnoDB changes & Embedded server: Marko Mäkelä
- sql_acl.cc changes: Vicențiu Ciorbaru
- build_explain() changes: Sergey Petrunia
2021-04-18 15:29:13 +03:00
|
|
|
POSITION *loose_scan_pos)
|
2011-11-23 04:25:52 +04:00
|
|
|
{
|
|
|
|
POSITION *first= join->positions + first_loosescan_table;
|
|
|
|
/*
|
|
|
|
LooseScan strategy can't handle interleaving between tables from the
|
|
|
|
semi-join that LooseScan is handling and any other tables.
|
|
|
|
|
|
|
|
If we were considering LooseScan for the join prefix (1)
|
|
|
|
and the table we're adding creates an interleaving (2)
|
|
|
|
then
|
|
|
|
stop considering loose scan
|
|
|
|
*/
|
|
|
|
if ((first_loosescan_table != MAX_TABLES) && // (1)
|
|
|
|
(first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
|
|
|
|
new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
|
|
|
|
{
|
|
|
|
first_loosescan_table= MAX_TABLES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
If we got an option to use LooseScan for the current table, start
|
|
|
|
considering using LooseScan strategy
|
|
|
|
*/
|
|
|
|
if (loose_scan_pos->read_time != DBL_MAX && !join->outer_join)
|
|
|
|
{
|
|
|
|
first_loosescan_table= idx;
|
|
|
|
loosescan_need_tables=
|
|
|
|
new_join_tab->emb_sj_nest->sj_inner_tables |
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((first_loosescan_table != MAX_TABLES) &&
|
|
|
|
!(remaining_tables & loosescan_need_tables) &&
|
|
|
|
(new_join_tab->table->map & loosescan_need_tables))
|
|
|
|
{
|
2019-08-25 11:03:19 +03:00
|
|
|
Json_writer_object trace(join->thd);
|
2019-09-12 19:07:56 +03:00
|
|
|
trace.add("strategy", "LooseScan");
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
|
|
|
Ok we have LooseScan plan and also have all LooseScan sj-nest's
|
|
|
|
inner tables and outer correlated tables into the prefix.
|
|
|
|
*/
|
|
|
|
|
|
|
|
first= join->positions + first_loosescan_table;
|
|
|
|
uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
|
|
|
|
/* Got a complete LooseScan range. Calculate its cost */
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
2011-11-23 04:25:52 +04:00
|
|
|
The same problem as with FirstMatch - we need to save POSITIONs
|
|
|
|
somewhere but reserving space for all cases would require too
|
|
|
|
much space. We will re-calculate POSITION structures later on.
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
2011-11-23 04:25:52 +04:00
|
|
|
bool disable_jbuf= (join->thd->variables.join_cache_level == 0);
|
|
|
|
optimize_wo_join_buffering(join, first_loosescan_table, idx,
|
|
|
|
remaining_tables,
|
|
|
|
TRUE, //first_alt
|
|
|
|
disable_jbuf ? join->table_count :
|
|
|
|
first_loosescan_table + n_tables,
|
|
|
|
record_count,
|
|
|
|
read_time);
|
|
|
|
/*
|
|
|
|
We don't yet have any other strategies that could handle this
|
|
|
|
semi-join nest (the other options are Duplicate Elimination or
|
|
|
|
Materialization, which need at least the same set of tables in
|
|
|
|
the join prefix to be considered) so unconditionally pick the
|
|
|
|
LooseScan.
|
|
|
|
*/
|
|
|
|
*strategy= SJ_OPT_LOOSE_SCAN;
|
|
|
|
*handled_fanout= first->table->emb_sj_nest->sj_inner_tables;
|
2020-03-06 10:33:11 +02:00
|
|
|
if (unlikely(trace.trace_started()))
|
2019-08-25 11:03:19 +03:00
|
|
|
{
|
2022-01-20 15:49:01 +02:00
|
|
|
trace.
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("rows", *record_count).
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
add("cost", *read_time);
|
2019-08-25 11:03:19 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
Fix all warnings given by UBSAN
The easiest way to compile and test the server with UBSAN is to run:
./BUILD/compile-pentium64-ubsan
and then run mysql-test-run.
After this commit, one should be able to run this without any UBSAN
warnings. There is still a few compiler warnings that should be fixed
at some point, but these do not expose any real bugs.
The 'special' cases where we disable, suppress or circumvent UBSAN are:
- ref10 source (as here we intentionally do some shifts that UBSAN
complains about).
- x86 version of optimized int#korr() methods. UBSAN do not like unaligned
memory access of integers. Fixed by using byte_order_generic.h when
compiling with UBSAN
- We use smaller thread stack with ASAN and UBSAN, which forced me to
disable a few tests that prints the thread stack size.
- Verifying class types does not work for shared libraries. I added
suppression in mysql-test-run.pl for this case.
- Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is
safe to have overflows (two cases, in item_func.cc).
Things fixed:
- Don't left shift signed values
(byte_order_generic.h, mysqltest.c, item_sum.cc and many more)
- Don't assign non-existing values to enum variables.
- Ensure that bool and enum values are properly initialized in
constructors. This was needed as UBSAN checks that these types has
correct values when one copies an object.
(gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...)
- Ensure we do not call handler functions on unallocated objects or
deleted objects.
(events.cc, sql_acl.cc).
- Fixed bugs in Item_sp::Item_sp() where we did not call constructor
on Query_arena object.
- Fixed several cast of objects to an incompatible class!
(Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc,
sql_select.cc ...)
- Ensure we do not do integer arithmetic that causes over or underflows.
This includes also ++ and -- of integers.
(Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...)
- Added JSON_VALUE_UNITIALIZED to json_value_types and ensure that
value_type is initialized to this instead of to -1, which is not a valid
enum value for json_value_types.
- Ensure we do not call memcpy() when second argument could be null.
- Fixed that Item_func_str::make_empty_result() creates an empty string
instead of a null string (safer as it ensures we do not do arithmetic
on null strings).
Other things:
- Changed struct st_position to an OBJECT and added an initialization
function to it to ensure that we do not copy or use uninitialized
members. The change to a class was also motivated by the fact that we
used "struct st_position" and POSITION randomly through the code, which was
confusing.
- Notably big rewrite in sql_acl.cc to avoid using deleted objects.
- Changed in sql_partition to use '^' instead of '-'. This is safe as
the operator is either 0 or 0x8000000000000000ULL.
- Added check for select_nr < INT_MAX in JOIN::build_explain() to
avoid bug when get_select() could return NULL.
- Reordered elements in POSITION for better alignment.
- Changed sql_test.cc::print_plan() to use pointers instead of objects.
- Fixed bug in find_set() where we could execute '1 << -1'.
- Added variable have_sanitizer, used by mtr. (This variable was before
only in 10.5 and up). It can now have one of two values:
ASAN or UBSAN.
- Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked
it virtual. This was an effort to get UBSAN to work with loaded storage
engines. I kept the change as the new place is better.
- Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast
in tabutil.cpp.
- Added HAVE_REPLICATION around usage of rgi_slave, to get embedded
server to compile with UBSAN. (Patch from Marko).
- Added #ifdef for powerpc64 to avoid a bug in old gcc versions related
to integer arithmetic.
Changes that should not be needed but had to be done to suppress warnings
from UBSAN:
- Added static_cast<uint16_t> around shift to get rid of a LOT of
compiler warnings when using UBSAN.
- Had to change some '/' of 2 base integers to shift to get rid of
some compile time warnings.
Reviewed by:
- Json changes: Alexey Botchkov
- Charset changes in ctype-uca.c: Alexander Barkov
- InnoDB changes & Embedded server: Marko Mäkelä
- sql_acl.cc changes: Vicențiu Ciorbaru
- build_explain() changes: Sergey Petrunia
2021-04-18 15:29:13 +03:00
|
|
|
/*
  Initialize this picker's FirstMatch tracking state from the picker
  stored in the previous join position.

  @param prev  Previous POSITION; only its firstmatch_picker member is read.

  If prev's picker has is_used set, invalidate_firstmatch_prefix() is
  called instead of inheriting anything. Otherwise the three tracking
  members (first_firstmatch_table, first_firstmatch_rtbl,
  firstmatch_need_tables) are copied from prev. In both cases is_used is
  reset to FALSE for this position.

  NOTE(review): presumably is_used means the FirstMatch strategy was
  already picked at 'prev', so a new range has to start here - confirm
  against the class declaration.
*/
void Firstmatch_picker::set_from_prev(POSITION *prev)
|
2011-11-23 04:25:52 +04:00
|
|
|
{
|
|
|
|
/* Do not continue a prefix that the previous position marked as used */
if (prev->firstmatch_picker.is_used)
|
|
|
|
invalidate_firstmatch_prefix();
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Inherit the tracking state of the previous position unchanged */
first_firstmatch_table= prev->firstmatch_picker.first_firstmatch_table;
|
|
|
|
first_firstmatch_rtbl= prev->firstmatch_picker.first_firstmatch_rtbl;
|
|
|
|
firstmatch_need_tables= prev->firstmatch_picker.firstmatch_need_tables;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
/* Always cleared here, on both branches above */
is_used= FALSE;
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
bool Firstmatch_picker::check_qep(JOIN *join,
|
|
|
|
uint idx,
|
|
|
|
table_map remaining_tables,
|
|
|
|
const JOIN_TAB *new_join_tab,
|
|
|
|
double *record_count,
|
|
|
|
double *read_time,
|
|
|
|
table_map *handled_fanout,
|
|
|
|
sj_strategy_enum *strategy,
|
|
|
|
POSITION *loose_scan_pos)
|
|
|
|
{
|
|
|
|
if (new_join_tab->emb_sj_nest &&
|
|
|
|
optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH) &&
|
|
|
|
!join->outer_join)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
const table_map outer_corr_tables=
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
|
|
|
|
new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
|
|
|
|
const table_map sj_inner_tables=
|
|
|
|
new_join_tab->emb_sj_nest->sj_inner_tables & ~join->const_table_map;
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
2011-11-23 04:25:52 +04:00
|
|
|
Enter condition:
|
|
|
|
1. The next join tab belongs to semi-join nest
|
|
|
|
(verified for the encompassing code block above).
|
|
|
|
2. We're not in a duplicate producer range yet
|
|
|
|
3. All outer tables that
|
|
|
|
- the subquery is correlated with, or
|
|
|
|
- referred to from the outer_expr
|
|
|
|
are in the join prefix
|
|
|
|
4. All inner tables are still part of remaining_tables.
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
2011-11-23 04:25:52 +04:00
|
|
|
if (!join->cur_sj_inner_tables && // (2)
|
|
|
|
!(remaining_tables & outer_corr_tables) && // (3)
|
|
|
|
(sj_inner_tables == // (4)
|
|
|
|
((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
|
2011-07-21 19:14:34 +04:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
/* Start tracking potential FirstMatch range */
|
|
|
|
first_firstmatch_table= idx;
|
|
|
|
firstmatch_need_tables= sj_inner_tables;
|
|
|
|
first_firstmatch_rtbl= remaining_tables;
|
2011-07-21 19:14:34 +04:00
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
if (in_firstmatch_prefix())
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
if (outer_corr_tables & first_firstmatch_rtbl)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
|
|
|
Trying to add an sj-inner table whose sj-nest has an outer correlated
|
|
|
|
table that was not in the prefix. This means FirstMatch can't be used.
|
|
|
|
*/
|
|
|
|
invalidate_firstmatch_prefix();
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
/* Record that we need all of this semi-join's inner tables, too */
|
|
|
|
firstmatch_need_tables|= sj_inner_tables;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
|
|
|
|
if (in_firstmatch_prefix() &&
|
|
|
|
!(firstmatch_need_tables & remaining_tables))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2019-08-25 11:03:19 +03:00
|
|
|
Json_writer_object trace(join->thd);
|
|
|
|
trace.add("strategy", "FirstMatch");
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
2011-12-08 04:22:38 +04:00
|
|
|
Got a complete FirstMatch range. Calculate correct costs and fanout
|
2011-11-23 04:25:52 +04:00
|
|
|
*/
|
2011-12-08 04:22:38 +04:00
|
|
|
|
|
|
|
if (idx == first_firstmatch_table &&
|
|
|
|
optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-12-08 04:22:38 +04:00
|
|
|
/*
|
2021-10-06 12:31:19 +03:00
|
|
|
An important special case: only one inner table, and
|
|
|
|
@@optimizer_switch allows join buffering.
|
2011-12-08 04:22:38 +04:00
|
|
|
- read_time is the same (i.e. FirstMatch doesn't add any cost
|
2023-03-07 11:25:16 +02:00
|
|
|
- remove fanout added by the last table)
|
2011-12-08 04:22:38 +04:00
|
|
|
*/
|
2011-12-11 19:41:53 -08:00
|
|
|
if (*record_count)
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
*record_count /= join->positions[idx].records_out;
|
2023-03-07 11:25:16 +02:00
|
|
|
/*
|
|
|
|
Remember this choice for
|
|
|
|
fix_semijoin_strategies_for_picked_join_order()
|
|
|
|
*/
|
|
|
|
join->positions[idx].firstmatch_with_join_buf= 1;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-12-08 04:22:38 +04:00
|
|
|
optimize_wo_join_buffering(join, first_firstmatch_table, idx,
|
|
|
|
remaining_tables, FALSE, idx,
|
|
|
|
record_count,
|
|
|
|
read_time);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
|
|
|
|
We ought to save the alternate POSITIONs produced by
|
|
|
|
optimize_wo_join_buffering but the problem is that providing save
|
|
|
|
space uses too much space. Instead, we will re-calculate the
|
|
|
|
alternate POSITIONs after we've picked the best QEP.
|
|
|
|
*/
|
|
|
|
*handled_fanout= firstmatch_need_tables;
|
|
|
|
/* *record_count and *read_time were set by the above call */
|
|
|
|
*strategy= SJ_OPT_FIRST_MATCH;
|
2020-03-06 10:33:11 +02:00
|
|
|
if (unlikely(trace.trace_started()))
|
2019-08-25 11:03:19 +03:00
|
|
|
{
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than were
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
trace.
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees have similar costs to binary
trees. Replaced with engine calls that return the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the costs are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("rows", *record_count).
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than were
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now take into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
add("cost", *read_time);
|
2019-08-25 11:03:19 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return TRUE;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
}
|
|
|
|
}
|
2012-03-26 21:34:24 +04:00
|
|
|
else
|
|
|
|
invalidate_firstmatch_prefix();
|
2011-11-23 04:25:52 +04:00
|
|
|
return FALSE;
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accesses, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs take this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now take into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
/*
|
|
|
|
Duplicate_weedout strategy is described at
|
|
|
|
https://mariadb.com/kb/en/duplicateweedout-strategy/
|
|
|
|
|
|
|
|
The idea is that if one has a subquery of type:
|
|
|
|
|
|
|
|
select *
|
|
|
|
from Country
|
|
|
|
where
|
|
|
|
Country.code IN (select City.Country
|
|
|
|
from City
|
|
|
|
where
|
|
|
|
...)
|
|
|
|
|
|
|
|
Before semi join optimization it was executed this way:
|
|
|
|
- Scan rows in Country
|
|
|
|
- For each accepted row, execute the sub query with
|
|
|
|
'Country.code = City.Country' added to the WHERE clause and with
|
|
|
|
LIMIT 1
|
|
|
|
|
|
|
|
With semi join optimization it can be converted to the following semi join.
|
|
|
|
|
|
|
|
select * from Country semi-join City
|
|
|
|
where Country.code = City.Country and ...
|
|
|
|
|
|
|
|
This is executed as:
|
|
|
|
|
|
|
|
- Scan rows in Country
|
|
|
|
- Scan rows in City with 'Country.code = City.Country' added to the
|
|
|
|
subquery WHERE clause. Stop scanning after the first match.
|
|
|
|
|
|
|
|
or
|
|
|
|
|
|
|
|
- Create temporary table to store City.Country (with a unique key)
|
|
|
|
- Scan rows in City (according to plan for City) and put them into the
|
|
|
|
temporary table
|
|
|
|
- Scan the temporary table
|
|
|
|
- Do index lookup in Country table with City.Country
|
|
|
|
|
|
|
|
With Duplicate_weedout we would try to instead do:
|
|
|
|
|
|
|
|
- Create temporary table to hold unique rowid's for the Country
|
|
|
|
- Scan rows in City (according to plan for City)
|
|
|
|
- Scan rows in Country (according to plan for Country)
|
|
|
|
- Write Country.id rowid to temporary table. If there was no
|
|
|
|
conflicting row in the temporary table, accept the row combination.
|
|
|
|
- Delete temporary table
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
/*
  Initialize this picker's state from the state stored in the previous
  join position.

  @param prev  Previous position; its dups_weedout_picker member is read.

  If the previous picker is marked as used, this picker is reset with
  set_empty(). Otherwise the table set and the index of the first table
  of the DuplicateWeedout range being built are copied over from it.
  In both cases this picker ends up marked as not used.
*/
void Duplicate_weedout_picker::set_from_prev(POSITION *prev)
{
  if (!prev->dups_weedout_picker.is_used)
  {
    /* A range is still being built: continue from where 'prev' left off */
    first_dupsweedout_table= prev->dups_weedout_picker.first_dupsweedout_table;
    dupsweedout_tables= prev->dups_weedout_picker.dupsweedout_tables;
  }
  else
    set_empty();

  is_used= FALSE;
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
bool Duplicate_weedout_picker::check_qep(JOIN *join,
|
|
|
|
uint idx,
|
|
|
|
table_map remaining_tables,
|
|
|
|
const JOIN_TAB *new_join_tab,
|
|
|
|
double *record_count,
|
|
|
|
double *read_time,
|
|
|
|
table_map *handled_fanout,
|
|
|
|
sj_strategy_enum *strategy,
|
|
|
|
POSITION *loose_scan_pos
|
|
|
|
)
|
|
|
|
{
|
|
|
|
TABLE_LIST *nest;
|
|
|
|
if ((nest= new_join_tab->emb_sj_nest))
|
|
|
|
{
|
|
|
|
if (!dupsweedout_tables)
|
|
|
|
first_dupsweedout_table= idx;
|
|
|
|
|
|
|
|
dupsweedout_tables |= nest->sj_inner_tables |
|
|
|
|
nest->nested_join->sj_depends_on |
|
|
|
|
nest->nested_join->sj_corr_tables;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dupsweedout_tables)
|
|
|
|
{
|
|
|
|
/* we're in the process of constructing a DuplicateWeedout range */
|
|
|
|
TABLE_LIST *emb= new_join_tab->table->pos_in_table_list->embedding;
|
|
|
|
/* and we've entered an inner side of an outer join*/
|
|
|
|
if (emb && emb->on_expr)
|
|
|
|
dupsweedout_tables |= emb->nested_join->used_tables;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If this is the last table that we need for DuplicateWeedout range */
|
|
|
|
if (dupsweedout_tables && !(remaining_tables & ~new_join_tab->table->map &
|
|
|
|
dupsweedout_tables))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Ok, reached a state where we could put a dups weedout point.
|
|
|
|
Walk back and calculate
|
|
|
|
- the join cost (this is needed as the accumulated cost may assume
|
|
|
|
some other duplicate elimination method)
|
|
|
|
- extra fanout that will be removed by duplicate elimination
|
|
|
|
- duplicate elimination cost
|
|
|
|
There are two cases:
|
|
|
|
1. We have other strategy/ies to remove all of the duplicates.
|
|
|
|
2. We don't.
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
We need to calculate the cost in case #2 also because we need to make
|
|
|
|
choice between this join order and others.
|
|
|
|
*/
|
|
|
|
uint first_tab= first_dupsweedout_table;
|
|
|
|
double dups_cost;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
double first_weedout_table_rec_count;
|
2011-11-23 04:25:52 +04:00
|
|
|
double sj_inner_fanout= 1.0;
|
|
|
|
double sj_outer_fanout= 1.0;
|
|
|
|
uint temptable_rec_size;
|
2020-03-06 10:33:11 +02:00
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
if (first_tab == join->const_tables)
|
|
|
|
{
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
first_weedout_table_rec_count= 1.0;
|
2011-11-23 04:25:52 +04:00
|
|
|
temptable_rec_size= 0;
|
|
|
|
dups_cost= 0.0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2022-06-03 13:24:18 +03:00
|
|
|
dups_cost= join->positions[first_tab - 1].prefix_cost;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
first_weedout_table_rec_count=
|
|
|
|
join->positions[first_tab - 1].prefix_record_count;
|
2011-11-23 04:25:52 +04:00
|
|
|
temptable_rec_size= 8; /* This is not true but we'll make it so */
|
|
|
|
}
|
|
|
|
|
|
|
|
table_map dups_removed_fanout= 0;
|
|
|
|
for (uint j= first_dupsweedout_table; j <= idx; j++)
|
|
|
|
{
|
|
|
|
POSITION *p= join->positions + j;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
dups_cost= COST_ADD(dups_cost, p->read_time);
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
if (p->table->emb_sj_nest)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2022-10-03 14:29:04 +03:00
|
|
|
sj_inner_fanout= COST_MULT(sj_inner_fanout, p->records_out);
|
2011-11-23 04:25:52 +04:00
|
|
|
dups_removed_fanout |= p->table->table->map;
|
|
|
|
}
|
|
|
|
else
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2022-10-03 14:29:04 +03:00
|
|
|
sj_outer_fanout= COST_MULT(sj_outer_fanout, p->records_out);
|
2023-01-12 22:31:18 +02:00
|
|
|
/* Ensure that table supports comparable rowids */
|
|
|
|
DBUG_ASSERT(!(p->table->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID));
|
|
|
|
|
2011-11-23 04:25:52 +04:00
|
|
|
temptable_rec_size += p->table->table->file->ref_length;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
Add the cost of temptable use. The table will have sj_outer_fanout
|
|
|
|
records, and we will make
|
|
|
|
- sj_outer_fanout table writes
|
2021-10-06 12:31:19 +03:00
|
|
|
- sj_inner_fanout*sj_outer_fanout lookups.
|
2022-06-16 13:12:01 +03:00
|
|
|
|
|
|
|
There is no row copy cost (as we are only copying rowid) and no
|
|
|
|
compare cost (as we are only checking if the row exists by
|
|
|
|
checking if we got a write error).
|
2011-11-23 04:25:52 +04:00
|
|
|
*/
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
TMPTABLE_COSTS one_cost= get_tmp_table_costs(join->thd,
|
|
|
|
sj_outer_fanout,
|
|
|
|
temptable_rec_size,
|
2022-06-16 13:12:01 +03:00
|
|
|
0, 0);
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
double write_cost= (one_cost.create +
|
2022-10-03 14:29:04 +03:00
|
|
|
first_weedout_table_rec_count * sj_outer_fanout * one_cost.write);
|
|
|
|
double full_lookup_cost= (first_weedout_table_rec_count* sj_outer_fanout *
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
sj_inner_fanout * one_cost.lookup);
|
|
|
|
*read_time= dups_cost + write_cost + full_lookup_cost;
|
2011-11-23 04:25:52 +04:00
|
|
|
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize for different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
*record_count= first_weedout_table_rec_count * sj_outer_fanout;
|
2011-11-23 04:25:52 +04:00
|
|
|
*handled_fanout= dups_removed_fanout;
|
|
|
|
*strategy= SJ_OPT_DUPS_WEEDOUT;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into considering
cost_for_index_read() if there where no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
if (unlikely(join->thd->trace_started()))
|
2019-08-25 11:03:19 +03:00
|
|
|
{
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account for the time
to find and check the next key value against the container.
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables has lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This caused ref and eq_ref to be regarded as way too
cheap compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code as we now include
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
fewer keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statement.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
Json_writer_object trace(join->thd);
|
|
|
|
trace.
|
|
|
|
add("strategy", "DuplicateWeedout").
|
2022-10-03 14:29:04 +03:00
|
|
|
add("prefix_row_count", first_weedout_table_rec_count).
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that need to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that the user has given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("tmp_table_rows", sj_outer_fanout).
|
|
|
|
add("sj_inner_fanout", sj_inner_fanout).
|
|
|
|
add("rows", *record_count).
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accesses, clustered keys or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than were
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST, the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than on a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs take this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now take into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account for the time
to find and check the next key value against the container.
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables has lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This caused ref and eq_ref to be regarded as way too
cheap compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
meant that the cost for hash joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
add("dups_cost", dups_cost).
|
|
|
|
add("write_cost", write_cost).
|
|
|
|
add("full_lookup_cost", full_lookup_cost).
|
|
|
|
add("total_cost", *read_time);
|
2019-08-25 11:03:19 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return TRUE;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2011-11-23 04:25:52 +04:00
|
|
|
return FALSE;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2022-06-06 22:21:22 +03:00
|
|
|
#ifndef DBUG_OFF
|
|
|
|
/*
|
|
|
|
Verify the value of JOIN::cur_sj_inner_tables by recomputing it
|
|
|
|
*/
|
|
|
|
void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const
|
|
|
|
{
|
|
|
|
table_map cur_map= const_table_map;
|
|
|
|
table_map nests_entered= 0;
|
|
|
|
if (emb_sjm_nest)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(cur_sj_inner_tables == 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (uint i= const_tables; i < prefix_size; i++)
|
|
|
|
{
|
|
|
|
JOIN_TAB *tab= positions[i].table;
|
|
|
|
cur_map |= tab->table->map;
|
|
|
|
if (TABLE_LIST *sj_nest= tab->emb_sj_nest)
|
|
|
|
{
|
|
|
|
nests_entered |= sj_nest->sj_inner_tables;
|
|
|
|
if (!(sj_nest->sj_inner_tables & ~cur_map))
|
|
|
|
{
|
|
|
|
// all nest tables are in the prefix already
|
|
|
|
nests_entered &= ~sj_nest->sj_inner_tables;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(nests_entered == cur_sj_inner_tables);
|
|
|
|
}
|
|
|
|
#endif
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
Remove the last join tab from from join->cur_sj_inner_tables bitmap
|
2022-06-06 22:21:22 +03:00
|
|
|
|
|
|
|
@note
|
|
|
|
remaining_tables contains @tab.
|
|
|
|
|
|
|
|
@seealso update_sj_state() does the reverse
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
void restore_prev_sj_state(const table_map remaining_tables,
|
2021-10-06 12:31:19 +03:00
|
|
|
const JOIN_TAB *tab, uint idx)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
TABLE_LIST *emb_sj_nest;
|
2013-02-07 21:46:02 -08:00
|
|
|
|
2021-10-06 12:31:19 +03:00
|
|
|
if ((emb_sj_nest= tab->emb_sj_nest))
|
2013-02-07 21:46:02 -08:00
|
|
|
{
|
2021-10-06 12:31:19 +03:00
|
|
|
table_map subq_tables= emb_sj_nest->sj_inner_tables;
|
2013-02-07 21:46:02 -08:00
|
|
|
tab->join->sjm_lookup_tables &= ~subq_tables;
|
|
|
|
|
2021-10-06 12:31:19 +03:00
|
|
|
if (!tab->join->emb_sjm_nest)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2021-10-06 12:31:19 +03:00
|
|
|
table_map subq_tables= (emb_sj_nest->sj_inner_tables &
|
|
|
|
~tab->join->const_table_map);
|
|
|
|
/* If we're removing the last SJ-inner table, remove the sj-nest */
|
|
|
|
if ((remaining_tables & subq_tables) == subq_tables)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
All non-const tables of the SJ nest are in the remaining_tables.
|
|
|
|
we are not in the nest anymore.
|
|
|
|
*/
|
|
|
|
tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Semi-join nest has:
|
|
|
|
- a table being removed (not in the prefix)
|
|
|
|
- some tables in the prefix.
|
|
|
|
*/
|
|
|
|
tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
|
|
|
|
}
|
2022-06-06 22:21:22 +03:00
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2022-06-06 22:21:22 +03:00
|
|
|
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
/* positions[idx] has been removed. Verify the state for [0...idx-1] */
|
|
|
|
tab->join->dbug_verify_sj_inner_tables(idx);
|
|
|
|
#endif
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Given a semi-join nest, find out which of the IN-equalities are bound
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
get_bound_sj_equalities()
|
|
|
|
sj_nest Semi-join nest
|
|
|
|
remaining_tables Tables that are not yet bound
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Given a semi-join nest, find out which of the IN-equalities have their
|
|
|
|
left part expression bound (i.e. the said expression doesn't refer to
|
|
|
|
any of remaining_tables and can be evaluated).
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
Bitmap of bound IN-equalities.
|
|
|
|
*/
|
|
|
|
|
|
|
|
ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
|
|
|
|
table_map remaining_tables)
|
|
|
|
{
|
2017-02-18 17:47:31 +01:00
|
|
|
List_iterator<Item_ptr> li(sj_nest->nested_join->sj_outer_expr_list);
|
|
|
|
Item **item;
|
2010-02-16 00:53:06 +03:00
|
|
|
uint i= 0;
|
|
|
|
ulonglong res= 0;
|
|
|
|
while ((item= li++))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Q: should this take into account equality propagation and how?
|
|
|
|
A: If e->outer_side is an Item_field, walk over the equality
|
|
|
|
class and see if there is an element that is bound?
|
|
|
|
(this is an optional feature)
|
|
|
|
*/
|
2017-02-18 17:47:31 +01:00
|
|
|
if (!(item[0]->used_tables() & remaining_tables))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
res |= 1ULL << i;
|
|
|
|
}
|
2011-09-05 19:28:22 +04:00
|
|
|
i++;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Check if the last tables of the partial join order allow to use
|
|
|
|
sj-materialization strategy for them
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
at_sjmat_pos()
|
|
|
|
join
|
|
|
|
remaining_tables
|
|
|
|
tab the last table's join tab
|
|
|
|
idx last table's index
|
|
|
|
loose_scan OUT TRUE <=> use LooseScan
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
TRUE Yes, can apply sj-materialization
|
|
|
|
FALSE No, some of the requirements are not met
|
|
|
|
*/
|
|
|
|
|
|
|
|
static SJ_MATERIALIZATION_INFO *
|
|
|
|
at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
|
|
|
|
uint idx, bool *loose_scan)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Check if
|
|
|
|
1. We're in a semi-join nest that can be run with SJ-materialization
|
|
|
|
2. All the tables correlated through the IN subquery are in the prefix
|
|
|
|
*/
|
|
|
|
TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
|
|
|
|
table_map suffix= remaining_tables & ~tab->table->map;
|
|
|
|
if (emb_sj_nest && emb_sj_nest->sj_mat_info &&
|
|
|
|
!(suffix & emb_sj_nest->sj_inner_tables))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Walk back and check if all immediately preceding tables are from
|
|
|
|
this semi-join.
|
|
|
|
*/
|
|
|
|
uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
|
|
|
|
for (uint i= 1; i < n_tables ; i++)
|
|
|
|
{
|
|
|
|
if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
|
|
|
|
return NULL;
|
|
|
|
}
|
2014-02-19 14:05:15 +04:00
|
|
|
*loose_scan= MY_TEST(remaining_tables & ~tab->table->map &
|
|
|
|
(emb_sj_nest->sj_inner_tables |
|
|
|
|
emb_sj_nest->nested_join->sj_depends_on));
|
2010-02-16 00:53:06 +03:00
|
|
|
if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed)
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
return emb_sj_nest->sj_mat_info;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-05-13 13:15:17 +04:00
|
|
|
/*
|
|
|
|
Re-calculate values of join->best_positions[start..end].prefix_record_count
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
|
|
|
|
{
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likehood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calclation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost nto account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Allmost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make lables more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrivals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcomming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into considering
cost_for_index_read() if there where no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
DBUG_ASSERT(start >= join->const_tables);
|
|
|
|
|
2012-05-13 13:15:17 +04:00
|
|
|
for (uint j= start; j < end ;j++)
|
|
|
|
{
|
|
|
|
double prefix_count;
|
|
|
|
if (j == join->const_tables)
|
|
|
|
prefix_count= 1.0;
|
|
|
|
else
|
2019-05-27 19:08:00 -07:00
|
|
|
prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count,
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimizer different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs has been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanched keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_patch() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_patch()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in the OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
join->best_positions[j-1].records_out);
|
2012-05-13 13:15:17 +04:00
|
|
|
|
|
|
|
join->best_positions[j].prefix_record_count= prefix_count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
Fix semi-join strategies for the picked join order
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
fix_semijoin_strategies_for_picked_join_order()
|
|
|
|
join The join with the picked join order
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Fix semi-join strategies for the picked join order. This is a step that
|
|
|
|
needs to be done right after we have fixed the join order. What we do
|
|
|
|
here is switch join's semi-join strategy description from backward-based
|
|
|
|
to forward-based.
|
|
|
|
|
|
|
|
When join optimization is in progress, we re-consider semi-join
|
|
|
|
strategies after we've added another table. Here's an illustration.
|
|
|
|
Suppose the join optimization is underway:
|
|
|
|
|
|
|
|
1) ot1 it1 it2
|
|
|
|
sjX -- looking at (ot1, it1, it2) join prefix, we decide
|
|
|
|
to use semi-join strategy sjX.
|
|
|
|
|
|
|
|
2) ot1 it1 it2 ot2
|
|
|
|
sjX sjY -- Having added table ot2, we now may consider
|
|
|
|
another semi-join strategy and decide to use a
|
|
|
|
different strategy sjY. Note that the record
|
|
|
|
of sjX has remained under it2. That is
|
|
|
|
necessary because we need to be able to get
|
|
|
|
back to (ot1, it1, it2) join prefix.
|
|
|
|
What makes things even worse is that there are cases where the choice
|
|
|
|
of sjY changes the way we should access it2.
|
|
|
|
|
|
|
|
3) [ot1 it1 it2 ot2 ot3]
|
|
|
|
sjX sjY -- This means that after join optimization is
|
|
|
|
finished, semi-join info should be read
|
|
|
|
right-to-left (while nearly all plan refinement
|
|
|
|
functions, EXPLAIN, etc proceed from left to
|
|
|
|
right)
|
|
|
|
|
|
|
|
This function does the needed reversal, making it possible to read the
|
|
|
|
join and semi-join order from left to right.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
|
|
|
|
{
|
2013-02-07 21:46:02 -08:00
|
|
|
join->sjm_lookup_tables= 0;
|
2017-06-07 12:45:09 -07:00
|
|
|
join->sjm_scan_tables= 0;
|
2019-02-18 17:11:20 +05:30
|
|
|
if (!join->select_lex->sj_nests.elements)
|
|
|
|
return;
|
2020-03-06 10:33:11 +02:00
|
|
|
|
|
|
|
THD *thd= join->thd;
|
|
|
|
uint table_count=join->table_count;
|
|
|
|
uint tablenr;
|
|
|
|
table_map remaining_tables= 0;
|
|
|
|
table_map handled_tabs= 0;
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
|
|
Json_writer_array trace_semijoin_strategies(thd,
|
2020-03-06 10:33:11 +02:00
|
|
|
"fix_semijoin_strategies_for_picked_join_order");
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
|
|
|
|
{
|
|
|
|
POSITION *pos= join->best_positions + tablenr;
|
|
|
|
JOIN_TAB *s= pos->table;
|
2015-02-10 14:05:49 +04:00
|
|
|
uint UNINIT_VAR(first); // Set by every branch except SJ_OPT_NONE which doesn't use it
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE)
|
|
|
|
{
|
|
|
|
remaining_tables |= s->table->map;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
|
|
|
|
{
|
|
|
|
SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
|
|
|
|
sjm->is_used= TRUE;
|
|
|
|
sjm->is_sj_scan= FALSE;
|
2016-04-28 21:59:23 +04:00
|
|
|
memcpy((uchar*) (pos - sjm->tables + 1), (uchar*) sjm->positions,
|
2010-02-16 00:53:06 +03:00
|
|
|
sizeof(POSITION) * sjm->tables);
|
2012-05-13 13:15:17 +04:00
|
|
|
recalculate_prefix_record_count(join, tablenr - sjm->tables + 1,
|
|
|
|
tablenr);
|
2010-02-16 00:53:06 +03:00
|
|
|
first= tablenr - sjm->tables + 1;
|
|
|
|
join->best_positions[first].n_sj_tables= sjm->tables;
|
|
|
|
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE;
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_object semijoin_strategy(thd);
|
2019-08-25 11:03:19 +03:00
|
|
|
semijoin_strategy.add("semi_join_strategy","SJ-Materialization");
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_array semijoin_plan(thd, "join_order");
|
2018-07-11 10:43:38 +03:00
|
|
|
for (uint i= first; i < first+ sjm->tables; i++)
|
2019-02-18 17:11:20 +05:30
|
|
|
{
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
|
|
|
Json_writer_object trace_one_table(thd);
|
|
|
|
trace_one_table.add_table_name(join->best_positions[i].table);
|
|
|
|
}
|
2018-07-11 10:43:38 +03:00
|
|
|
join->sjm_lookup_tables |= join->best_positions[i].table->table->map;
|
2019-02-18 17:11:20 +05:30
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
POSITION *first_inner= join->best_positions + pos->sjmat_picker.sjm_scan_last_inner;
|
2010-02-16 00:53:06 +03:00
|
|
|
SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info;
|
|
|
|
sjm->is_used= TRUE;
|
|
|
|
sjm->is_sj_scan= TRUE;
|
2011-11-23 04:25:52 +04:00
|
|
|
first= pos->sjmat_picker.sjm_scan_last_inner - sjm->tables + 1;
|
2016-04-28 21:59:23 +04:00
|
|
|
memcpy((uchar*) (join->best_positions + first),
|
|
|
|
(uchar*) sjm->positions, sizeof(POSITION) * sjm->tables);
|
2012-05-13 13:15:17 +04:00
|
|
|
recalculate_prefix_record_count(join, first, first + sjm->tables);
|
2010-02-16 00:53:06 +03:00
|
|
|
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
|
|
|
|
join->best_positions[first].n_sj_tables= sjm->tables;
|
|
|
|
/*
|
2022-06-06 22:21:22 +03:00
|
|
|
Do what optimize_semi_joins did: re-run best_access_path for every
|
|
|
|
table in the [last_inner_table + 1; pos..) range
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
double prefix_rec_count;
|
|
|
|
/* Get the prefix record count */
|
|
|
|
if (first == join->const_tables)
|
|
|
|
prefix_rec_count= 1.0;
|
|
|
|
else
|
|
|
|
prefix_rec_count= join->best_positions[first-1].prefix_record_count;
|
|
|
|
|
|
|
|
/* Add materialization record count*/
|
|
|
|
prefix_rec_count *= sjm->rows;
|
|
|
|
|
|
|
|
uint i;
|
|
|
|
table_map rem_tables= remaining_tables;
|
|
|
|
for (i= tablenr; i != (first + sjm->tables - 1); i--)
|
|
|
|
rem_tables |= join->best_positions[i].table->table->map;
|
|
|
|
|
2017-06-07 12:45:09 -07:00
|
|
|
for (i= first; i < first+ sjm->tables; i++)
|
|
|
|
join->sjm_scan_tables |= join->best_positions[i].table->table->map;
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
POSITION dummy;
|
|
|
|
join->cur_sj_inner_tables= 0;
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_object semijoin_strategy(thd);
|
2019-08-25 11:03:19 +03:00
|
|
|
semijoin_strategy.add("semi_join_strategy","SJ-Materialization-Scan");
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_array semijoin_plan(thd, "join_order");
|
2010-02-16 00:53:06 +03:00
|
|
|
for (i= first + sjm->tables; i <= tablenr; i++)
|
|
|
|
{
|
2021-10-29 01:32:21 +03:00
|
|
|
Json_writer_object trace_one_table(thd);
|
2019-02-18 17:11:20 +05:30
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
|
|
|
trace_one_table.add_table_name(join->best_positions[i].table);
|
|
|
|
}
|
2019-09-10 23:51:42 +03:00
|
|
|
best_access_path(join, join->best_positions[i].table, rem_tables,
|
|
|
|
join->best_positions, i,
|
2011-06-04 19:56:06 -07:00
|
|
|
FALSE, prefix_rec_count,
|
2010-05-26 13:18:18 -07:00
|
|
|
join->best_positions + i, &dummy);
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize for different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine costs are stored in the OPTIMIZER_COSTS structure. When a
handlerton is created, we also create a new cost variable for the
handlerton. We also create a new variable if the user changes an
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
prefix_rec_count *= join->best_positions[i].records_out;
|
2010-02-16 00:53:06 +03:00
|
|
|
rem_tables &= ~join->best_positions[i].table->table->map;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
first= pos->firstmatch_picker.first_firstmatch_table;
|
2010-02-16 00:53:06 +03:00
|
|
|
join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
|
|
|
|
join->best_positions[first].n_sj_tables= tablenr - first + 1;
|
|
|
|
POSITION dummy; // For loose scan paths
|
|
|
|
double record_count= (first== join->const_tables)? 1.0:
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index-only accesses, clustered keys or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
join->best_positions[first - 1].prefix_record_count;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
table_map rem_tables= remaining_tables;
|
|
|
|
uint idx;
|
|
|
|
for (idx= first; idx <= tablenr; idx++)
|
|
|
|
{
|
|
|
|
rem_tables |= join->best_positions[idx].table->table->map;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
Re-run best_access_path to produce best access methods that do not use
|
|
|
|
join buffering
|
|
|
|
*/
|
|
|
|
join->cur_sj_inner_tables= 0;
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_object semijoin_strategy(thd);
|
2023-06-05 19:27:00 +02:00
|
|
|
double inner_fanout= 1.0;
|
2019-08-25 11:03:19 +03:00
|
|
|
semijoin_strategy.add("semi_join_strategy","FirstMatch");
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_array semijoin_plan(thd, "join_order");
|
2010-02-16 00:53:06 +03:00
|
|
|
for (idx= first; idx <= tablenr; idx++)
|
|
|
|
{
|
2021-10-29 01:32:21 +03:00
|
|
|
Json_writer_object trace_one_table(thd);
|
2019-02-18 17:11:20 +05:30
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
|
|
|
trace_one_table.add_table_name(join->best_positions[idx].table);
|
|
|
|
}
|
2023-03-07 11:25:16 +02:00
|
|
|
if (join->best_positions[idx].use_join_buffer &&
|
|
|
|
!join->best_positions[idx].firstmatch_with_join_buf)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2023-05-26 17:26:42 +03:00
|
|
|
/*
|
|
|
|
records_out cannot be bigger just because we remove join buffer
|
|
|
|
*/
|
|
|
|
double records_out= join->best_positions[idx].records_out;
|
|
|
|
best_access_path(join, join->best_positions[idx].table,
|
|
|
|
rem_tables, join->best_positions, idx,
|
|
|
|
TRUE /* no jbuf */,
|
|
|
|
record_count, join->best_positions + idx, &dummy);
|
|
|
|
set_if_smaller(join->best_positions[idx].records_out, records_out);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
2023-06-05 19:27:00 +02:00
|
|
|
/*
|
|
|
|
TODO: We should also compute the selectivity here, as well as adjust
|
|
|
|
the records_out according to the fraction of records removed by
|
|
|
|
the semi-join.
|
|
|
|
*/
|
|
|
|
double rec_out= join->best_positions[idx].records_out;
|
|
|
|
if (join->best_positions[idx].table->emb_sj_nest)
|
|
|
|
inner_fanout *= rec_out;
|
|
|
|
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost result in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
record_count *= join->best_positions[idx].records_out;
|
2010-02-16 00:53:06 +03:00
|
|
|
rem_tables &= ~join->best_positions[idx].table->table->map;
|
|
|
|
}
|
2023-06-05 19:27:00 +02:00
|
|
|
if (inner_fanout > 1.0)
|
|
|
|
join->best_positions[tablenr].records_out /= inner_fanout;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
|
|
|
|
{
|
2011-11-23 04:25:52 +04:00
|
|
|
first= pos->loosescan_picker.first_loosescan_table;
|
2010-02-16 00:53:06 +03:00
|
|
|
POSITION *first_pos= join->best_positions + first;
|
|
|
|
POSITION loose_scan_pos; // For loose scan paths
|
|
|
|
double record_count= (first== join->const_tables)? 1.0:
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
join->best_positions[first - 1].prefix_record_count;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
table_map rem_tables= remaining_tables;
|
|
|
|
uint idx;
|
|
|
|
for (idx= first; idx <= tablenr; idx++)
|
|
|
|
rem_tables |= join->best_positions[idx].table->table->map;
|
|
|
|
/*
|
|
|
|
Re-run best_access_path to produce best access methods that do not use
|
|
|
|
join buffering
|
|
|
|
*/
|
|
|
|
join->cur_sj_inner_tables= 0;
|
2019-02-18 17:11:20 +05:30
|
|
|
Json_writer_object semijoin_strategy(thd);
|
2019-08-25 11:03:19 +03:00
|
|
|
semijoin_strategy.add("semi_join_strategy","LooseScan");
|
2019-09-10 14:01:31 +05:30
|
|
|
Json_writer_array semijoin_plan(thd, "join_order");
|
2010-02-16 00:53:06 +03:00
|
|
|
for (idx= first; idx <= tablenr; idx++)
|
|
|
|
{
|
2021-10-29 01:32:21 +03:00
|
|
|
Json_writer_object trace_one_table(thd);
|
2019-02-18 17:11:20 +05:30
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
|
|
|
trace_one_table.add_table_name(join->best_positions[idx].table);
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
if (join->best_positions[idx].use_join_buffer || (idx == first))
|
|
|
|
{
|
|
|
|
best_access_path(join, join->best_positions[idx].table,
|
2019-09-10 23:51:42 +03:00
|
|
|
rem_tables, join->best_positions, idx,
|
|
|
|
TRUE /* no jbuf */,
|
2010-02-16 00:53:06 +03:00
|
|
|
record_count, join->best_positions + idx,
|
|
|
|
&loose_scan_pos);
|
|
|
|
if (idx==first)
|
2012-03-25 18:31:35 +04:00
|
|
|
{
|
2010-02-16 00:53:06 +03:00
|
|
|
join->best_positions[idx]= loose_scan_pos;
|
2012-03-25 18:31:35 +04:00
|
|
|
/*
|
|
|
|
If LooseScan is based on ref access (including the "degenerate"
|
|
|
|
one with 0 key parts), we should use full index scan.
|
|
|
|
|
|
|
|
Unfortunately, lots of code assumes that if tab->type==JT_ALL &&
|
|
|
|
tab->quick!=NULL, then quick select should be used. The only
|
|
|
|
simple way to fix this is to remove the quick select:
|
|
|
|
*/
|
|
|
|
if (join->best_positions[idx].key)
|
|
|
|
{
|
2022-10-31 18:02:36 +02:00
|
|
|
DBUG_ASSERT(join->best_positions[idx].type != JT_RANGE);
|
2012-03-25 18:31:35 +04:00
|
|
|
delete join->best_positions[idx].table->quick;
|
|
|
|
join->best_positions[idx].table->quick= NULL;
|
|
|
|
}
|
|
|
|
}
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
rem_tables &= ~join->best_positions[idx].table->table->map;
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading of a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
works good for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
record_count *= join->best_positions[idx].records_out;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
|
|
|
|
first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
|
|
|
|
{
|
2019-09-12 19:07:56 +03:00
|
|
|
Json_writer_object semijoin_strategy(thd);
|
|
|
|
semijoin_strategy.add("semi_join_strategy","DuplicateWeedout");
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
|
|
|
|
this table.
|
|
|
|
*/
|
2011-11-23 04:25:52 +04:00
|
|
|
first= pos->dups_weedout_picker.first_dupsweedout_table;
|
2010-02-16 00:53:06 +03:00
|
|
|
join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT;
|
|
|
|
join->best_positions[first].n_sj_tables= tablenr - first + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint i_end= first + join->best_positions[first].n_sj_tables;
|
|
|
|
for (uint i= first; i < i_end; i++)
|
|
|
|
{
|
|
|
|
if (i != first)
|
|
|
|
join->best_positions[i].sj_strategy= SJ_OPT_NONE;
|
|
|
|
handled_tabs |= join->best_positions[i].table->table->map;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tablenr != first)
|
|
|
|
pos->sj_strategy= SJ_OPT_NONE;
|
|
|
|
remaining_tables |= s->table->map;
|
2010-03-13 23:04:52 +03:00
|
|
|
join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy;
|
2011-05-25 19:31:13 +04:00
|
|
|
join->join_tab[first].n_sj_tables= join->best_positions[first].n_sj_tables;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-06-04 17:40:57 +04:00
|
|
|
|
2019-09-22 01:17:30 +05:30
|
|
|
/*
|
|
|
|
Return the number of tables at the top-level of the JOIN
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
get_number_of_tables_at_top_level()
|
|
|
|
join The join with the picked join order
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
The number of tables in the JOIN currently include all the inner tables of the
|
|
|
|
mergeable semi-joins. The function would make sure that we only count the semi-join
|
|
|
|
nest and not the inner tables of the semi-join nest.
|
|
|
|
*/
|
|
|
|
|
|
|
|
uint get_number_of_tables_at_top_level(JOIN *join)
{
  uint top_level_count= 0;

  for (uint idx= 0; idx < join->table_count; )
  {
    const POSITION *pos= &join->best_positions[idx];
    const bool starts_sjm_nest=
      (pos->sj_strategy == SJ_OPT_MATERIALIZE ||
       pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);

    /* Every position we stop at is one top-level entry */
    top_level_count++;

    /*
      A materialized semi-join nest counts once: jump over all of its
      tables. Any other position advances by a single table.
    */
    if (starts_sjm_nest)
      idx+= pos->table->emb_sj_nest->sj_mat_info->tables;
    else
      idx++;
  }
  return top_level_count;
}
|
|
|
|
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Setup semi-join materialization strategy for one semi-join nest
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
|
|
|
|
setup_sj_materialization()
|
|
|
|
tab The first tab in the semi-join
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Setup execution structures for one semi-join materialization nest:
|
|
|
|
- Create the materialization temporary table
|
|
|
|
- If we're going to do index lookups
|
|
|
|
create TABLE_REF structure to make the lookups
|
|
|
|
- else (if we're going to do a full scan of the temptable)
|
|
|
|
create Copy_field structures to do copying.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE Ok
|
|
|
|
TRUE Error
|
|
|
|
*/
|
|
|
|
|
2011-06-22 01:57:28 +04:00
|
|
|
bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
{
  /*
    First phase of the SJ-Materialization setup for the nest whose
    bush root is sjm_tab:
     - collect the (fixed) select list items of the subquery,
     - create the DISTINCT temporary table that will hold them,
     - register the table with the JOIN and attach it to sjm_tab.

    Returns FALSE on success, TRUE on error.
  */
  JOIN_TAB *tab= sjm_tab->bush_children->start;
  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
  SJ_MATERIALIZATION_INFO *sjm;
  THD *thd;

  DBUG_ENTER("setup_sj_materialization_part1");

  /* Walk out of outer join nests until we reach the semi-join nest we're in */
  while (!emb_sj_nest->sj_mat_info)
    emb_sj_nest= emb_sj_nest->embedding;

  sjm= emb_sj_nest->sj_mat_info;
  thd= tab->join->thd;
  /* First the calls come to the materialization function */

  DBUG_ASSERT(sjm->is_used);
  /*
    Set up the table to write to, do as select_union::create_result_table does
  */
  sjm->sjm_table_param.init();
  sjm->sjm_table_param.bit_fields_as_long= TRUE;
  SELECT_LEX *subq_select= emb_sj_nest->sj_subq_pred->unit->first_select();
  const LEX_CSTRING sj_materialize_name= { STRING_WITH_LEN("sj-materialize") };
  List_iterator<Item> it(subq_select->item_list);
  Item *item;
  while ((item= it++))
  {
    /*
      This semi-join replaced the subquery (subq_select) and so on
      re-executing it will not be prepared. To use the Items from its
      select list we have to prepare (fix_fields) them
    */
    if (item->fix_fields_if_needed(thd, it.ref()))
      DBUG_RETURN(TRUE);
    item= *(it.ref()); // it can be changed by fix_fields
    DBUG_ASSERT(!item->name.length || item->name.length == strlen(item->name.str));
    /*
      A failed push_back would leave the temporary table with fewer columns
      than field_count below claims, so treat it as an error.
    */
    if (sjm->sjm_table_cols.push_back(item, thd->mem_root))
      DBUG_RETURN(TRUE);
  }

  sjm->sjm_table_param.field_count= subq_select->item_list.elements;
  sjm->sjm_table_param.func_count= sjm->sjm_table_param.field_count;
  sjm->sjm_table_param.force_not_null_cols= TRUE;

  if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param,
                                     sjm->sjm_table_cols, (ORDER*) 0,
                                     TRUE /* distinct */,
                                     1, /*save_sum_fields*/
                                     thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS,
                                     HA_POS_ERROR /*rows_limit */,
                                     &sj_materialize_name)))
    DBUG_RETURN(TRUE); /* purecov: inspected */
  sjm->table->map= emb_sj_nest->nested_join->used_tables;
  sjm->table->file->extra(HA_EXTRA_WRITE_CACHE);
  sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);

  /*
    Register the table and the materialization info with the JOIN.
    NOTE(review): the results of these two push_back calls are not checked;
    confirm whether a failure here needs to free sjm->table.
  */
  tab->join->sj_tmp_tables.push_back(sjm->table, thd->mem_root);
  tab->join->sjm_info_list.push_back(sjm, thd->mem_root);

  sjm->materialized= FALSE;
  /* From now on the bush root reads from the materialized table */
  sjm_tab->table= sjm->table;
  sjm_tab->tab_list= emb_sj_nest;
  sjm->table->pos_in_table_list= emb_sj_nest;

  DBUG_RETURN(FALSE);
}
|
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
/**
|
|
|
|
@retval
|
|
|
|
FALSE ok
|
|
|
|
TRUE error
|
|
|
|
*/
|
2011-06-22 01:57:28 +04:00
|
|
|
|
|
|
|
bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
{
  /*
    Second phase of the SJ-Materialization setup for the nest whose bush
    root is sjm_tab. Depending on sjm->is_sj_scan this either
     - prepares a TABLE_REF for index lookups into the materialized table
       (sjm_tab becomes JT_EQ_REF), or
     - prepares Copy_field objects that copy temptable columns back into
       the source tables' record buffers for a full scan (sjm_tab becomes
       JT_ALL).
    Finally the last inner table of the nest gets end_sj_materialize as
    its next_select function.
  */
  DBUG_ENTER("setup_sj_materialization_part2");
  JOIN_TAB *tab= sjm_tab->bush_children->start;
  TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
  /* Walk out of outer join nests until we reach the semi-join nest we're in */
  while (!emb_sj_nest->sj_mat_info)
    emb_sj_nest= emb_sj_nest->embedding;
  SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
  THD *thd= tab->join->thd;

  if (!sjm->is_sj_scan)
  {
    KEY *tmp_key= sjm->table->key_info; /* The only index on the temporary table. */
    uint tmp_key_parts= tmp_key->user_defined_key_parts; /* Number of keyparts in tmp_key. */

    /*
      Create/initialize everything we will need to index lookups into the
      temptable.
    */
    TABLE_REF *tab_ref= &sjm_tab->ref;
    tab_ref->key= 0; /* The only temp table index. */
    tab_ref->key_length= tmp_key->key_length;
    if (!(tab_ref->key_buff=
          (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
        !(tab_ref->key_copy=
          (store_key**) thd->alloc((sizeof(store_key*) *
                                    (tmp_key_parts + 1)))) ||
        !(tab_ref->items=
          (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
      DBUG_RETURN(TRUE); /* purecov: inspected */

    /* key_buff2 is the second half of the buffer allocated above */
    tab_ref->key_buff2= tab_ref->key_buff + ALIGN_SIZE(tmp_key->key_length);
    tab_ref->key_err= 1;
    tab_ref->null_rejecting= 1;
    tab_ref->disable_cache= FALSE;

    KEY_PART_INFO *cur_key_part= tmp_key->key_part;
    store_key **ref_key= tab_ref->key_copy;
    uchar *cur_ref_buff= tab_ref->key_buff;

    for (uint i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
    {
      tab_ref->items[i]=
        emb_sj_nest->sj_subq_pred->left_exp()->element_index(i);
      int null_count= MY_TEST(cur_key_part->field->real_maybe_null());
      /*
        TODO:
        the NULL byte is taken into account in
        cur_key_part->store_length, so instead of
        cur_ref_buff + MY_TEST(maybe_null), we could
        use that information instead.
      */
      *ref_key= new store_key_item(thd, cur_key_part->field,
                                   cur_ref_buff + null_count,
                                   null_count ? cur_ref_buff : 0,
                                   cur_key_part->length, tab_ref->items[i],
                                   FALSE);
      if (!*ref_key)
        DBUG_RETURN(TRUE);
      cur_ref_buff+= cur_key_part->store_length;
    }
    *ref_key= NULL; /* End marker. */

    /*
      We don't ever have guarded conditions for SJM tables, but code at SQL
      layer depends on cond_guards array being alloced.
    */
    if (!(tab_ref->cond_guards=
          (bool**) thd->calloc(sizeof(bool*) * tmp_key_parts)))
      DBUG_RETURN(TRUE);

    tab_ref->key_parts= tmp_key_parts;
    sjm->tab_ref= tab_ref;

    /*
      Remove the injected semi-join IN-equalities from join_tab conds. This
      needs to be done because the IN-equalities refer to columns of
      sj-inner tables which are not available after the materialization
      has been finished.
    */
    for (uint i= 0; i < sjm->tables; i++)
    {
      if (remove_sj_conds(thd, &tab[i].select_cond) ||
          (tab[i].select && remove_sj_conds(thd, &tab[i].select->cond)))
        DBUG_RETURN(TRUE);
    }
    if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm,
                                                      emb_sj_nest->sj_subq_pred)))
      DBUG_RETURN(TRUE); /* purecov: inspected */
    sjm_tab->type= JT_EQ_REF;
    sjm_tab->select_cond= sjm->in_equality;
  }
  else
  {
    /*
      We'll be doing full scan of the temptable.
      Setup copying of temptable columns back to the record buffers
      for their source tables. We need this because IN-equalities
      refer to the original tables.

      EXAMPLE

      Consider the query:
        SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it)

      Suppose it's executed with SJ-Materialization-scan. We choose to do scan
      if we can't do the lookup, i.e. the join order is (it, ot). The plan
      would look as follows:

        table    access method      condition
         it      materialize+scan    -
         ot      (whatever)          ot.col1=it.col2 (C2)

      The condition C2 refers to current row of table it. The problem is
      that by the time we evaluate C2, we would have finished with scanning
      it itself and will be scanning the temptable.

      At the moment, our solution is to copy back: when we get the next
      temptable record, we copy its columns to their corresponding columns
      in the record buffers for the source tables.
    */
    if (!(sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements]))
      DBUG_RETURN(TRUE);

    Ref_ptr_array p_items= emb_sj_nest->sj_subq_pred->unit->first_select()->ref_pointer_array;
    for (uint i= 0; i < sjm->sjm_table_cols.elements; i++)
    {
      bool dummy;
      Item_equal *item_eq;
      Item *item= p_items[i]->real_item();
      DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
      Field *copy_to= ((Item_field*)item)->field;
      /*
        Tricks with Item_equal are due to the following: suppose we have a
        query:

        ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2
                                               WHERE it1.col= it2.col)
         then equality propagation will create an

          Item_equal(it1.col, it2.col, ot.col)

         then substitute_for_best_equal_field() will change the conditions
         according to the join order:

         table | attached condition
         ------+--------------------
          it1  |
          it2  | it1.col=it2.col
          ot   | cond(it1.col)

         although we've originally had "SELECT it2.col", conditions attached
         to subsequent outer tables will refer to it1.col, so SJM-Scan will
         need to unpack data to there.
         That is, if an element from subquery's select list participates in
         equality propagation, then we need to unpack it to the first
         element equality propagation member that refers to table that is
         within the subquery.
      */
      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);

      if (item_eq)
      {
        List_iterator<Item> it(item_eq->equal_items);
        /* We're interested in field items only */
        if (item_eq->get_const())
          it++;
        Item *eq_member;
        while ((eq_member= it++))
        {
          /* Pick the first member that depends only on sj-inner tables */
          if (!(eq_member->used_tables() & ~emb_sj_nest->sj_inner_tables))
          {
            DBUG_ASSERT(eq_member->real_item()->type() == Item::FIELD_ITEM);
            copy_to= ((Item_field *) (eq_member->real_item()))->field;
            break;
          }
        }
      }
      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
      /* The write_set for source tables must be set up to allow the copying */
      bitmap_set_bit(copy_to->table->write_set, copy_to->field_index);
    }
    sjm_tab->type= JT_ALL;

    /* Initialize full scan */
    sjm_tab->read_first_record= join_init_read_record;
    sjm_tab->read_record.copy_field= sjm->copy_field;
    sjm_tab->read_record.copy_field_end= sjm->copy_field +
                                         sjm->sjm_table_cols.elements;
    sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
  }

  /* Rows produced by the last inner table go to the materialization writer */
  sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;

  DBUG_RETURN(FALSE);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Create subquery IN-equalities assuming use of materialization strategy
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
create_subq_in_equalities()
|
|
|
|
thd Thread handle
|
|
|
|
sjm Semi-join materialization structure
|
|
|
|
subq_pred The subquery predicate
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Create subquery IN-equality predicates. That is, for a subquery
|
|
|
|
|
|
|
|
(oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...)
|
|
|
|
|
|
|
|
create "oe1=ie1 AND oe2=ie2 AND ..." expression, such that ie1, ie2, ..
|
|
|
|
refer to the columns of the table that's used to materialize the
|
|
|
|
subquery.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
Created condition
|
|
|
|
*/
|
|
|
|
|
|
|
|
static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm,
                                       Item_in_subselect *subq_pred)
{
  /*
    Build the condition that compares each element of the subquery
    predicate's left expression with the matching column of the
    materialized table (sjm->table->field[i]), AND-ing the equalities
    together when there is more than one column. The result is fixed
    before it is returned. Returns NULL on any failure.
  */
  Item *res= NULL;
  Item *left_exp= subq_pred->left_exp();
  uint ncols= left_exp->cols();
  if (ncols == 1)
  {
    /*
      Allocate the temptable column item first so that a failed allocation
      is caught here instead of being handed to the Item_func_eq constructor.
    */
    Item *tmp_col= new (thd->mem_root) Item_field(thd, sjm->table->field[0]);
    if (!tmp_col ||
        !(res= new (thd->mem_root) Item_func_eq(thd, left_exp, tmp_col)))
      return NULL; /* purecov: inspected */
  }
  else
  {
    for (uint i= 0; i < ncols; i++)
    {
      Item *conj;
      Item *tmp_col= new (thd->mem_root) Item_field(thd, sjm->table->field[i]);
      if (!tmp_col ||
          !(conj= new (thd->mem_root) Item_func_eq(thd,
                                                   left_exp->element_index(i),
                                                   tmp_col)) ||
          !(res= and_items(thd, res, conj)))
        return NULL; /* purecov: inspected */
    }
  }
  if (res->fix_fields(thd, &res))
    return NULL; /* purecov: inspected */
  return res;
}
|
|
|
|
|
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
/**
|
|
|
|
@retval
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
static bool remove_sj_conds(THD *thd, Item **tree)
{
  Item *cond= *tree;

  /* Nothing attached: nothing to remove */
  if (!cond)
    return 0;

  /* The whole condition is an injected IN-equality: drop it entirely */
  if (is_cond_sj_in_equality(cond))
  {
    *tree= NULL;
    return 0;
  }

  if (cond->type() != Item::COND_ITEM)
    return 0;

  /*
    Scan the direct arguments of the AND/OR item and put a constant 1 in
    place of every injected IN-equality found among them.
  */
  List_iterator<Item> arg_it(*(((Item_cond*) cond)->argument_list()));
  while (Item *arg= arg_it++)
  {
    if (!is_cond_sj_in_equality(arg))
      continue;

    Item_int *const_one= new (thd->mem_root) Item_int(thd, 1);
    if (!const_one)
      return 1;
    arg_it.replace(const_one);
  }
  return 0;
}
|
|
|
|
|
2017-11-14 07:47:58 +02:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/* Check if given Item was injected by semi-join equality */
|
|
|
|
static bool is_cond_sj_in_equality(Item *item)
{
  /* Only '=' function items can carry an IN-equality number */
  if (item->type() != Item::FUNC_ITEM ||
      ((Item_func*) item)->functype() != Item_func::EQ_FUNC)
    return FALSE;

  /* in_equality_no differs from UINT_MAX for semi-join injected equalities */
  return MY_TEST(((Item_func_eq*) item)->in_equality_no != UINT_MAX);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Create a temporary table to weed out duplicate rowid combinations
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
create_sj_weedout_tmp_table()
|
2010-02-16 00:53:06 +03:00
|
|
|
thd Thread handle
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Create a temporary table to weed out duplicate rowid combinations. The
|
|
|
|
table has a single column that is a concatenation of all rowids in the
|
|
|
|
combination.
|
|
|
|
|
|
|
|
Depending on the needed length, there are two cases:
|
|
|
|
|
|
|
|
1. When the length of the column < max_key_length:
|
|
|
|
|
|
|
|
CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
|
|
|
|
|
|
|
|
2. Otherwise (not a valid SQL syntax but internally supported):
|
|
|
|
|
|
|
|
CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
|
|
|
|
|
|
|
|
The code in this function was produced by extraction of relevant parts
|
|
|
|
from create_tmp_table().
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE  OK
|
|
|
|
TRUE   Error
|
|
|
|
*/
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
bool
|
|
|
|
SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
MEM_ROOT *mem_root_save, own_root;
|
|
|
|
TABLE *table;
|
|
|
|
TABLE_SHARE *share;
|
|
|
|
uint temp_pool_slot=MY_BIT_NONE;
|
|
|
|
char *tmpname,path[FN_REFLEN];
|
|
|
|
Field **reg_field;
|
|
|
|
KEY_PART_INFO *key_part_info;
|
|
|
|
KEY *keyinfo;
|
|
|
|
uchar *group_buff;
|
|
|
|
uchar *bitmaps;
|
|
|
|
uint *blob_field;
|
|
|
|
bool using_unique_constraint=FALSE;
|
|
|
|
bool use_packed_rows= FALSE;
|
|
|
|
Field *field, *key_field;
|
2011-10-18 13:44:12 +03:00
|
|
|
uint null_pack_length, null_count;
|
2010-02-16 00:53:06 +03:00
|
|
|
uchar *null_flags;
|
|
|
|
uchar *pos;
|
2011-11-25 23:54:36 +04:00
|
|
|
DBUG_ENTER("create_sj_weedout_tmp_table");
|
|
|
|
DBUG_ASSERT(!is_degenerate);
|
|
|
|
|
|
|
|
tmp_table= NULL;
|
|
|
|
uint uniq_tuple_length_arg= rowid_len + null_bytes;
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
STEP 1: Get temporary table name
|
|
|
|
*/
|
|
|
|
if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
|
2021-07-17 08:57:29 +02:00
|
|
|
temp_pool_slot = temp_pool_set_next();
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
if (temp_pool_slot != MY_BIT_NONE) // we got a slot
|
2020-04-09 16:52:59 +03:00
|
|
|
sprintf(path, "%s-subquery-%lx-%i", tmp_file_prefix,
|
2010-02-16 00:53:06 +03:00
|
|
|
current_pid, temp_pool_slot);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* if we run out of slots or we are not using tempool */
|
2020-04-09 16:52:59 +03:00
|
|
|
sprintf(path,"%s-subquery-%lx-%lx-%x", tmp_file_prefix,current_pid,
|
2016-02-01 12:45:39 +02:00
|
|
|
(ulong) thd->thread_id, thd->tmp_table++);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
|
|
|
|
|
|
|
|
/* STEP 2: Figure if we'll be using a key or blob+constraint */
|
2013-01-07 20:21:05 +01:00
|
|
|
/* it always has my_charset_bin, so mbmaxlen==1 */
|
2010-02-16 00:53:06 +03:00
|
|
|
if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB)
|
|
|
|
using_unique_constraint= TRUE;
|
|
|
|
|
|
|
|
/* STEP 3: Allocate memory for temptable description */
|
2020-01-29 13:50:26 +01:00
|
|
|
init_sql_alloc(PSI_INSTRUMENT_ME, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0,
|
|
|
|
MYF(MY_THREAD_SPECIFIC));
|
2010-02-16 00:53:06 +03:00
|
|
|
if (!multi_alloc_root(&own_root,
|
|
|
|
&table, sizeof(*table),
|
|
|
|
&share, sizeof(*share),
|
|
|
|
®_field, sizeof(Field*) * (1+1),
|
|
|
|
&blob_field, sizeof(uint)*2,
|
|
|
|
&keyinfo, sizeof(*keyinfo),
|
|
|
|
&key_part_info, sizeof(*key_part_info) * 2,
|
|
|
|
&start_recinfo,
|
|
|
|
sizeof(*recinfo)*(1*2+4),
|
|
|
|
&tmpname, (uint) strlen(path)+1,
|
|
|
|
&group_buff, (!using_unique_constraint ?
|
|
|
|
uniq_tuple_length_arg : 0),
|
2017-02-10 17:01:45 +01:00
|
|
|
&bitmaps, bitmap_buffer_size(1)*6,
|
2010-02-16 00:53:06 +03:00
|
|
|
NullS))
|
|
|
|
{
|
|
|
|
if (temp_pool_slot != MY_BIT_NONE)
|
2021-07-17 08:57:29 +02:00
|
|
|
temp_pool_clear_bit(temp_pool_slot);
|
2011-11-25 23:54:36 +04:00
|
|
|
DBUG_RETURN(TRUE);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
strmov(tmpname,path);
|
|
|
|
|
|
|
|
|
|
|
|
/* STEP 4: Create TABLE description */
|
|
|
|
bzero((char*) table,sizeof(*table));
|
|
|
|
bzero((char*) reg_field,sizeof(Field*)*2);
|
|
|
|
|
|
|
|
table->mem_root= own_root;
|
|
|
|
mem_root_save= thd->mem_root;
|
|
|
|
thd->mem_root= &table->mem_root;
|
|
|
|
|
|
|
|
table->field=reg_field;
|
2010-11-24 00:08:48 +02:00
|
|
|
table->alias.set("weedout-tmp", sizeof("weedout-tmp")-1,
|
|
|
|
table_alias_charset);
|
2010-02-16 00:53:06 +03:00
|
|
|
table->reginfo.lock_type=TL_WRITE; /* Will be updated */
|
2016-11-14 20:24:03 +01:00
|
|
|
table->db_stat=HA_OPEN_KEYFILE;
|
2010-02-16 00:53:06 +03:00
|
|
|
table->map=1;
|
|
|
|
table->temp_pool_slot = temp_pool_slot;
|
|
|
|
table->copy_blobs= 1;
|
|
|
|
table->in_use= thd;
|
|
|
|
|
|
|
|
table->s= share;
|
|
|
|
init_tmp_table_share(thd, share, "", 0, tmpname, tmpname);
|
|
|
|
share->blob_field= blob_field;
|
|
|
|
share->table_charset= NULL;
|
|
|
|
share->primary_key= MAX_KEY; // Indicate no primary key
|
|
|
|
|
|
|
|
/* Create the field */
|
|
|
|
{
|
2017-04-23 19:39:57 +03:00
|
|
|
LEX_CSTRING field_name= {STRING_WITH_LEN("rowids") };
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
For the sake of uniformity, always use Field_varstring (altough we could
|
|
|
|
use Field_string for shorter keys)
|
|
|
|
*/
|
2017-04-23 19:39:57 +03:00
|
|
|
field= new Field_varstring(uniq_tuple_length_arg, FALSE, &field_name,
|
|
|
|
share, &my_charset_bin);
|
2010-02-16 00:53:06 +03:00
|
|
|
if (!field)
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
field->table= table;
|
2019-05-09 17:38:22 +02:00
|
|
|
field->key_start.clear_all();
|
|
|
|
field->part_of_key.clear_all();
|
|
|
|
field->part_of_sortkey.clear_all();
|
2010-02-16 00:53:06 +03:00
|
|
|
field->unireg_check= Field::NONE;
|
2019-08-14 20:27:00 +04:00
|
|
|
field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG);
|
2010-02-16 00:53:06 +03:00
|
|
|
field->reset_fields();
|
|
|
|
field->init(table);
|
|
|
|
field->orig_table= NULL;
|
|
|
|
|
|
|
|
field->field_index= 0;
|
|
|
|
|
|
|
|
*(reg_field++)= field;
|
|
|
|
*blob_field= 0;
|
|
|
|
*reg_field= 0;
|
|
|
|
|
|
|
|
share->fields= 1;
|
|
|
|
share->blob_fields= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint reclength= field->pack_length();
|
2019-09-18 14:17:26 +02:00
|
|
|
if (using_unique_constraint || thd->variables.tmp_memory_table_size == 0)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
|
|
|
|
table->file= get_new_handler(share, &table->mem_root,
|
|
|
|
share->db_type());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
share->db_plugin= ha_lock_engine(0, heap_hton);
|
|
|
|
table->file= get_new_handler(share, &table->mem_root,
|
|
|
|
share->db_type());
|
2017-11-14 07:47:58 +02:00
|
|
|
DBUG_ASSERT(!table->file || uniq_tuple_length_arg <= table->file->max_key_length());
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
if (!table->file)
|
|
|
|
goto err;
|
|
|
|
|
2013-06-27 14:01:03 +03:00
|
|
|
if (table->file->set_ha_share_ref(&share->ha_share))
|
|
|
|
{
|
|
|
|
delete table->file;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
null_count=1;
|
|
|
|
|
|
|
|
null_pack_length= 1;
|
|
|
|
reclength += null_pack_length;
|
|
|
|
|
|
|
|
share->reclength= reclength;
|
|
|
|
{
|
|
|
|
uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
|
|
|
|
share->rec_buff_length= alloc_length;
|
|
|
|
if (!(table->record[0]= (uchar*)
|
|
|
|
alloc_root(&table->mem_root, alloc_length*3)))
|
|
|
|
goto err;
|
|
|
|
table->record[1]= table->record[0]+alloc_length;
|
|
|
|
share->default_values= table->record[1]+alloc_length;
|
|
|
|
}
|
2021-04-12 13:04:01 +04:00
|
|
|
setup_tmp_table_column_bitmaps(table, bitmaps, table->s->fields);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
recinfo= start_recinfo;
|
|
|
|
null_flags=(uchar*) table->record[0];
|
|
|
|
pos=table->record[0]+ null_pack_length;
|
|
|
|
if (null_pack_length)
|
|
|
|
{
|
|
|
|
bzero((uchar*) recinfo,sizeof(*recinfo));
|
|
|
|
recinfo->type=FIELD_NORMAL;
|
|
|
|
recinfo->length=null_pack_length;
|
|
|
|
recinfo++;
|
|
|
|
bfill(null_flags,null_pack_length,255); // Set null fields
|
|
|
|
|
|
|
|
table->null_flags= (uchar*) table->record[0];
|
|
|
|
share->null_fields= null_count;
|
|
|
|
share->null_bytes= null_pack_length;
|
|
|
|
}
|
|
|
|
null_count=1;
|
|
|
|
|
|
|
|
{
|
|
|
|
//Field *field= *reg_field;
|
|
|
|
uint length;
|
|
|
|
bzero((uchar*) recinfo,sizeof(*recinfo));
|
|
|
|
field->move_field(pos,(uchar*) 0,0);
|
|
|
|
|
|
|
|
field->reset();
|
|
|
|
/*
|
|
|
|
Test if there is a default field value. The test for ->ptr is to skip
|
2018-12-21 17:06:08 -03:00
|
|
|
'offset' fields generated by initialize_tables
|
2010-02-16 00:53:06 +03:00
|
|
|
*/
|
|
|
|
// Initialize the table field:
|
|
|
|
bzero(field->ptr, field->pack_length());
|
|
|
|
|
|
|
|
length=field->pack_length();
|
|
|
|
pos+= length;
|
|
|
|
|
|
|
|
/* Make entry for create table */
|
|
|
|
recinfo->length=length;
|
2019-06-10 13:59:45 +04:00
|
|
|
recinfo->type= field->tmp_engine_column_type(use_packed_rows);
|
2010-11-24 00:08:48 +02:00
|
|
|
field->set_table_name(&table->alias);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
2017-06-30 17:56:58 +03:00
|
|
|
if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0) // No limit
|
2010-02-16 00:53:06 +03:00
|
|
|
share->max_rows= ~(ha_rows) 0;
|
|
|
|
else
|
|
|
|
share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
|
2017-06-30 17:56:58 +03:00
|
|
|
MY_MIN(thd->variables.tmp_memory_table_size,
|
|
|
|
thd->variables.max_heap_table_size) :
|
2019-09-18 14:18:08 +02:00
|
|
|
thd->variables.tmp_disk_table_size) /
|
2010-02-16 00:53:06 +03:00
|
|
|
share->reclength);
|
|
|
|
set_if_bigger(share->max_rows,1); // For dummy start options
|
|
|
|
|
|
|
|
|
|
|
|
//// keyinfo= param->keyinfo;
|
|
|
|
if (TRUE)
|
|
|
|
{
|
|
|
|
DBUG_PRINT("info",("Creating group key in temporary table"));
|
|
|
|
share->keys=1;
|
2022-12-20 15:55:40 +02:00
|
|
|
table->key_info= share->key_info= keyinfo;
|
2010-02-16 00:53:06 +03:00
|
|
|
keyinfo->key_part=key_part_info;
|
2023-05-03 15:15:37 +03:00
|
|
|
keyinfo->flags= HA_NOSAME | (using_unique_constraint ? HA_UNIQUE_HASH : 0);
|
|
|
|
keyinfo->ext_key_flags= keyinfo->flags;
|
2013-06-15 18:32:08 +03:00
|
|
|
keyinfo->usable_key_parts= keyinfo->user_defined_key_parts= 1;
|
2023-05-03 15:15:37 +03:00
|
|
|
keyinfo->ext_key_parts= 1;
|
|
|
|
share->key_parts= 1;
|
2010-02-16 00:53:06 +03:00
|
|
|
keyinfo->key_length=0;
|
|
|
|
keyinfo->rec_per_key=0;
|
|
|
|
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
|
2017-06-18 12:28:40 +03:00
|
|
|
keyinfo->name= weedout_key;
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
key_part_info->null_bit=0;
|
|
|
|
key_part_info->field= field;
|
|
|
|
key_part_info->offset= field->offset(table->record[0]);
|
|
|
|
key_part_info->length= (uint16) field->key_length();
|
|
|
|
key_part_info->type= (uint8) field->key_type();
|
|
|
|
key_part_info->key_type = FIELDFLAG_BINARY;
|
|
|
|
if (!using_unique_constraint)
|
|
|
|
{
|
|
|
|
if (!(key_field= field->new_key_field(thd->mem_root, table,
|
|
|
|
group_buff,
|
2014-06-09 20:18:53 +02:00
|
|
|
key_part_info->length,
|
2010-02-16 00:53:06 +03:00
|
|
|
field->null_ptr,
|
|
|
|
field->null_bit)))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
keyinfo->key_length+= key_part_info->length;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-04 12:16:12 +03:00
|
|
|
if (unlikely(thd->is_fatal_error)) // If end of memory
|
2010-02-16 00:53:06 +03:00
|
|
|
goto err;
|
|
|
|
share->db_record_offset= 1;
|
2011-01-14 12:03:41 +01:00
|
|
|
table->no_rows= 1; // We don't need the data
|
|
|
|
|
|
|
|
// recinfo must point after last field
|
|
|
|
recinfo++;
|
2010-02-16 00:53:06 +03:00
|
|
|
if (share->db_type() == TMP_ENGINE_HTON)
|
|
|
|
{
|
2018-04-04 12:16:12 +03:00
|
|
|
if (unlikely(create_internal_tmp_table(table, keyinfo, start_recinfo,
|
|
|
|
&recinfo, 0)))
|
2010-02-16 00:53:06 +03:00
|
|
|
goto err;
|
|
|
|
}
|
2018-04-04 12:16:12 +03:00
|
|
|
if (unlikely(open_tmp_table(table)))
|
2010-02-16 00:53:06 +03:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
thd->mem_root= mem_root_save;
|
2011-11-25 23:54:36 +04:00
|
|
|
tmp_table= table;
|
|
|
|
DBUG_RETURN(FALSE);
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
err:
|
|
|
|
thd->mem_root= mem_root_save;
|
|
|
|
free_tmp_table(thd,table); /* purecov: inspected */
|
|
|
|
if (temp_pool_slot != MY_BIT_NONE)
|
2021-07-17 08:57:29 +02:00
|
|
|
temp_pool_clear_bit(temp_pool_slot);
|
2011-11-25 23:54:36 +04:00
|
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
SemiJoinDuplicateElimination: Reset the temporary table
|
|
|
|
*/
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
int SJ_TMP_TABLE::sj_weedout_delete_rows()
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_delete_rows");
|
|
|
|
if (tmp_table)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
int rc= tmp_table->file->ha_delete_all_rows();
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(rc);
|
|
|
|
}
|
2011-11-25 23:54:36 +04:00
|
|
|
have_degenerate_row= FALSE;
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
SemiJoinDuplicateElimination: Weed out duplicate row combinations
|
|
|
|
|
|
|
|
SYNPOSIS
|
2011-11-25 23:54:36 +04:00
|
|
|
sj_weedout_check_row()
|
2010-02-16 00:53:06 +03:00
|
|
|
thd Thread handle
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Try storing current record combination of outer tables (i.e. their
|
|
|
|
rowids) in the temporary table. This records the fact that we've seen
|
|
|
|
this record combination and also tells us if we've seen it before.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
-1 Error
|
|
|
|
1 The row combination is a duplicate (discard it)
|
|
|
|
0 The row combination is not a duplicate (continue)
|
|
|
|
*/
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
int SJ_TMP_TABLE::sj_weedout_check_row(THD *thd)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
int error;
|
2011-11-25 23:54:36 +04:00
|
|
|
SJ_TMP_TABLE::TAB *tab= tabs;
|
|
|
|
SJ_TMP_TABLE::TAB *tab_end= tabs_end;
|
2010-02-16 00:53:06 +03:00
|
|
|
uchar *ptr;
|
|
|
|
uchar *nulls_ptr;
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
DBUG_ENTER("SJ_TMP_TABLE::sj_weedout_check_row");
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
if (is_degenerate)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
if (have_degenerate_row)
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(1);
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
have_degenerate_row= TRUE;
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
ptr= tmp_table->record[0] + 1;
|
2010-02-16 00:53:06 +03:00
|
|
|
|
|
|
|
/* Put the the rowids tuple into table->record[0]: */
|
|
|
|
|
|
|
|
// 1. Store the length
|
2011-11-25 23:54:36 +04:00
|
|
|
if (((Field_varstring*)(tmp_table->field[0]))->length_bytes == 1)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
*ptr= (uchar)(rowid_len + null_bytes);
|
2010-02-16 00:53:06 +03:00
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
int2store(ptr, rowid_len + null_bytes);
|
2010-02-16 00:53:06 +03:00
|
|
|
ptr += 2;
|
|
|
|
}
|
|
|
|
|
2011-06-15 18:37:01 +04:00
|
|
|
nulls_ptr= ptr;
|
2010-02-16 00:53:06 +03:00
|
|
|
// 2. Zero the null bytes
|
2011-11-25 23:54:36 +04:00
|
|
|
if (null_bytes)
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
2011-11-25 23:54:36 +04:00
|
|
|
bzero(ptr, null_bytes);
|
|
|
|
ptr += null_bytes;
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// 3. Put the rowids
|
|
|
|
for (uint i=0; tab != tab_end; tab++, i++)
|
|
|
|
{
|
|
|
|
handler *h= tab->join_tab->table->file;
|
|
|
|
if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row)
|
|
|
|
{
|
|
|
|
/* It's a NULL-complemented row */
|
|
|
|
*(nulls_ptr + tab->null_byte) |= tab->null_bit;
|
|
|
|
bzero(ptr + tab->rowid_offset, h->ref_length);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Copy the rowid value */
|
|
|
|
memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-25 23:54:36 +04:00
|
|
|
error= tmp_table->file->ha_write_tmp_row(tmp_table->record[0]);
|
2018-04-04 12:16:12 +03:00
|
|
|
if (unlikely(error))
|
2010-02-16 00:53:06 +03:00
|
|
|
{
|
|
|
|
/* create_internal_tmp_table_from_heap will generate error if needed */
|
2011-11-25 23:54:36 +04:00
|
|
|
if (!tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(1); /* Duplicate */
|
2013-07-16 09:22:17 +04:00
|
|
|
|
|
|
|
bool is_duplicate;
|
2011-11-25 23:54:36 +04:00
|
|
|
if (create_internal_tmp_table_from_heap(thd, tmp_table, start_recinfo,
|
2013-07-16 09:22:17 +04:00
|
|
|
&recinfo, error, 1, &is_duplicate))
|
2010-02-16 00:53:06 +03:00
|
|
|
DBUG_RETURN(-1);
|
2013-07-16 09:22:17 +04:00
|
|
|
if (is_duplicate)
|
|
|
|
DBUG_RETURN(1);
|
2010-02-16 00:53:06 +03:00
|
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-25 05:56:58 +04:00
|
|
|
/*
  Set up DuplicateWeedout processing for a range of join tabs

  SYNOPSIS
    init_dups_weedout()
      join                The join
      first_table         Index in join->join_tab of the first table of the
                          range
      first_fanout_table  Index of the join tab whose first_weedout_table is
                          pointed at the new SJ_TMP_TABLE
      n_tables            Number of join tabs in the range

  DESCRIPTION
    Collects the tables of the range whose rowids must be stored (see
    sj_table_is_included()), assigns each of them a rowid offset and, for
    tables that may be NULL-complemented, a null byte/bit within the tuple.
    If at least one rowid is needed a weedout temporary table is created and
    registered in join->sj_tmp_tables; otherwise a "degenerate" SJ_TMP_TABLE
    without a table is set up. The resulting object is then hooked to the
    first, first-fanout and last join tabs of the range.

  RETURN
    0     OK
    TRUE  Error (memory allocation or temporary table creation failed)
*/

int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint n_tables)
{
  THD *thd= join->thd;
  DBUG_ENTER("init_dups_weedout");
  SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
  SJ_TMP_TABLE::TAB *last_tab= sjtabs;
  uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
  uint jt_null_bits= 0;    // # null bits in tuple bytes
  /*
    Walk through the range and remember
     - tables that need their rowids to be put into temptable
     - the last outer table
  */
  for (JOIN_TAB *j=join->join_tab + first_table;
       j < join->join_tab + first_table + n_tables; j++)
  {
    if (sj_table_is_included(join, j))
    {
      last_tab->join_tab= j;
      last_tab->rowid_offset= jt_rowid_offset;
      jt_rowid_offset += j->table->file->ref_length;
      if (j->table->maybe_null)
      {
        /*
          sj_weedout_check_row() ORs null_bit into the byte at null_byte, so
          null_bit must be a single-bit mask, not the ordinal number of the
          bit. Storing the ordinal would give the first nullable table a mask
          of 0 (its NULL-complemented row would never be flagged) and would
          make later tables set several bits at once.
        */
        last_tab->null_byte= jt_null_bits / 8;
        last_tab->null_bit= (uchar) (1U << (jt_null_bits % 8));
        jt_null_bits++;
      }
      last_tab++;
      j->table->prepare_for_position();
      j->keep_current_rowid= TRUE;
    }
  }

  SJ_TMP_TABLE *sjtbl;
  if (jt_rowid_offset) /* Temptable has at least one rowid */
  {
    size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
    if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) ||
        !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
      DBUG_RETURN(TRUE); /* purecov: inspected */
    memcpy(sjtbl->tabs, sjtabs, tabs_size);
    sjtbl->is_degenerate= FALSE;
    sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
    sjtbl->rowid_len= jt_rowid_offset;
    sjtbl->null_bits= jt_null_bits;
    sjtbl->null_bytes= (jt_null_bits + 7)/8;
    if (sjtbl->create_sj_weedout_tmp_table(thd))
      DBUG_RETURN(TRUE);
    /* Registered so that the table gets cleared/freed together with the join */
    join->sj_tmp_tables.push_back(sjtbl->tmp_table, thd->mem_root);
  }
  else
  {
    /*
      This is a special case where the entire subquery predicate does
      not depend on anything at all, ie this is
        WHERE const IN (uncorrelated select)
    */
    if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))))
      DBUG_RETURN(TRUE); /* purecov: inspected */
    sjtbl->tmp_table= NULL;
    sjtbl->is_degenerate= TRUE;
    sjtbl->have_degenerate_row= FALSE;
  }

  /* Chain the new object in front of the first table's flush list */
  sjtbl->next_flush_table= join->join_tab[first_table].flush_weedout_table;
  join->join_tab[first_table].flush_weedout_table= sjtbl;
  join->join_tab[first_fanout_table].first_weedout_table= sjtbl;
  /* The duplicate check is attached to the last table of the range */
  join->join_tab[first_table + n_tables - 1].check_weed_out_table= sjtbl;
  DBUG_RETURN(0);
}
|
|
|
|
|
|
|
|
|
2015-08-05 20:43:25 +03:00
|
|
|
/*
|
|
|
|
@brief
|
|
|
|
Set up semi-join Loose Scan strategy for execution
|
|
|
|
|
|
|
|
@detail
|
|
|
|
Other strategies are done in setup_semijoin_dups_elimination(),
|
|
|
|
however, we need to set up Loose Scan earlier, before make_join_select is
|
|
|
|
called. This is to prevent make_join_select() from switching full index
|
|
|
|
scans into quick selects (which will break Loose Scan access).
|
|
|
|
|
|
|
|
@return
|
|
|
|
0 OK
|
|
|
|
1 Error
|
|
|
|
*/
|
|
|
|
|
|
|
|
int setup_semijoin_loosescan(JOIN *join)
|
|
|
|
{
|
|
|
|
uint i;
|
|
|
|
DBUG_ENTER("setup_semijoin_loosescan");
|
|
|
|
|
|
|
|
POSITION *pos= join->best_positions + join->const_tables;
|
|
|
|
for (i= join->const_tables ; i < join->top_join_tab_count; )
|
|
|
|
{
|
|
|
|
JOIN_TAB *tab=join->join_tab + i;
|
|
|
|
switch (pos->sj_strategy) {
|
|
|
|
case SJ_OPT_MATERIALIZE:
|
|
|
|
case SJ_OPT_MATERIALIZE_SCAN:
|
|
|
|
i+= 1; /* join tabs are embedded in the nest */
|
|
|
|
pos += pos->n_sj_tables;
|
|
|
|
break;
|
|
|
|
case SJ_OPT_LOOSE_SCAN:
|
|
|
|
{
|
|
|
|
/* We jump from the last table to the first one */
|
|
|
|
tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
|
|
|
|
|
|
|
|
/* LooseScan requires records to be produced in order */
|
|
|
|
if (tab->select && tab->select->quick)
|
|
|
|
tab->select->quick->need_sorted_output();
|
|
|
|
|
|
|
|
for (uint j= i; j < i + pos->n_sj_tables; j++)
|
|
|
|
join->join_tab[j].inside_loosescan_range= TRUE;
|
|
|
|
|
|
|
|
/* Calculate key length */
|
|
|
|
uint keylen= 0;
|
|
|
|
uint keyno= pos->loosescan_picker.loosescan_key;
|
|
|
|
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
|
|
|
|
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
|
|
|
|
|
|
|
|
tab->loosescan_key= keyno;
|
|
|
|
tab->loosescan_key_len= keylen;
|
|
|
|
if (pos->n_sj_tables > 1)
|
|
|
|
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
|
|
|
|
i+= pos->n_sj_tables;
|
|
|
|
pos+= pos->n_sj_tables;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
i++;
|
|
|
|
pos++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-02-16 00:53:06 +03:00
|
|
|
/*
|
|
|
|
Setup the strategies to eliminate semi-join duplicates.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
setup_semijoin_dups_elimination()
|
|
|
|
join Join to process
|
|
|
|
options Join options (needed to see if join buffering will be
|
|
|
|
used or not)
|
|
|
|
no_jbuf_after Another bit of information re where join buffering will
|
|
|
|
be used.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Setup the strategies to eliminate semi-join duplicates. ATM there are 4
|
|
|
|
strategies:
|
|
|
|
|
|
|
|
1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
|
|
|
|
of row combinations)
|
|
|
|
2. FirstMatch (pick only the 1st matching row combination of inner tables)
|
|
|
|
3. LooseScan (scanning the sj-inner table in a way that groups duplicates
|
|
|
|
together and picking the 1st one)
|
|
|
|
4. SJ-Materialization.
|
|
|
|
|
|
|
|
The join order has "duplicate-generating ranges", and every range is
|
|
|
|
served by one strategy or a combination of FirstMatch with some
|
|
|
|
other strategy.
|
|
|
|
|
|
|
|
"Duplicate-generating range" is defined as a range within the join order
|
|
|
|
that contains all of the inner tables of a semi-join. All ranges must be
|
|
|
|
disjoint, if tables of several semi-joins are interleaved, then the ranges
|
|
|
|
are joined together, which is equivalent to converting
|
|
|
|
SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
|
|
|
|
to
|
|
|
|
SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
|
|
|
|
.
|
|
|
|
|
|
|
|
Applicability conditions are as follows:
|
|
|
|
|
|
|
|
DuplicateWeedout strategy
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
(ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)*
|
|
|
|
+------+ +=========================+ +---+
|
|
|
|
(1) (2) (3)
|
|
|
|
|
|
|
|
(1) - Prefix of OuterTables (those that participate in
|
|
|
|
IN-equality and/or are correlated with subquery) and outer
|
|
|
|
Non-correlated tables.
|
|
|
|
(2) - The handled range. The range starts with the first sj-inner
|
|
|
|
table, and covers all sj-inner and outer tables
|
|
|
|
Within the range, Inner, Outer, outer non-correlated tables
|
|
|
|
may follow in any order.
|
|
|
|
(3) - The suffix of outer non-correlated tables.
|
|
|
|
|
|
|
|
FirstMatch strategy
|
|
|
|
~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
(ot|nt)* [ it ((it|nt)* it) ] (nt)*
|
|
|
|
+------+ +==================+ +---+
|
|
|
|
(1) (2) (3)
|
|
|
|
|
|
|
|
(1) - Prefix of outer and non-correlated tables
|
|
|
|
(2) - The handled range, which may contain only inner and
|
|
|
|
non-correlated tables.
|
|
|
|
(3) - The suffix of outer non-correlated tables.
|
|
|
|
|
|
|
|
LooseScan strategy
|
|
|
|
~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
(ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)*
|
|
|
|
+--------+ +===========+ +=============+ +------+
|
|
|
|
(1) (2) (3) (4)
|
|
|
|
|
|
|
|
(1) - Prefix that may contain any outer tables. The prefix must contain
|
|
|
|
all the non-trivially correlated outer tables. (non-trivially means
|
|
|
|
that the correlation is not just through the IN-equality).
|
|
|
|
|
|
|
|
(2) - Inner table for which the LooseScan scan is performed.
|
|
|
|
|
|
|
|
(3) - The remainder of the duplicate-generating range. It is served by
|
|
|
|
application of FirstMatch strategy, with the exception that
|
|
|
|
outer IN-correlated tables are considered to be non-correlated.
|
|
|
|
|
|
|
|
(4) - The suffix of outer and outer non-correlated tables.
|
|
|
|
|
|
|
|
|
|
|
|
The choice between the strategies is made by the join optimizer (see
|
2022-06-06 22:21:22 +03:00
|
|
|
optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()).
|
2010-02-16 00:53:06 +03:00
|
|
|
This function sets up all fields/structures/etc needed for execution except
|
|
|
|
for setup/initialization of semi-join materialization which is done in
|
|
|
|
setup_sj_materialization() (todo: can't we move that to here also?)
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
FALSE OK
|
|
|
|
TRUE Out of memory error
|
|
|
|
*/
|
|
|
|
|
|
|
|
int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
                                    uint no_jbuf_after)
{
  uint i;
  DBUG_ENTER("setup_semijoin_dups_elimination");

  /*
    Walk join->best_positions and the join tabs in parallel, handling one
    duplicate-generating range per iteration. Returns TRUE as soon as setting
    up a range fails, FALSE otherwise. 'options' is not used by the code
    below.
  */
  join->complex_firstmatch_tables= table_map(0);

  POSITION *pos= join->best_positions + join->const_tables;
  for (i= join->const_tables ; i < join->top_join_tab_count; )
  {
    JOIN_TAB *tab=join->join_tab + i;
    switch (pos->sj_strategy) {
      case SJ_OPT_MATERIALIZE:
      case SJ_OPT_MATERIALIZE_SCAN:
        /* Do nothing */
        i+= 1;// It used to be pos->n_sj_tables, but now they are embedded in a nest
        pos += pos->n_sj_tables;
        break;
      case SJ_OPT_LOOSE_SCAN:
      {
        /* Setup already handled by setup_semijoin_loosescan */
        i+= pos->n_sj_tables;
        pos+= pos->n_sj_tables;
        break;
      }
      case SJ_OPT_DUPS_WEEDOUT:
      {
        /*
          Check for join buffering. If there is one, move the first table
          forwards, but do not destroy other duplicate elimination methods.
        */
        uint first_table= i;

        uint join_cache_level= join->thd->variables.join_cache_level;
        for (uint j= i; j < i + pos->n_sj_tables; j++)
        {
          /*
            When we'll properly take join buffering into account during
            join optimization, the below check should be changed to
            "if (join->best_positions[j].use_join_buffer &&
                 j <= no_jbuf_after)".
            For now, use a rough criteria:
          */
          JOIN_TAB *js_tab=join->join_tab + j;
          if (j != join->const_tables && js_tab->use_quick != 2 &&
              j <= no_jbuf_after &&
              ((js_tab->type == JT_ALL && join_cache_level != 0) ||
               (join_cache_level > 2 && (js_tab->type == JT_REF ||
                                         js_tab->type == JT_EQ_REF))))
          {
            /* Looks like we'll be using join buffer */
            first_table= join->const_tables;
            /*
              Make sure that possible sorting of rows from the head table
              is not to be employed.
            */
            if (join->get_sort_by_join_tab())
            {
              join->simple_order= 0;
              join->simple_group= 0;
              join->need_tmp= join->test_if_need_tmp_table();
            }
            break;
          }
        }

        /*
          init_dups_weedout() returns TRUE when an allocation or the creation
          of the weedout temporary table fails. Report that to the caller
          instead of continuing with a range that has no weedout table.
        */
        if (init_dups_weedout(join, first_table, i,
                              i + pos->n_sj_tables - first_table))
          DBUG_RETURN(TRUE);
        i+= pos->n_sj_tables;
        pos+= pos->n_sj_tables;
        break;
      }
      case SJ_OPT_FIRST_MATCH:
      {
        JOIN_TAB *j;
        JOIN_TAB *jump_to= tab-1;

        bool complex_range= FALSE;
        table_map tables_in_range= table_map(0);

        for (j= tab; j != tab + pos->n_sj_tables; j++)
        {
          tables_in_range |= j->table->map;
          if (!j->emb_sj_nest)
          {
            /*
              Got a table that's not within any semi-join nest. This is a case
              like this:

              SELECT * FROM ot1, nt1 WHERE
                ot1.col IN (SELECT expr FROM it1, it2)

              with a join order of

                   +----- FirstMatch range ----+
                   |                           |
              ot1 it1 nt1 nt2 it2 it3 ...
                   |   ^
                   |   +-------- 'j' points here
                   +------------- SJ_OPT_FIRST_MATCH was set for this table as
                                  it's the first one that produces duplicates

            */
            DBUG_ASSERT(j != tab);  /* table ntX must have an itX before it */

            /*
              If the table right before us is an inner table (like it1 in the
              picture), it should be set to jump back to previous outer-table
            */
            if (j[-1].emb_sj_nest)
              j[-1].do_firstmatch= jump_to;

            jump_to= j; /* Jump back to us */
            complex_range= TRUE;
          }
          else
          {
            j->first_sj_inner_tab= tab;
            j->last_sj_inner_tab= tab + pos->n_sj_tables - 1;
          }
        }
        /* The last table of the range jumps back to the latest jump target */
        j[-1].do_firstmatch= jump_to;
        i+= pos->n_sj_tables;
        pos+= pos->n_sj_tables;

        /* Remember the tables of ranges that have non-nest tables inside */
        if (complex_range)
          join->complex_firstmatch_tables|= tables_in_range;
        break;
      }
      case SJ_OPT_NONE:
        i++;
        pos++;
        break;
    }
  }
  DBUG_RETURN(FALSE);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Destroy all temporary tables created by NL-semijoin runtime
|
|
|
|
*/
|
|
|
|
|
|
|
|
void destroy_sj_tmp_tables(JOIN *join)
|
|
|
|
{
|
|
|
|
List_iterator<TABLE> it(join->sj_tmp_tables);
|
|
|
|
TABLE *table;
|
|
|
|
while ((table= it++))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
SJ-Materialization tables are initialized for either sequential reading
|
|
|
|
or index lookup, DuplicateWeedout tables are not initialized for read
|
|
|
|
(we only write to them), so need to call ha_index_or_rnd_end.
|
|
|
|
*/
|
|
|
|
table->file->ha_index_or_rnd_end();
|
|
|
|
free_tmp_table(join->thd, table);
|
|
|
|
}
|
|
|
|
join->sj_tmp_tables.empty();
|
|
|
|
join->sjm_info_list.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Remove all records from all temp tables used by NL-semijoin runtime
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
clear_sj_tmp_tables()
|
|
|
|
join The join to remove tables for
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Remove all records from all temp tables used by NL-semijoin runtime. This
|
|
|
|
must be done before every join re-execution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int clear_sj_tmp_tables(JOIN *join)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
List_iterator<TABLE> it(join->sj_tmp_tables);
|
|
|
|
TABLE *table;
|
|
|
|
while ((table= it++))
|
|
|
|
{
|
|
|
|
if ((res= table->file->ha_delete_all_rows()))
|
|
|
|
return res; /* purecov: inspected */
|
|
|
|
}
|
|
|
|
|
|
|
|
SJ_MATERIALIZATION_INFO *sjm;
|
|
|
|
List_iterator<SJ_MATERIALIZATION_INFO> it2(join->sjm_info_list);
|
|
|
|
while ((sjm= it2++))
|
|
|
|
{
|
|
|
|
sjm->materialized= FALSE;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Check if the table's rowid is included in the temptable
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
sj_table_is_included()
|
|
|
|
join The join
|
|
|
|
join_tab The table to be checked
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
SemiJoinDuplicateElimination: check the table's rowid should be included
|
|
|
|
in the temptable. This is so if
|
|
|
|
|
|
|
|
1. The table is not embedded within some semi-join nest
|
|
|
|
2. The has been pulled out of a semi-join nest, or
|
|
|
|
|
|
|
|
3. The table is functionally dependent on some previous table
|
|
|
|
|
|
|
|
[4. This is also true for constant tables that can't be
|
|
|
|
NULL-complemented but this function is not called for such tables]
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
TRUE - Include table's rowid
|
|
|
|
FALSE - Don't
|
|
|
|
*/
|
|
|
|
|
|
|
|
static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
{
  /* A table that sits inside a semi-join nest is never included. */
  if (join_tab->emb_sj_nest)
    return FALSE;

  /* The nest that directly embeds this table (may be NULL). */
  TABLE_LIST *own_nest= join_tab->table->pos_in_table_list->embedding;

  /* Only eq_ref access can make the table functionally dependent. */
  if (join_tab->type != JT_EQ_REF)
    return TRUE;                      /* Not functionally dependent */

  /* Collect the tables used by the expressions of the lookup key. */
  table_map key_tables= 0;
  uint part= 0;
  while (part < join_tab->ref.key_parts)
    key_tables|= join_tab->ref.items[part++]->used_tables();

  /*
    The table counts as functionally dependent only if every table the
    key refers to is embedded in the very same nest as this table.
  */
  Table_map_iterator tbl_it(key_tables & ~PSEUDO_TABLE_BITS);
  for (uint tablenr= tbl_it.next_bit();
       tablenr != Table_map_iterator::BITMAP_END;
       tablenr= tbl_it.next_bit())
  {
    TABLE_LIST *ref_nest=
      join->map2table[tablenr]->table->pos_in_table_list->embedding;
    if (ref_nest != own_nest)
      return TRUE;
  }

  /* Ok, functionally dependent */
  return FALSE;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Index lookup-based subquery: save some flags for EXPLAIN output
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
save_index_subquery_explain_info()
|
|
|
|
join_tab Subquery's join tab (there is only one as index lookup is
|
|
|
|
only used for subqueries that are single-table SELECTs)
|
|
|
|
where Subquery's WHERE clause
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
For index lookup-based subquery (i.e. one executed with
|
|
|
|
subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
|
|
|
|
check whether its EXPLAIN output row should contain
"Using index" (TAB_INFO_USING_INDEX)
|
|
|
|
"Using Where" (TAB_INFO_USING_WHERE)
|
|
|
|
"Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
|
|
|
|
and set appropriate flags in join_tab->packed_info.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
{
  join_tab->packed_info= TAB_INFO_HAVE_VALUE;

  /* "Using index": the ref key is one of the table's covering keys */
  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
    join_tab->packed_info|= TAB_INFO_USING_INDEX;

  /* "Using where": a WHERE condition is left to be checked */
  if (where)
    join_tab->packed_info|= TAB_INFO_USING_WHERE;

  /*
    "Full scan on NULL key": look for the first key part that has a
    condition guard; one such key part is enough to set the flag.
  */
  uint part= 0;
  while (part < join_tab->ref.key_parts && !join_tab->ref.cond_guards[part])
    part++;
  if (part < join_tab->ref.key_parts)
    join_tab->packed_info|= TAB_INFO_FULL_SCAN_ON_NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Check if the join can be rewritten to [unique_]indexsubquery_engine
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Check if the join can be changed into [unique_]indexsubquery_engine.
|
|
|
|
|
|
|
|
The check is done after join optimization, the idea is that if the join
|
|
|
|
has only one table and uses a [eq_]ref access generated from subselect's
|
|
|
|
IN-equality then we replace it with a subselect_indexsubquery_engine or a
|
|
|
|
subselect_uniquesubquery_engine.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 - Ok, rewrite done (stop join optimization and return)
|
|
|
|
1 - Fatal error (stop join optimization and return)
|
|
|
|
-1 - No rewrite performed, continue with join optimization
|
|
|
|
*/
|
|
|
|
|
|
|
|
int rewrite_to_index_subquery_engine(JOIN *join)
{
  THD *const thd= join->thd;
  JOIN_TAB *const tab= join->join_tab;
  SELECT_LEX_UNIT *const unit= join->unit;
  DBUG_ENTER("rewrite_to_index_subquery_engine");

  /*
    Is this a simple IN subquery?
  */
  /* TODO: In order to use these more efficient subquery engines in more cases,
    the following problems need to be solved:
    - the code that removes GROUP BY (group_list), also adds an ORDER BY
      (order), thus GROUP BY queries (almost?) never pass through this branch.
      Solution: remove the test below '!join->order', because we remove the
      ORDER clause for subqueries anyway.
    - in order to set a more efficient engine, the optimizer needs to both
      decide to remove GROUP BY, *and* select one of the JT_[EQ_]REF[_OR_NULL]
      access methods, *and* loose scan should be more expensive or
      inapplicable. When is that possible?
    - Consider expanding the applicability of this rewrite for loose scan
      for group by queries.
  */
  if (join->group_list || join->order ||
      !unit->item ||
      unit->item->substype() != Item_subselect::IN_SUBS ||
      join->table_count != 1 || !join->conds ||
      unit->is_unit_op())
    DBUG_RETURN(-1);                  /* Haven't done the rewrite */

  if (join->having)
  {
    /*
      With a HAVING clause only ref_or_null access keyed on the IN left
      expression qualifies, and HAVING must be the injected IN condition.
    */
    if (tab[0].type != JT_REF_OR_NULL ||
        tab[0].ref.items[0]->name.str != in_left_expr_name.str ||
        join->having->name.str != in_having_cond.str)
      DBUG_RETURN(-1);                /* Haven't done the rewrite */

    tab[0].type= JT_INDEX_SUBQUERY;
    join->error= 0;
    join->conds= remove_additional_cond(join->conds);
    save_index_subquery_explain_info(tab, join->conds);
    DBUG_RETURN(unit->item->
                change_engine(new
                              subselect_indexsubquery_engine(
                                thd, tab,
                                unit->item->get_IN_subquery(),
                                join->conds,
                                join->having,
                                1)));
  }

  /*
    No HAVING clause: eq_ref becomes a unique-subquery lookup and ref an
    index-subquery lookup, provided the key is built from the IN left
    expression. The access type is tested before ref.items is touched.
  */
  const bool is_eq_ref= (tab[0].type == JT_EQ_REF);
  if (!is_eq_ref && tab[0].type != JT_REF)
    DBUG_RETURN(-1);                  /* Haven't done the rewrite */
  if (tab[0].ref.items[0]->name.str != in_left_expr_name.str)
    DBUG_RETURN(-1);                  /* Haven't done the rewrite */

  Item *where= join->conds;
  remove_subq_pushed_predicates(join, &where);
  save_index_subquery_explain_info(tab, where);

  if (is_eq_ref)
  {
    tab[0].type= JT_UNIQUE_SUBQUERY;
    join->error= 0;
    DBUG_RETURN(unit->item->
                change_engine(new
                              subselect_uniquesubquery_engine(
                                thd, tab,
                                unit->item->get_IN_subquery(),
                                where)));
  }

  tab[0].type= JT_INDEX_SUBQUERY;
  join->error= 0;
  DBUG_RETURN(unit->item->
              change_engine(new
                            subselect_indexsubquery_engine(
                              thd, tab,
                              unit->item->get_IN_subquery(),
                              where,
                              NULL,
                              0)));
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Remove additional condition inserted by IN/ALL/ANY transformation.
|
|
|
|
|
|
|
|
@param conds condition for processing
|
|
|
|
|
|
|
|
@return
|
|
|
|
new conditions
|
|
|
|
*/
|
|
|
|
|
|
|
|
static Item *remove_additional_cond(Item* conds)
{
  /* The whole condition is the injected one: nothing remains. */
  if (conds->name.str == in_additional_cond.str)
    return 0;

  /* Only AND/OR items have an argument list to search. */
  if (conds->type() != Item::COND_ITEM)
    return conds;

  List<Item> *args= ((Item_cond*) conds)->argument_list();
  List_iterator<Item> arg_it(*args);
  for (Item *arg= arg_it++; arg; arg= arg_it++)
  {
    if (arg->name.str != in_additional_cond.str)
      continue;

    /*
      Drop the first injected argument found. If exactly one argument is
      left, return that argument itself instead of the enclosing item.
    */
    arg_it.remove();
    return args->elements == 1 ? args->head() : conds;
  }
  return conds;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Remove the predicates pushed down into the subquery
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
remove_subq_pushed_predicates()
|
|
|
|
where IN The subquery's WHERE condition (the caller passes join->conds)
|
|
|
|
OUT The remaining WHERE condition, or NULL
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Given that this join will be executed using (unique|index)_subquery,
|
|
|
|
without "checking NULL", remove the predicates that were pushed down
|
|
|
|
into the subquery.
|
|
|
|
|
|
|
|
If the subquery compares scalar values, we can remove the condition that
|
|
|
|
was wrapped into trig_cond (it will be checked when needed by the subquery
|
|
|
|
engine)
|
|
|
|
|
|
|
|
If the subquery compares row values, we need to keep the wrapped
|
|
|
|
equalities in the WHERE clause: when the left (outer) tuple has both NULL
|
|
|
|
and non-NULL values, we'll do a full table scan and will rely on the
|
|
|
|
equalities corresponding to non-NULL parts of left tuple to filter out
|
|
|
|
non-matching records.
|
|
|
|
|
|
|
|
TODO: We can remove the equalities that will be guaranteed to be true by the
|
|
|
|
fact that subquery engine will be using index lookup. This must be done only
|
|
|
|
for cases where there are no conversion errors of significance, e.g. 257
|
|
|
|
that is searched in a byte. But this requires homogenization of the return
|
|
|
|
codes of all Field*::store() methods.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void remove_subq_pushed_predicates(JOIN *join, Item **where)
{
  Item *cond= join->conds;

  /* Only a plain equality "<ref> = <field>" is a candidate for removal. */
  if (cond->type() != Item::FUNC_ITEM)
    return;

  /* Safe to downcast only now that the item type has been checked. */
  Item_func *eq= (Item_func *) cond;
  if (eq->functype() != Item_func::EQ_FUNC)
    return;

  Item **eq_args= eq->arguments();
  if (eq_args[0]->type() != Item::REF_ITEM ||
      eq_args[1]->type() != Item::FIELD_ITEM)
    return;

  /* If test_if_ref() accepts the equality, no WHERE condition is left. */
  if (test_if_ref(cond, (Item_field *) eq_args[1], eq_args[0]))
    *where= 0;
}
|
|
|
|
|
|
|
|
|
2011-05-25 19:31:13 +04:00
|
|
|
|
|
|
|
|
2010-09-17 13:17:27 +03:00
|
|
|
/**
|
2011-05-18 01:23:22 +03:00
|
|
|
Optimize all subqueries of a query that were not flattened into a semijoin.
|
2010-09-17 13:17:27 +03:00
|
|
|
|
2010-10-05 16:00:31 +03:00
|
|
|
@details
|
|
|
|
Optimize all immediate children subqueries of a query.
|
2010-09-17 13:17:27 +03:00
|
|
|
|
|
|
|
This phase must be called after substitute_for_best_equal_field() because
|
|
|
|
that function may replace items with other items from a multiple equality,
|
|
|
|
and we need to reference the correct items in the index access method of the
|
|
|
|
IN predicate.
|
|
|
|
|
|
|
|
@return Operation status
|
|
|
|
@retval FALSE success.
|
|
|
|
@retval TRUE error occurred.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool JOIN::optimize_unflattened_subqueries()
|
|
|
|
{
|
2012-05-17 13:46:05 +03:00
|
|
|
return select_lex->optimize_unflattened_subqueries(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Optimize all constant subqueries of a query that were not flattened into
|
|
|
|
a semijoin.
|
|
|
|
|
|
|
|
@details
|
|
|
|
Similar to other constant conditions, constant subqueries can be used in
|
|
|
|
various constant optimizations. Having optimized constant subqueries before
|
|
|
|
these constant optimizations, makes it possible to estimate if a subquery
|
|
|
|
is "cheap" enough to be executed during the optimization phase.
|
|
|
|
|
|
|
|
Constant subqueries can be optimized and evaluated independent of the outer
|
|
|
|
query, therefore if const_only = true, this method can be called early in
|
|
|
|
the optimization phase of the outer query.
|
|
|
|
|
|
|
|
@return Operation status
|
|
|
|
@retval FALSE success.
|
|
|
|
@retval TRUE error occurred.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool JOIN::optimize_constant_subqueries()
|
|
|
|
{
|
|
|
|
ulonglong save_options= select_lex->options;
|
|
|
|
bool res;
|
|
|
|
/*
|
|
|
|
Constant subqueries may be executed during the optimization phase.
|
|
|
|
In EXPLAIN mode the optimizer doesn't initialize many of the data structures
|
|
|
|
needed for execution. In order to make it possible to execute subqueries
|
|
|
|
during optimization, constant subqueries must be optimized for execution,
|
|
|
|
not for EXPLAIN.
|
|
|
|
*/
|
|
|
|
select_lex->options&= ~SELECT_DESCRIBE;
|
|
|
|
res= select_lex->optimize_unflattened_subqueries(true);
|
|
|
|
select_lex->options= save_options;
|
|
|
|
return res;
|
2010-09-17 13:17:27 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-06-04 17:40:57 +04:00
|
|
|
/*
|
2010-06-10 23:38:31 +04:00
|
|
|
Join tab execution startup function.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
join_tab_execution_startup()
|
|
|
|
tab Join tab to perform startup actions for
|
2010-06-04 17:40:57 +04:00
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Join tab execution startup function. This is different from
|
|
|
|
tab->read_first_record in the regard that this has actions that are to be
|
|
|
|
done once per join execution.
|
|
|
|
|
|
|
|
Currently there are only two possible startup functions, so we have them
|
|
|
|
both here inside if (...) branches. In future we could switch to function
|
|
|
|
pointers.
|
2011-06-27 23:38:56 +04:00
|
|
|
|
|
|
|
TODO: consider moving this together with JOIN_TAB::preread_init
|
2010-06-04 17:40:57 +04:00
|
|
|
|
|
|
|
RETURN
|
2010-07-10 20:51:12 +03:00
|
|
|
NESTED_LOOP_OK - OK
|
|
|
|
NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution
|
2010-06-04 17:40:57 +04:00
|
|
|
*/
|
|
|
|
|
2010-07-10 20:51:12 +03:00
|
|
|
enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab)
{
  DBUG_ENTER("join_tab_execution_startup");

  TABLE_LIST *const tbl= tab->table->pos_in_table_list;
  Item_in_subselect *const in_subs= tbl ? tbl->jtbm_subselect : NULL;

  if (in_subs)
  {
    /* It's a non-merged SJM nest */
    DBUG_ASSERT(in_subs->engine->engine_type() ==
                subselect_engine::HASH_SJ_ENGINE);
    subselect_hash_sj_engine *engine=
      (subselect_hash_sj_engine*) in_subs->engine;

    if (!engine->is_materialized)
    {
      const int error= engine->materialize_join->exec();
      /* The flag is raised whether or not the execution succeeded. */
      engine->is_materialized= TRUE;

      if (unlikely(error) ||
          unlikely(tab->join->thd->is_fatal_error))
        DBUG_RETURN(NESTED_LOOP_ERROR);
    }
    DBUG_RETURN(NESTED_LOOP_OK);
  }

  /* Neither kind of SJM nest: there is nothing to start up. */
  if (!tab->bush_children)
    DBUG_RETURN(NESTED_LOOP_OK);

  /* It's a merged SJM nest */
  JOIN_TAB *const first_inner= tab->bush_children->start;
  SJ_MATERIALIZATION_INFO *const sjm= first_inner->emb_sj_nest->sj_mat_info;
  if (sjm->materialized)
    DBUG_RETURN(NESTED_LOOP_OK);

  JOIN *const join= tab->join;
  JOIN_TAB *const saved_return_tab= join->return_tab;

  /*
    Now run the join for the inner tables. The first call is to run the
    join, the second one is to signal EOF (this is essential for some
    join strategies, e.g. it will make join buffering flush the records).
    The EOF call is made only if the first call did not fail.
  */
  enum_nested_loop_state rc= sub_select(join, first_inner, FALSE/* no EOF */);
  if (rc >= 0)
    rc= sub_select(join, first_inner, TRUE/* now EOF */);

  /* return_tab is put back on both the success and the failure path. */
  join->return_tab= saved_return_tab;

  if (rc < 0)
    DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/

  sjm->materialized= TRUE;
  DBUG_RETURN(NESTED_LOOP_OK);
}
|
2010-02-16 00:53:06 +03:00
|
|
|
|
2011-05-25 19:31:13 +04:00
|
|
|
|
2011-12-14 04:39:29 +04:00
|
|
|
/*
|
|
|
|
Create a dummy temporary table, useful only for the sake of having a
|
|
|
|
TABLE* object with map,tablenr and maybe_null properties.
|
|
|
|
|
|
|
|
This is used by non-mergeable semi-join materialization code to handle
|
|
|
|
degenerate cases where materialized subquery produced "Impossible WHERE"
|
|
|
|
and thus wasn't materialized.
|
|
|
|
*/
|
|
|
|
|
|
|
|
TABLE *create_dummy_tmp_table(THD *thd)
{
  DBUG_ENTER("create_dummy_tmp_table");
  TMP_TABLE_PARAM sjm_table_param;
  List<Item> sjm_table_cols;
  const LEX_CSTRING dummy_name= { STRING_WITH_LEN("dummy") };

  /* The table gets a single column, defined by the integer constant 1. */
  Item *column_item= new (thd->mem_root) Item_int(thd, 1);
  if (!column_item)
    DBUG_RETURN(NULL);

  sjm_table_param.init();
  sjm_table_param.field_count= sjm_table_param.func_count= 1;

  /*
    push_back() allocates a list node and returns true on failure. Bail out
    in that case rather than calling create_tmp_table() with an empty column
    list while field_count says there is one column.
  */
  if (sjm_table_cols.push_back(column_item, thd->mem_root))
    DBUG_RETURN(NULL);

  /* The result is NULL on failure; it is passed straight to the caller. */
  DBUG_RETURN(create_tmp_table(thd, &sjm_table_param,
                               sjm_table_cols, (ORDER*) 0,
                               TRUE /* distinct */,
                               1, /*save_sum_fields*/
                               thd->variables.option_bits |
                               TMP_TABLE_ALL_COLUMNS,
                               HA_POS_ERROR /*rows_limit */,
                               &dummy_name, TRUE /* Do not open */));
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
A class that is used to catch one single tuple that is sent to the join
|
|
|
|
output, and save it in Item_cache element(s).
|
|
|
|
|
|
|
|
It is very similar to select_singlerow_subselect but doesn't require a
|
|
|
|
Item_singlerow_subselect item.
|
|
|
|
*/
|
|
|
|
|
|
|
|
class select_value_catcher :public select_subselect
|
|
|
|
{
|
|
|
|
public:
|
2015-04-22 13:29:56 +04:00
|
|
|
select_value_catcher(THD *thd_arg, Item_subselect *item_arg):
|
|
|
|
select_subselect(thd_arg, item_arg)
|
2011-12-14 04:39:29 +04:00
|
|
|
{}
|
|
|
|
int send_data(List<Item> &items);
|
|
|
|
int setup(List<Item> *items);
|
|
|
|
bool assigned; /* TRUE <=> we've caught a value */
|
|
|
|
uint n_elements; /* How many elements we get */
|
|
|
|
Item_cache **row; /* Array of cache elements */
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
int select_value_catcher::setup(List<Item> *items)
{
  assigned= FALSE;
  n_elements= items->elements;

  /* One cache pointer per select list element. */
  row= (Item_cache**) thd->alloc(sizeof(Item_cache*) * n_elements);
  if (!row)
    return TRUE;

  List_iterator<Item> item_it(*items);
  Item_cache **slot= row;
  for (Item *sel_item= item_it++; sel_item; sel_item= item_it++, slot++)
  {
    /* Ask the item for a cache element and bind that element to it. */
    Item_cache *cache= sel_item->get_cache(thd);
    *slot= cache;
    if (!cache)
      return TRUE;
    cache->setup(thd, sel_item);
  }
  return FALSE;
}
|
|
|
|
|
|
|
|
|
|
|
|
int select_value_catcher::send_data(List<Item> &items)
{
  DBUG_ENTER("select_value_catcher::send_data");
  DBUG_ASSERT(!assigned);
  DBUG_ASSERT(items.elements == n_elements);

  /* Walk the row and the cache array in step, caching each value. */
  List_iterator_fast<Item> value_it(items);
  Item_cache **slot= row;
  for (Item *value= value_it++; value; value= value_it++, slot++)
  {
    (*slot)->store(value);
    (*slot)->cache_value();
  }

  assigned= TRUE;
  DBUG_RETURN(0);
}
|
|
|
|
|
|
|
|
|
2018-06-17 19:48:00 +02:00
|
|
|
/**
|
|
|
|
@brief
|
2019-03-23 15:28:22 +03:00
|
|
|
Attach conditions to already optimized condition
|
2018-06-17 19:48:00 +02:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
@param thd the thread handle
|
|
|
|
@param cond the condition to which add new conditions
|
|
|
|
@param cond_eq IN/OUT the multiple equalities of cond
|
|
|
|
@param new_conds the list of conditions to be added
|
|
|
|
@param cond_value the returned value of the condition
|
|
|
|
if it can be evaluated
|
2018-05-15 23:45:59 +02:00
|
|
|
|
|
|
|
@details
|
2019-03-23 15:28:22 +03:00
|
|
|
The method creates new condition through union of cond and
|
2018-05-15 23:45:59 +02:00
|
|
|
the conditions from new_conds list.
|
|
|
|
The method is called after optimize_cond() for cond. The result
|
2019-03-23 15:28:22 +03:00
|
|
|
of the union should be the same as if it was done before the
|
2018-05-15 23:45:59 +02:00
|
|
|
optimize_cond() call.
|
|
|
|
|
|
|
|
@retval otherwise the created condition
|
2019-03-23 15:28:22 +03:00
|
|
|
@retval NULL if an error occurs
|
2018-05-15 23:45:59 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
|
|
|
|
COND_EQUAL **cond_eq,
|
|
|
|
List<Item> &new_conds,
|
2019-03-23 15:28:22 +03:00
|
|
|
Item::cond_result *cond_value)
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
|
|
|
COND_EQUAL new_cond_equal;
|
|
|
|
Item *item;
|
2019-03-23 15:28:22 +03:00
|
|
|
Item_equal *mult_eq;
|
2018-05-15 23:45:59 +02:00
|
|
|
bool is_simplified_cond= false;
|
2019-03-23 15:28:22 +03:00
|
|
|
/* The list where parts of the new condition are stored. */
|
2018-05-15 23:45:59 +02:00
|
|
|
List_iterator<Item> li(new_conds);
|
|
|
|
List_iterator_fast<Item_equal> it(new_cond_equal.current_level);
|
|
|
|
|
|
|
|
/*
|
2019-03-23 15:28:22 +03:00
|
|
|
Create multiple equalities from the equalities of the list new_conds.
|
|
|
|
Save the created multiple equalities in new_cond_equal.
|
|
|
|
If multiple equality can't be created or the condition
|
|
|
|
from new_conds list isn't an equality leave it in new_conds
|
2018-05-15 23:45:59 +02:00
|
|
|
list.
|
|
|
|
|
|
|
|
The equality can't be converted into the multiple equality if it
|
|
|
|
is a knowingly false or true equality.
|
|
|
|
For example, (3 = 1) equality.
|
|
|
|
*/
|
|
|
|
while ((item=li++))
|
|
|
|
{
|
|
|
|
if (item->type() == Item::FUNC_ITEM &&
|
|
|
|
((Item_func *) item)->functype() == Item_func::EQ_FUNC &&
|
|
|
|
check_simple_equality(thd,
|
|
|
|
Item::Context(Item::ANY_SUBST,
|
Fix all warnings given by UBSAN
The easiest way to compile and test the server with UBSAN is to run:
./BUILD/compile-pentium64-ubsan
and then run mysql-test-run.
After this commit, one should be able to run this without any UBSAN
warnings. There is still a few compiler warnings that should be fixed
at some point, but these do not expose any real bugs.
The 'special' cases where we disable, suppress or circumvent UBSAN are:
- ref10 source (as here we intentionally do some shifts that UBSAN
complains about.
- x86 version of optimized int#korr() methods. UBSAN do not like unaligned
memory access of integers. Fixed by using byte_order_generic.h when
compiling with UBSAN
- We use smaller thread stack with ASAN and UBSAN, which forced me to
disable a few tests that prints the thread stack size.
- Verifying class types does not work for shared libraries. I added
suppression in mysql-test-run.pl for this case.
- Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is
safe to have overflows (two cases, in item_func.cc).
Things fixed:
- Don't left shift signed values
(byte_order_generic.h, mysqltest.c, item_sum.cc and many more)
- Don't assign not non existing values to enum variables.
- Ensure that bool and enum values are properly initialized in
constructors. This was needed as UBSAN checks that these types has
correct values when one copies an object.
(gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...)
- Ensure we do not called handler functions on unallocated objects or
deleted objects.
(events.cc, sql_acl.cc).
- Fixed bugs in Item_sp::Item_sp() where we did not call constructor
on Query_arena object.
- Fixed several cast of objects to an incompatible class!
(Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc,
sql_select.cc ...)
- Ensure we do not do integer arithmetic that causes over or underflows.
This includes also ++ and -- of integers.
(Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...)
- Added JSON_VALUE_UNITIALIZED to json_value_types and ensure that
value_type is initialized to this instead of to -1, which is not a valid
enum value for json_value_types.
- Ensure we do not call memcpy() when second argument could be null.
- Fixed that Item_func_str::make_empty_result() creates an empty string
instead of a null string (safer as it ensures we do not do arithmetic
on null strings).
Other things:
- Changed struct st_position to an OBJECT and added an initialization
function to it to ensure that we do not copy or use uninitialized
members. The change to a class was also motived that we used "struct
st_position" and POSITION randomly trough the code which was
confusing.
- Notably big rewrite in sql_acl.cc to avoid using deleted objects.
- Changed in sql_partition to use '^' instead of '-'. This is safe as
the operator is either 0 or 0x8000000000000000ULL.
- Added check for select_nr < INT_MAX in JOIN::build_explain() to
avoid bug when get_select() could return NULL.
- Reordered elements in POSITION for better alignment.
- Changed sql_test.cc::print_plan() to use pointers instead of objects.
- Fixed bug in find_set() where could could execute '1 << -1'.
- Added variable have_sanitizer, used by mtr. (This variable was before
only in 10.5 and up). It can now have one of two values:
ASAN or UBSAN.
- Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked
it virtual. This was an effort to get UBSAN to work with loaded storage
engines. I kept the change as the new place is better.
- Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast
in tabutil.cpp.
- Added HAVE_REPLICATION around usage of rgi_slave, to get embedded
server to compile with UBSAN. (Patch from Marko).
- Added #ifdef for powerpc64 to avoid a bug in old gcc versions related
to integer arithmetic.
Changes that should not be needed but had to be done to suppress warnings
from UBSAN:
- Added static_cast<<uint16_t>> around shift to get rid of a LOT of
compiler warnings when using UBSAN.
- Had to change some '/' of 2 base integers to shift to get rid of
some compile time warnings.
Reviewed by:
- Json changes: Alexey Botchkov
- Charset changes in ctype-uca.c: Alexander Barkov
- InnoDB changes & Embedded server: Marko Mäkelä
- sql_acl.cc changes: Vicențiu Ciorbaru
- build_explain() changes: Sergey Petrunia
2021-04-18 15:29:13 +03:00
|
|
|
((Item_func_eq *)item)->compare_type_handler(),
|
|
|
|
((Item_func_eq *)item)->compare_collation()),
|
2018-10-07 12:16:59 -07:00
|
|
|
((Item_func *)item)->arguments()[0],
|
|
|
|
((Item_func *)item)->arguments()[1],
|
2018-05-15 23:45:59 +02:00
|
|
|
&new_cond_equal))
|
|
|
|
li.remove();
|
|
|
|
}
|
|
|
|
|
|
|
|
it.rewind();
|
|
|
|
if (cond && cond->type() == Item::COND_ITEM &&
|
|
|
|
((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
|
|
{
|
|
|
|
/*
|
2019-03-23 15:28:22 +03:00
|
|
|
Case when cond is an AND-condition.
|
|
|
|
Union AND-condition cond, created multiple equalities from
|
|
|
|
new_cond_equal and remaining conditions from new_conds.
|
2018-05-15 23:45:59 +02:00
|
|
|
*/
|
|
|
|
COND_EQUAL *cond_equal= &((Item_cond_and *) cond)->m_cond_equal;
|
|
|
|
List<Item_equal> *cond_equalities= &cond_equal->current_level;
|
|
|
|
List<Item> *and_args= ((Item_cond_and *)cond)->argument_list();
|
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
/*
|
|
|
|
Disjoin multiple equalities of cond.
|
|
|
|
Merge these multiple equalities with the multiple equalities of
|
|
|
|
new_cond_equal. Save the result in new_cond_equal.
|
|
|
|
Check if after the merge some multiple equalities are knowingly
|
|
|
|
true or false.
|
|
|
|
*/
|
|
|
|
and_args->disjoin((List<Item> *) cond_equalities);
|
|
|
|
while ((mult_eq= it++))
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
2019-03-23 15:28:22 +03:00
|
|
|
mult_eq->upper_levels= 0;
|
|
|
|
mult_eq->merge_into_list(thd, cond_equalities, false, false);
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
List_iterator_fast<Item_equal> ei(*cond_equalities);
|
2019-03-23 15:28:22 +03:00
|
|
|
while ((mult_eq= ei++))
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
2019-03-23 15:28:22 +03:00
|
|
|
if (mult_eq->const_item() && !mult_eq->val_int())
|
2018-05-15 23:45:59 +02:00
|
|
|
is_simplified_cond= true;
|
2019-03-23 15:28:22 +03:00
|
|
|
else
|
|
|
|
{
|
|
|
|
mult_eq->unfix_fields();
|
|
|
|
if (mult_eq->fix_fields(thd, NULL))
|
|
|
|
return NULL;
|
|
|
|
}
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
li.rewind();
|
|
|
|
while ((item=li++))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
There still can be some equalities at not top level of new_conds
|
|
|
|
conditions that are not transformed into multiple equalities.
|
|
|
|
To transform them build_item_equal() is called.
|
|
|
|
|
|
|
|
Examples of not top level equalities:
|
|
|
|
|
|
|
|
1. (t1.a = 3) OR (t1.b > 5)
|
|
|
|
(t1.a = 3) - not top level equality.
|
|
|
|
It is inside OR condition
|
|
|
|
|
|
|
|
2. ((t3.d = t3.c) AND (t3.c < 15)) OR (t3.d > 1)
|
|
|
|
(t1.d = t3.c) - not top level equality.
|
|
|
|
It is inside AND condition which is a part of OR condition
|
|
|
|
*/
|
|
|
|
if (item->type() == Item::COND_ITEM &&
|
|
|
|
((Item_cond *)item)->functype() == Item_func::COND_OR_FUNC)
|
|
|
|
{
|
|
|
|
item= item->build_equal_items(thd,
|
|
|
|
&((Item_cond_and *) cond)->m_cond_equal,
|
|
|
|
false, NULL);
|
|
|
|
}
|
|
|
|
and_args->push_back(item, thd->mem_root);
|
|
|
|
}
|
2018-05-15 23:45:59 +02:00
|
|
|
and_args->append((List<Item> *) cond_equalities);
|
|
|
|
*cond_eq= &((Item_cond_and *) cond)->m_cond_equal;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2019-03-23 15:28:22 +03:00
|
|
|
Case when cond isn't an AND-condition or is NULL.
|
2018-05-15 23:45:59 +02:00
|
|
|
There can be several cases:
|
|
|
|
|
|
|
|
1. cond is a multiple equality.
|
2019-03-23 15:28:22 +03:00
|
|
|
In this case merge cond with the multiple equalities of
|
2018-05-15 23:45:59 +02:00
|
|
|
new_cond_equal.
|
2019-03-23 15:28:22 +03:00
|
|
|
Create new condition from the created multiple equalities
|
|
|
|
and new_conds list conditions.
|
2018-05-15 23:45:59 +02:00
|
|
|
2. cond is NULL
|
2019-03-23 15:28:22 +03:00
|
|
|
Create new condition from new_conds list conditions
|
|
|
|
and multiple equalities from new_cond_equal.
|
2018-05-15 23:45:59 +02:00
|
|
|
3. Otherwise
|
2019-03-23 15:28:22 +03:00
|
|
|
Create new condition through union of cond, conditions from new_conds
|
|
|
|
list and created multiple equalities from new_cond_equal.
|
2018-05-15 23:45:59 +02:00
|
|
|
*/
|
|
|
|
List<Item> new_conds_list;
|
|
|
|
/* Flag is set to true if cond is a multiple equality */
|
|
|
|
bool is_mult_eq= (cond && cond->type() == Item::FUNC_ITEM &&
|
|
|
|
((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC);
|
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
/*
|
|
|
|
If cond is non-empty and is not multiple equality save it as
|
|
|
|
a part of a new condition.
|
|
|
|
*/
|
2018-05-15 23:45:59 +02:00
|
|
|
if (cond && !is_mult_eq &&
|
|
|
|
new_conds_list.push_back(cond, thd->mem_root))
|
|
|
|
return NULL;
|
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
/*
|
|
|
|
If cond is a multiple equality merge it with new_cond_equal
|
|
|
|
multiple equalities.
|
|
|
|
*/
|
2018-10-17 01:27:25 -07:00
|
|
|
if (is_mult_eq)
|
|
|
|
{
|
|
|
|
Item_equal *eq_cond= (Item_equal *)cond;
|
|
|
|
eq_cond->upper_levels= 0;
|
|
|
|
eq_cond->merge_into_list(thd, &new_cond_equal.current_level,
|
|
|
|
false, false);
|
2019-03-23 15:28:22 +03:00
|
|
|
}
|
2018-10-17 01:27:25 -07:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
/**
|
|
|
|
Fix created multiple equalities and check if they are knowingly
|
|
|
|
true or false.
|
|
|
|
*/
|
|
|
|
List_iterator_fast<Item_equal> ei(new_cond_equal.current_level);
|
|
|
|
while ((mult_eq=ei++))
|
|
|
|
{
|
|
|
|
if (mult_eq->const_item() && !mult_eq->val_int())
|
|
|
|
is_simplified_cond= true;
|
|
|
|
else
|
2018-06-17 19:48:00 +02:00
|
|
|
{
|
2019-03-23 15:28:22 +03:00
|
|
|
mult_eq->unfix_fields();
|
|
|
|
if (mult_eq->fix_fields(thd, NULL))
|
|
|
|
return NULL;
|
2018-06-17 19:48:00 +02:00
|
|
|
}
|
2019-03-23 15:28:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Create AND condition if new condition will have two or
|
|
|
|
more elements.
|
|
|
|
*/
|
|
|
|
Item_cond_and *and_cond= 0;
|
|
|
|
COND_EQUAL *inherited= 0;
|
|
|
|
if (new_conds_list.elements +
|
|
|
|
new_conds.elements +
|
|
|
|
new_cond_equal.current_level.elements > 1)
|
|
|
|
{
|
|
|
|
and_cond= new (thd->mem_root) Item_cond_and(thd);
|
|
|
|
and_cond->m_cond_equal.copy(new_cond_equal);
|
|
|
|
inherited= &and_cond->m_cond_equal;
|
|
|
|
}
|
2018-10-17 01:27:25 -07:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
li.rewind();
|
|
|
|
while ((item=li++))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Look for the comment in the case when cond is an
|
|
|
|
AND condition above the build_equal_items() call.
|
|
|
|
*/
|
|
|
|
if (item->type() == Item::COND_ITEM &&
|
|
|
|
((Item_cond *)item)->functype() == Item_func::COND_OR_FUNC)
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
2019-03-23 15:28:22 +03:00
|
|
|
item= item->build_equal_items(thd, inherited, false, NULL);
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
2019-03-23 15:28:22 +03:00
|
|
|
new_conds_list.push_back(item, thd->mem_root);
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
2018-06-17 19:48:00 +02:00
|
|
|
new_conds_list.append((List<Item> *)&new_cond_equal.current_level);
|
2018-05-15 23:45:59 +02:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
if (and_cond)
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
2019-03-23 15:28:22 +03:00
|
|
|
and_cond->argument_list()->append(&new_conds_list);
|
2018-05-15 23:45:59 +02:00
|
|
|
cond= (Item *)and_cond;
|
2019-03-23 15:28:22 +03:00
|
|
|
*cond_eq= &((Item_cond_and *) cond)->m_cond_equal;
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
List_iterator_fast<Item> iter(new_conds_list);
|
|
|
|
cond= iter++;
|
2019-03-23 15:28:22 +03:00
|
|
|
if (cond->type() == Item::FUNC_ITEM &&
|
|
|
|
((Item_func *)cond)->functype() == Item_func::MULT_EQUAL_FUNC)
|
|
|
|
{
|
|
|
|
if (!(*cond_eq))
|
|
|
|
*cond_eq= new COND_EQUAL();
|
|
|
|
(*cond_eq)->copy(new_cond_equal);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
*cond_eq= 0;
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
2019-03-23 15:28:22 +03:00
|
|
|
}
|
2018-05-15 23:45:59 +02:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
if (!cond)
|
|
|
|
return NULL;
|
2018-05-15 23:45:59 +02:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
if (*cond_eq)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
The multiple equalities are attached only to the upper level
|
|
|
|
of AND-condition cond.
|
|
|
|
Push them down to the bottom levels of cond AND-condition if needed.
|
|
|
|
*/
|
|
|
|
propagate_new_equalities(thd, cond,
|
|
|
|
&(*cond_eq)->current_level,
|
|
|
|
0,
|
|
|
|
&is_simplified_cond);
|
|
|
|
cond= cond->propagate_equal_fields(thd,
|
|
|
|
Item::Context_boolean(),
|
|
|
|
*cond_eq);
|
2019-04-22 16:19:55 +03:00
|
|
|
cond->update_used_tables();
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
2019-04-22 16:19:55 +03:00
|
|
|
/* Check if conds has knowingly true or false parts. */
|
|
|
|
if (cond &&
|
|
|
|
!is_simplified_cond &&
|
|
|
|
cond->walk(&Item::is_simplified_cond_processor, 0, 0))
|
|
|
|
is_simplified_cond= true;
|
|
|
|
|
2018-05-15 23:45:59 +02:00
|
|
|
|
|
|
|
/*
|
2019-03-23 15:28:22 +03:00
|
|
|
If it was found that there are some knowingly true or false equalities
|
|
|
|
remove them from cond and set cond_value to the appropriate value.
|
2018-05-15 23:45:59 +02:00
|
|
|
*/
|
2019-03-23 15:28:22 +03:00
|
|
|
if (cond && is_simplified_cond)
|
2018-05-15 23:45:59 +02:00
|
|
|
cond= cond->remove_eq_conds(thd, cond_value, true);
|
2018-06-17 19:48:00 +02:00
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
if (cond && cond->fix_fields_if_needed(thd, NULL))
|
|
|
|
return NULL;
|
|
|
|
|
2018-05-15 23:45:59 +02:00
|
|
|
return cond;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief Materialize a degenerate jtbm semi join
|
|
|
|
|
|
|
|
@param thd thread handler
|
|
|
|
@param tbl table list for the target jtbm semi join table
|
|
|
|
@param subq_pred IN subquery predicate with the degenerate jtbm semi join
|
|
|
|
@param eq_list IN/OUT the list where to add produced equalities
|
|
|
|
|
|
|
|
@details
|
|
|
|
The method materializes the degenerate jtbm semi join for the
|
|
|
|
subquery from the IN subquery predicate subq_pred taking table
|
|
|
|
as the target for materialization.
|
|
|
|
Any degenerate table is guaranteed to produce 0 or 1 record.
|
|
|
|
Examples of both cases:
|
|
|
|
|
|
|
|
select * from ot where col in (select ... from it where 2>3)
|
|
|
|
select * from ot where col in (select MY_MIN(it.key) from it)
|
|
|
|
|
|
|
|
in this case, there is no necessity to create a temp.table for
|
|
|
|
materialization.
|
|
|
|
We now just need to
|
|
|
|
1. Check whether 1 or 0 records are produced, setup this as a
|
|
|
|
constant join tab.
|
|
|
|
2. Create a dummy temporary table, because all of the join
|
|
|
|
optimization code relies on TABLE object being present.
|
|
|
|
|
|
|
|
In the case when materialization produces one row the function
|
|
|
|
additionally creates equalities between the expressions from the
|
|
|
|
left part of the IN subquery predicate and the corresponding
|
|
|
|
columns of the produced row. These equalities are added to the
|
|
|
|
list eq_list. They are supposed to be conjuncted with the condition
|
|
|
|
of the WHERE clause.
|
|
|
|
|
|
|
|
@retval TRUE if an error occurs
|
|
|
|
@retval FALSE otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool execute_degenerate_jtbm_semi_join(THD *thd,
|
|
|
|
TABLE_LIST *tbl,
|
|
|
|
Item_in_subselect *subq_pred,
|
|
|
|
List<Item> &eq_list)
|
|
|
|
{
|
|
|
|
DBUG_ENTER("execute_degenerate_jtbm_semi_join");
|
|
|
|
select_value_catcher *new_sink;
|
|
|
|
|
|
|
|
DBUG_ASSERT(subq_pred->engine->engine_type() ==
|
|
|
|
subselect_engine::SINGLE_SELECT_ENGINE);
|
|
|
|
subselect_single_select_engine *engine=
|
|
|
|
(subselect_single_select_engine*)subq_pred->engine;
|
|
|
|
if (!(new_sink= new (thd->mem_root) select_value_catcher(thd, subq_pred)))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (new_sink->setup(&engine->select_lex->join->fields_list) ||
|
|
|
|
engine->select_lex->join->change_result(new_sink, NULL) ||
|
|
|
|
engine->exec())
|
|
|
|
{
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
subq_pred->is_jtbm_const_tab= TRUE;
|
|
|
|
|
|
|
|
if (new_sink->assigned)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Subselect produced one row, which is saved in new_sink->row.
|
|
|
|
Save "left_expr[i] == row[i]" equalities into the eq_list.
|
|
|
|
*/
|
|
|
|
subq_pred->jtbm_const_row_found= TRUE;
|
|
|
|
|
|
|
|
Item *eq_cond;
|
2020-06-30 15:20:11 +02:00
|
|
|
Item *left_exp= subq_pred->left_exp();
|
|
|
|
uint ncols= left_exp->cols();
|
|
|
|
for (uint i= 0; i < ncols; i++)
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
|
|
|
eq_cond=
|
|
|
|
new (thd->mem_root) Item_func_eq(thd,
|
2020-06-30 15:20:11 +02:00
|
|
|
left_exp->element_index(i),
|
2018-05-15 23:45:59 +02:00
|
|
|
new_sink->row[i]);
|
2018-07-13 21:14:18 +02:00
|
|
|
if (!eq_cond || eq_cond->fix_fields(thd, NULL) ||
|
|
|
|
eq_list.push_back(eq_cond, thd->mem_root))
|
2018-05-15 23:45:59 +02:00
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Subselect produced no rows. Just set the flag */
|
|
|
|
subq_pred->jtbm_const_row_found= FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
TABLE *dummy_table;
|
|
|
|
if (!(dummy_table= create_dummy_tmp_table(thd)))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
tbl->table= dummy_table;
|
|
|
|
tbl->table->pos_in_table_list= tbl;
|
|
|
|
/*
|
|
|
|
Note: the table created above may be freed by:
|
|
|
|
1. JOIN_TAB::cleanup(), when the parent join is a regular join.
|
|
|
|
2. cleanup_empty_jtbm_semi_joins(), when the parent join is a
|
|
|
|
degenerate join (e.g. one with "Impossible where").
|
|
|
|
*/
|
|
|
|
setup_table_map(tbl->table, tbl, tbl->jtbm_table_no);
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Execute degenerate jtbm semi joins before optimize_cond() for parent
|
|
|
|
|
|
|
|
@param join the parent join for jtbm semi joins
|
|
|
|
@param join_list the list of tables where jtbm semi joins are processed
|
|
|
|
@param eq_list IN/OUT the list where to add equalities produced after
|
|
|
|
materialization of single-row degenerate jtbm semi joins
|
|
|
|
|
|
|
|
@details
|
|
|
|
The method traverses join_list trying to find any degenerate jtbm semi
|
|
|
|
joins for subqueries of IN predicates. For each degenerate jtbm
|
|
|
|
semi join execute_degenerate_jtbm_semi_join() is called. As a result
|
|
|
|
of this call new equalities that substitute for single-row materialized
|
|
|
|
jtbm semi join are added to eq_list.
|
|
|
|
|
|
|
|
In the case when a table is nested in another table 'nested_join' the
|
|
|
|
method is recursively called for the join_list of the 'nested_join' trying
|
|
|
|
to find in the list any degenerate jtbm semi joins. Currently a jtbm semi
|
|
|
|
join may occur in a mergeable semi join nest.
|
|
|
|
|
|
|
|
@retval TRUE if an error occurs
|
|
|
|
@retval FALSE otherwise
|
2011-12-14 04:39:29 +04:00
|
|
|
*/
|
|
|
|
|
2018-05-15 23:45:59 +02:00
|
|
|
bool setup_degenerate_jtbm_semi_joins(JOIN *join,
|
|
|
|
List<TABLE_LIST> *join_list,
|
|
|
|
List<Item> &eq_list)
|
|
|
|
{
|
|
|
|
TABLE_LIST *table;
|
|
|
|
NESTED_JOIN *nested_join;
|
|
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
|
|
THD *thd= join->thd;
|
|
|
|
DBUG_ENTER("setup_degenerate_jtbm_semi_joins");
|
|
|
|
|
|
|
|
while ((table= li++))
|
|
|
|
{
|
|
|
|
Item_in_subselect *subq_pred;
|
|
|
|
|
|
|
|
if ((subq_pred= table->jtbm_subselect))
|
|
|
|
{
|
|
|
|
JOIN *subq_join= subq_pred->unit->first_select()->join;
|
|
|
|
|
|
|
|
if (!subq_join->tables_list || !subq_join->table_count)
|
|
|
|
{
|
|
|
|
if (execute_degenerate_jtbm_semi_join(thd,
|
|
|
|
table,
|
|
|
|
subq_pred,
|
|
|
|
eq_list))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
join->is_orig_degenerated= true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ((nested_join= table->nested_join))
|
|
|
|
{
|
|
|
|
if (setup_degenerate_jtbm_semi_joins(join,
|
|
|
|
&nested_join->join_list,
|
|
|
|
eq_list))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Optimize jtbm semi joins for materialization
|
|
|
|
|
|
|
|
@param join the parent join for jtbm semi joins
|
|
|
|
@param join_list the list of TABLE_LIST objects where jtbm semi join
|
|
|
|
can occur
|
|
|
|
@param eq_list IN/OUT the list where to add produced equalities
|
|
|
|
|
|
|
|
@details
|
|
|
|
This method is called by the optimizer after the call of
|
|
|
|
optimize_cond() for parent select.
|
|
|
|
The method traverses join_list trying to find any jtbm semi joins for
|
|
|
|
subqueries from IN predicates and optimizes them.
|
|
|
|
After the optimization some of jtbm semi joins may become degenerate.
|
|
|
|
For example the subquery 'SELECT MAX(b) FROM t2' from the query
|
|
|
|
|
|
|
|
SELECT * FROM t1 WHERE 4 IN (SELECT MAX(b) FROM t2);
|
|
|
|
|
|
|
|
will become degenerate if there is an index on t2.b.
|
|
|
|
If a subquery becomes degenerate it is handled by the function
|
|
|
|
execute_degenerate_jtbm_semi_join().
|
|
|
|
|
|
|
|
Otherwise the method creates a temporary table in which the subquery
|
|
|
|
of the jtbm semi join will be materialied.
|
|
|
|
|
|
|
|
The function saves the equalities between all pairs of the expressions
|
|
|
|
from the left part of the IN subquery predicate and the corresponding
|
|
|
|
columns of the subquery from the predicate in eq_list appending them
|
|
|
|
to the list. The equalities of eq_list will be later conjucted with the
|
|
|
|
condition of the WHERE clause.
|
|
|
|
|
|
|
|
In the case when a table is nested in another table 'nested_join' the
|
|
|
|
method is recursively called for the join_list of the 'nested_join' trying
|
|
|
|
to find in the list any degenerate jtbm semi joins. Currently a jtbm semi
|
|
|
|
join may occur in a mergeable semi join nest.
|
|
|
|
|
|
|
|
@retval TRUE if an error occurs
|
|
|
|
@retval FALSE otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,
|
|
|
|
List<Item> &eq_list)
|
2011-12-14 04:39:29 +04:00
|
|
|
{
|
|
|
|
TABLE_LIST *table;
|
|
|
|
NESTED_JOIN *nested_join;
|
|
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
2015-08-20 15:24:13 +03:00
|
|
|
THD *thd= join->thd;
|
2011-12-14 04:39:29 +04:00
|
|
|
DBUG_ENTER("setup_jtbm_semi_joins");
|
2018-05-15 23:45:59 +02:00
|
|
|
|
2011-12-14 04:39:29 +04:00
|
|
|
while ((table= li++))
|
|
|
|
{
|
2018-05-15 23:45:59 +02:00
|
|
|
Item_in_subselect *subq_pred;
|
2011-12-14 04:39:29 +04:00
|
|
|
|
2018-05-15 23:45:59 +02:00
|
|
|
if ((subq_pred= table->jtbm_subselect))
|
2011-12-14 04:39:29 +04:00
|
|
|
{
|
|
|
|
double rows;
|
|
|
|
double read_time;
|
|
|
|
|
|
|
|
/*
|
2018-05-15 23:45:59 +02:00
|
|
|
Perform optimization of the subquery, so that we know estimated
|
2011-12-14 04:39:29 +04:00
|
|
|
- cost of materialization process
|
|
|
|
- how many records will be in the materialized temp.table
|
|
|
|
*/
|
|
|
|
if (subq_pred->optimize(&rows, &read_time))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
|
|
|
|
subq_pred->jtbm_read_time= read_time;
|
|
|
|
subq_pred->jtbm_record_count=rows;
|
|
|
|
JOIN *subq_join= subq_pred->unit->first_select()->join;
|
|
|
|
|
|
|
|
if (!subq_join->tables_list || !subq_join->table_count)
|
|
|
|
{
|
2018-05-15 23:45:59 +02:00
|
|
|
if (!join->is_orig_degenerated &&
|
|
|
|
execute_degenerate_jtbm_semi_join(thd, table, subq_pred,
|
|
|
|
eq_list))
|
2011-12-14 04:39:29 +04:00
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(subq_pred->test_set_strategy(SUBS_MATERIALIZATION));
|
|
|
|
subq_pred->is_jtbm_const_tab= FALSE;
|
|
|
|
subselect_hash_sj_engine *hash_sj_engine=
|
2018-05-15 23:45:59 +02:00
|
|
|
((subselect_hash_sj_engine*)subq_pred->engine);
|
2011-12-14 04:39:29 +04:00
|
|
|
|
|
|
|
table->table= hash_sj_engine->tmp_table;
|
|
|
|
table->table->pos_in_table_list= table;
|
|
|
|
|
|
|
|
setup_table_map(table->table, table, table->jtbm_table_no);
|
|
|
|
|
2018-05-15 23:45:59 +02:00
|
|
|
List_iterator<Item> li(*hash_sj_engine->semi_join_conds->argument_list());
|
|
|
|
Item *item;
|
|
|
|
while ((item=li++))
|
|
|
|
{
|
2018-07-29 14:40:58 +02:00
|
|
|
item->update_used_tables();
|
2018-05-15 23:45:59 +02:00
|
|
|
if (eq_list.push_back(item, thd->mem_root))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
2011-12-14 04:39:29 +04:00
|
|
|
}
|
2014-02-26 15:28:07 +01:00
|
|
|
table->table->maybe_null= MY_TEST(join->mixed_implicit_grouping);
|
2011-12-14 04:39:29 +04:00
|
|
|
}
|
|
|
|
if ((nested_join= table->nested_join))
|
|
|
|
{
|
2018-05-15 23:45:59 +02:00
|
|
|
if (setup_jtbm_semi_joins(join, &nested_join->join_list, eq_list))
|
2011-12-14 04:39:29 +04:00
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-09-12 13:53:13 +04:00
|
|
|
/*
|
|
|
|
Cleanup non-merged semi-joins (JBMs) that have empty.
|
|
|
|
|
|
|
|
This function is to cleanups for a special case:
|
|
|
|
Consider a query like
|
|
|
|
|
|
|
|
select * from t1 where 1=2 AND t1.col IN (select max(..) ... having 1=2)
|
|
|
|
|
|
|
|
For this query, optimization of subquery will short-circuit, and
|
|
|
|
setup_jtbm_semi_joins() will call create_dummy_tmp_table() so that we have
|
|
|
|
empty, constant temp.table to stand in as materialized temp. table.
|
|
|
|
|
|
|
|
Now, suppose that the upper join is also found to be degenerate. In that
|
|
|
|
case, no JOIN_TAB array will be produced, and hence, JOIN::cleanup() will
|
|
|
|
have a problem with cleaning up empty JTBMs (non-empty ones are cleaned up
|
|
|
|
through Item::cleanup() calls).
|
|
|
|
*/
|
|
|
|
|
2014-02-07 20:51:31 +04:00
|
|
|
void cleanup_empty_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list)
|
2013-09-12 13:53:13 +04:00
|
|
|
{
|
2014-02-07 20:51:31 +04:00
|
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
2013-09-12 13:53:13 +04:00
|
|
|
TABLE_LIST *table;
|
|
|
|
while ((table= li++))
|
|
|
|
{
|
|
|
|
if ((table->jtbm_subselect && table->jtbm_subselect->is_jtbm_const_tab))
|
|
|
|
{
|
|
|
|
if (table->table)
|
|
|
|
{
|
|
|
|
free_tmp_table(join->thd, table->table);
|
|
|
|
table->table= NULL;
|
|
|
|
}
|
|
|
|
}
|
2014-02-07 20:51:31 +04:00
|
|
|
else if (table->nested_join && table->sj_subq_pred)
|
|
|
|
{
|
|
|
|
cleanup_empty_jtbm_semi_joins(join, &table->nested_join->join_list);
|
|
|
|
}
|
2013-09-12 13:53:13 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-10-05 16:00:31 +03:00
|
|
|
/**
|
|
|
|
Choose an optimal strategy to execute an IN/ALL/ANY subquery predicate
|
|
|
|
based on cost.
|
|
|
|
|
|
|
|
@param join_tables the set of tables joined in the subquery
|
|
|
|
|
|
|
|
@notes
|
|
|
|
The method chooses between the materialization and IN=>EXISTS rewrite
|
|
|
|
strategies for the execution of a non-flattened subquery IN predicate.
|
|
|
|
The cost-based decision is made as follows:
|
|
|
|
|
|
|
|
1. compute materialize_strategy_cost based on the unmodified subquery
|
|
|
|
2. reoptimize the subquery taking into account the IN-EXISTS predicates
|
|
|
|
3. compute in_exists_strategy_cost based on the reoptimized plan
|
|
|
|
4. compare and set the cheaper strategy
|
|
|
|
if (materialize_strategy_cost >= in_exists_strategy_cost)
|
|
|
|
in_strategy = MATERIALIZATION
|
|
|
|
else
|
|
|
|
in_strategy = IN_TO_EXISTS
|
|
|
|
5. if in_strategy = MATERIALIZATION and it is not possible to initialize it
|
|
|
|
revert to IN_TO_EXISTS
|
|
|
|
6. if (in_strategy == MATERIALIZATION)
|
|
|
|
revert the subquery plan to the original one before reoptimizing
|
|
|
|
else
|
|
|
|
inject the IN=>EXISTS predicates into the new EXISTS subquery plan
|
|
|
|
|
|
|
|
The implementation itself is a bit more complicated because it takes into
|
|
|
|
account two more factors:
|
|
|
|
- whether the user allowed both strategies through an optimizer_switch, and
|
|
|
|
- if materialization was the cheaper strategy, whether it can be executed
|
|
|
|
or not.
|
|
|
|
|
|
|
|
@retval FALSE success.
|
|
|
|
@retval TRUE error occurred.
|
|
|
|
*/
|
|
|
|
|
2010-09-30 18:32:44 +03:00
|
|
|
bool JOIN::choose_subquery_plan(table_map join_tables)
|
2010-12-23 17:33:00 +02:00
|
|
|
{
|
2010-11-02 15:27:01 +02:00
|
|
|
enum_reopt_result reopt_result= REOPT_NONE;
|
2010-09-30 18:32:44 +03:00
|
|
|
Item_in_subselect *in_subs;
|
|
|
|
|
2011-11-24 15:12:10 +02:00
|
|
|
/*
|
|
|
|
IN/ALL/ANY optimizations are not applicable for so called fake select
|
|
|
|
(this select exists only to filter results of union if it is needed).
|
|
|
|
*/
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done an a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that where accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' where not taken into
account, which made filters cost less than there actually where.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they where before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is bases on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this on can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB:::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It prefered JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make lables more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way to cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
if (select_lex == select_lex->master_unit()->fake_select_lex ||
|
|
|
|
likely(!is_in_subquery()))
|
2010-09-30 18:32:44 +03:00
|
|
|
return false;
|
2012-03-21 09:55:48 +01:00
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
in_subs= unit->item->get_IN_subquery();
|
|
|
|
if (in_subs->create_in_to_exists_cond(this))
|
|
|
|
return true;
|
|
|
|
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
/* A strategy must be chosen earlier. */
|
|
|
|
DBUG_ASSERT(in_subs->has_strategy());
|
2010-09-30 18:32:44 +03:00
|
|
|
DBUG_ASSERT(in_to_exists_where || in_to_exists_having);
|
2020-08-14 19:51:10 +03:00
|
|
|
DBUG_ASSERT(!in_to_exists_where || in_to_exists_where->fixed());
|
|
|
|
DBUG_ASSERT(!in_to_exists_having || in_to_exists_having->fixed());
|
2010-09-05 18:43:47 +03:00
|
|
|
|
2012-03-23 18:18:16 +02:00
|
|
|
/* The original QEP of the subquery. */
|
|
|
|
Join_plan_state save_qep(table_count);
|
2012-03-21 09:55:48 +01:00
|
|
|
|
2010-09-16 16:49:20 +03:00
|
|
|
/*
|
2010-09-30 18:32:44 +03:00
|
|
|
Compute and compare the costs of materialization and in-exists if both
|
|
|
|
strategies are possible and allowed by the user (checked during the prepare
|
|
|
|
phase).
|
2010-09-16 16:49:20 +03:00
|
|
|
*/
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
if (in_subs->test_strategy(SUBS_MATERIALIZATION) &&
|
|
|
|
in_subs->test_strategy(SUBS_IN_TO_EXISTS))
|
2010-09-30 18:32:44 +03:00
|
|
|
{
|
2010-12-23 17:33:00 +02:00
|
|
|
JOIN *outer_join;
|
2010-09-30 18:32:44 +03:00
|
|
|
JOIN *inner_join= this;
|
2010-12-23 17:33:00 +02:00
|
|
|
/* Number of unique value combinations filtered by the IN predicate. */
|
|
|
|
double outer_lookup_keys;
|
|
|
|
/* Cost and row count of the unmodified subquery. */
|
2010-10-09 17:48:05 +03:00
|
|
|
double inner_read_time_1, inner_record_count_1;
|
2011-02-03 17:00:28 +02:00
|
|
|
/* Cost of the subquery with injected IN-EXISTS predicates. */
|
2010-12-23 17:33:00 +02:00
|
|
|
double inner_read_time_2;
|
2010-09-30 18:32:44 +03:00
|
|
|
/* The cost to compute IN via materialization. */
|
2010-10-09 17:48:05 +03:00
|
|
|
double materialize_strategy_cost;
|
2010-09-30 18:32:44 +03:00
|
|
|
/* The cost of the IN->EXISTS strategy. */
|
2010-10-09 17:48:05 +03:00
|
|
|
double in_exists_strategy_cost;
|
2010-12-23 17:33:00 +02:00
|
|
|
double dummy;
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
const char *strategy;
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2010-12-23 17:33:00 +02:00
|
|
|
/*
|
|
|
|
A. Estimate the number of rows of the outer table that will be filtered
|
|
|
|
by the IN predicate.
|
|
|
|
*/
|
|
|
|
outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
|
2012-05-17 13:46:05 +03:00
|
|
|
/*
|
|
|
|
Get the cost of the outer join if:
|
|
|
|
(1) It has at least one table, and
|
|
|
|
(2) It has been already optimized (if there is no join_tab, then the
|
|
|
|
outer join has not been optimized yet).
|
|
|
|
*/
|
|
|
|
if (outer_join && outer_join->table_count > 0 && // (1)
|
2016-02-09 01:46:53 +03:00
|
|
|
outer_join->join_tab && // (2)
|
|
|
|
!in_subs->const_item())
|
2010-12-15 12:54:25 +02:00
|
|
|
{
|
2011-06-21 15:50:07 +03:00
|
|
|
/*
|
|
|
|
TODO:
|
|
|
|
Currently outer_lookup_keys is computed as the number of rows in
|
|
|
|
the partial join including the JOIN_TAB where the IN predicate is
|
|
|
|
pushed to. In the general case this is a gross overestimate because
|
|
|
|
due to caching we are interested only in the number of unique keys.
|
|
|
|
The search key may be formed by columns from much fewer than all
|
|
|
|
tables in the partial join. Example:
|
|
|
|
select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
|
|
|
|
If the join order: t1, t2, the number of unique lookup keys is ~ to
|
|
|
|
the number of unique values t2.c2 in the partial join t1 join t2.
|
|
|
|
*/
|
2011-09-02 22:43:35 +04:00
|
|
|
outer_join->get_partial_cost_and_fanout(in_subs->get_join_tab_idx(),
|
2011-05-25 19:31:13 +04:00
|
|
|
table_map(-1),
|
|
|
|
&dummy,
|
2011-06-21 15:50:07 +03:00
|
|
|
&outer_lookup_keys);
|
2010-12-15 12:54:25 +02:00
|
|
|
}
|
2010-09-30 18:32:44 +03:00
|
|
|
else
|
|
|
|
{
|
2010-10-05 16:00:31 +03:00
|
|
|
/*
|
|
|
|
TODO: outer_join can be NULL for DELETE statements.
|
|
|
|
How to compute its cost?
|
|
|
|
*/
|
2011-06-21 15:50:07 +03:00
|
|
|
outer_lookup_keys= 1;
|
2010-09-30 18:32:44 +03:00
|
|
|
}
|
|
|
|
|
2010-12-23 17:33:00 +02:00
|
|
|
/*
|
|
|
|
B. Estimate the cost and number of records of the subquery both
|
|
|
|
unmodified, and with injected IN->EXISTS predicates.
|
|
|
|
*/
|
2010-12-15 12:54:25 +02:00
|
|
|
inner_read_time_1= inner_join->best_read;
|
2015-07-06 20:24:14 +03:00
|
|
|
inner_record_count_1= inner_join->join_record_count;
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2011-05-25 19:31:13 +04:00
|
|
|
if (in_to_exists_where && const_tables != table_count)
|
2010-09-05 18:43:47 +03:00
|
|
|
{
|
|
|
|
/*
|
2010-09-30 18:32:44 +03:00
|
|
|
Re-optimize and cost the subquery taking into account the IN-EXISTS
|
|
|
|
conditions.
|
2010-09-05 18:43:47 +03:00
|
|
|
*/
|
2010-12-15 12:54:25 +02:00
|
|
|
reopt_result= reoptimize(in_to_exists_where, join_tables, &save_qep);
|
|
|
|
if (reopt_result == REOPT_ERROR)
|
2010-09-30 18:32:44 +03:00
|
|
|
return TRUE;
|
|
|
|
|
2011-02-03 17:00:28 +02:00
|
|
|
/* Get the cost of the modified IN-EXISTS plan. */
|
2010-12-15 12:54:25 +02:00
|
|
|
inner_read_time_2= inner_join->best_read;
|
2010-09-30 18:32:44 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Reoptimization would not produce any better plan. */
|
|
|
|
inner_read_time_2= inner_read_time_1;
|
2010-09-05 18:43:47 +03:00
|
|
|
}
|
2010-09-30 18:32:44 +03:00
|
|
|
|
|
|
|
/*
|
2010-12-23 17:33:00 +02:00
|
|
|
C. Compute execution costs.
|
2010-09-30 18:32:44 +03:00
|
|
|
*/
|
2010-12-23 17:33:00 +02:00
|
|
|
/* C.1 Compute the cost of the materialization strategy. */
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accesses, clustered keys or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
bool blobs_used;
|
|
|
|
uint rowlen= get_tmp_table_rec_length(ref_ptrs,
|
|
|
|
select_lex->item_list.elements,
|
|
|
|
&blobs_used);
|
|
|
|
/* The cost of using the temp table */
|
|
|
|
TMPTABLE_COSTS cost= get_tmp_table_costs(thd, inner_record_count_1,
|
2022-06-16 13:12:01 +03:00
|
|
|
rowlen, blobs_used, 1);
|
2010-09-30 18:32:44 +03:00
|
|
|
/*
|
2010-12-15 12:54:25 +02:00
|
|
|
The cost of executing the subquery and storing its result in an indexed
|
|
|
|
temporary table.
|
2010-09-30 18:32:44 +03:00
|
|
|
*/
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
double materialization_cost=
|
|
|
|
COST_ADD(cost.create,
|
|
|
|
COST_ADD(inner_read_time_1,
|
2022-06-16 13:12:01 +03:00
|
|
|
COST_MULT((cost.write + WHERE_COST_THD(thd)),
|
|
|
|
inner_record_count_1)));
|
2010-12-15 12:54:25 +02:00
|
|
|
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some test to changed their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide the correct best_cost to the upper level, which
means that the cost for hash joins was more expensive than calculated
in best_access_path() (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
materialize_strategy_cost=
|
|
|
|
COST_ADD(materialization_cost,
|
|
|
|
COST_MULT(outer_lookup_keys, cost.lookup));
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2010-12-23 17:33:00 +02:00
|
|
|
/* C.2 Compute the cost of the IN=>EXISTS strategy. */
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost of
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. More tuning is
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as we
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide the correct best_cost to the upper level, which
means that the cost for hash joins was more expensive than calculated
in best_access_path() (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
in_exists_strategy_cost=
|
|
|
|
COST_MULT(outer_lookup_keys, inner_read_time_2);
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2010-12-23 17:33:00 +02:00
|
|
|
/* C.3 Compare the costs and choose the cheaper strategy. */
|
2010-09-30 18:32:44 +03:00
|
|
|
if (materialize_strategy_cost >= in_exists_strategy_cost)
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost of
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. More tuning is
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' where replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as we
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
{
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using a index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
strategy= "in_to_exists";
|
|
|
|
}
|
2010-09-30 18:32:44 +03:00
|
|
|
else
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually did.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
{
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subs->set_strategy(SUBS_MATERIALIZATION);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters appear cheaper than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
strategy= "materialization";
|
|
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
2022-06-16 13:12:01 +03:00
|
|
|
Json_writer_object trace_wrapper(thd);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters appear cheaper than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key dive for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
Json_writer_object trace_subquery(thd, "subquery_plan");
|
|
|
|
trace_subquery.
|
Changing all cost calculation to be given in milliseconds
This makes it easier to compare different costs and also allows
the optimizer to optimize different storage engines more reliably.
- Added tests/check_costs.pl, a tool to verify optimizer cost calculations.
- Most engine costs have been found with this program. All steps to
calculate the new costs are documented in Docs/optimizer_costs.txt
- User optimizer_cost variables are given in microseconds (as individual
costs can be very small). Internally they are stored in ms.
- Changed DISK_READ_COST (was DISK_SEEK_BASE_COST) from a hard disk cost
(9 ms) to common SSD cost (400MB/sec).
- Removed cost calculations for hard disks (rotation etc).
- Changed the following handler functions to return IO_AND_CPU_COST.
This makes it easy to apply different cost modifiers in ha_..time()
functions for io and cpu costs.
- scan_time()
- rnd_pos_time() & rnd_pos_call_time()
- keyread_time()
- Enhanced keyread_time() to calculate the full cost of reading a set
of keys with a given number of ranges and optional number of blocks that
need to be accessed.
- Removed read_time() as keyread_time() + rnd_pos_time() can do the same
thing and more.
- Tuned cost for: heap, myisam, Aria, InnoDB, archive and MyRocks.
Used heap table costs for json_table. The rest are using default engine
costs.
- Added the following new optimizer variables:
- optimizer_disk_read_ratio
- optimizer_disk_read_cost
- optimizer_key_lookup_cost
- optimizer_row_lookup_cost
- optimizer_row_next_find_cost
- optimizer_scan_cost
- Moved all engine specific cost to OPTIMIZER_COSTS structure.
- Changed costs to use 'records_out' instead of 'records_read' when
recalculating costs.
- Split optimizer_costs.h to optimizer_costs.h and optimizer_defaults.h.
This allows one to change costs without having to compile a lot of
files.
- Updated costs for filter lookup.
- Use a better cost estimate in best_extension_by_limited_search()
for the sorting cost.
- Fixed previous issues with 'filtered' explain column as we are now
using 'records_out' (min rows seen for table) to calculate filtering.
This greatly simplifies the filtering code in
JOIN_TAB::save_explain_data().
This change caused a lot of queries to be optimized differently than
before, which exposed different issues in the optimizer that needs to
be fixed. These fixes are in the following commits. To not have to
change the same test case over and over again, the changes in the test
cases are done in a single commit after all the critical change sets
are done.
InnoDB changes:
- Updated InnoDB to not divide big range cost with 2.
- Added cost for InnoDB (innobase_update_optimizer_costs()).
- Don't mark clustered primary key with HA_KEYREAD_ONLY. This will
prevent that the optimizer is trying to use index-only scans on
the clustered key.
- Disabled ha_innobase::scan_time() and ha_innobase::read_time() and
ha_innobase::rnd_pos_time() as the default engine cost functions now
work well for InnoDB.
Other things:
- Added --show-query-costs (\Q) option to mysql.cc to show the query
cost after each query (good when working with query costs).
- Extended my_getopt with GET_ADJUSTED_VALUE which allows one to adjust
the value that user is given. This is used to change cost from
microseconds (user input) to milliseconds (what the server is
internally using).
- Added include/my_tracker.h ; Useful include file to quickly test
costs of a function.
- Use handler::set_table() in all places instead of 'table= arg'.
- Added SHOW_OPTIMIZER_COSTS to sys variables. These are input and
shown in microseconds for the user but stored as milliseconds.
This is to make the numbers easier to read for the user (less
pre-zeros). Implemented in 'Sys_var_optimizer_cost' class.
- In test_quick_select() do not use index scans if 'no_keyread' is set
for the table. This is what we do in other places of the server.
- Added THD parameter to Unique::get_use_cost() and
check_index_intersect_extension() and similar functions to be able
to provide costs to called functions.
- Changed 'records' to 'rows' in optimizer_trace.
- Write more information to optimizer_trace.
- Added INDEX_BLOCK_FILL_FACTOR_MUL (4) and INDEX_BLOCK_FILL_FACTOR_DIV (3)
to calculate usage space of keys in b-trees. (Before we used numeric
constants).
- Removed code that assumed that b-trees has similar costs as binary
trees. Replaced with engine calls that returns the cost.
- Added Bitmap::find_first_bit()
- Added timings to join_cache for ANALYZE table (patch by Sergei Petrunia).
- Added records_init and records_after_filter to POSITION to remember
more of what best_access_path() calculates.
- table_after_join_selectivity() changed to recalculate 'records_out'
based on the new fields from best_access_path()
Bug fixes:
- Some queries did not update last_query_cost (was 0). Fixed by moving
setting thd->...last_query_cost in JOIN::optimize().
- Write '0' as number of rows for const tables with a matching row.
Some internals:
- Engine cost are stored in OPTIMIZER_COSTS structure. When a
handlerton is created, we also created a new cost variable for the
handlerton. We also create a new variable if the user changes a
optimizer cost for a not yet loaded handlerton either with command
line arguments or with SET
@@global.engine.optimizer_cost_variable=xx.
- There are 3 global OPTIMIZER_COSTS variables:
default_optimizer_costs The default costs + changes from the
command line without an engine specifier.
heap_optimizer_costs Heap table costs, used for temporary tables
tmp_table_optimizer_costs The cost for the default on disk internal
temporary table (MyISAM or Aria)
- The engine cost for a table is stored in table_share. To speed up
accesses the handler has a pointer to this. The cost is copied
to the table on first access. If one wants to change the cost one
must first update the global engine cost and then do a FLUSH TABLES.
This was done to be able to access the costs for an open table
without any locks.
- When a handlerton is created, the cost are updated the following way:
See sql/keycaches.cc for details:
- Use 'default_optimizer_costs' as a base
- Call hton->update_optimizer_costs() to override with the engines
default costs.
- Override the costs that the user has specified for the engine.
- On handler open, copy the engine cost from handlerton to TABLE_SHARE.
- Call handler::update_optimizer_costs() to allow the engine to update
cost for this particular table.
- There are two costs stored in THD. These are copied to the handler
when the table is used in a query:
- optimizer_where_cost
- optimizer_scan_setup_cost
- Simplify code in best_access_path() by storing all cost results in a
structure. (Idea/Suggestion by Igor)
2022-08-11 13:05:23 +03:00
|
|
|
add("rows", inner_record_count_1).
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups is now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' were replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
add("materialization_cost", materialize_strategy_cost).
|
|
|
|
add("in_exist_cost", in_exists_strategy_cost).
|
|
|
|
add("choosen", strategy);
|
|
|
|
}
|
2011-06-21 15:50:07 +03:00
|
|
|
|
|
|
|
DBUG_PRINT("info",
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_COST
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculation with TIME_FOR_COMPARE is now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide correct best_cost to the upper level, which
means that the cost for hash_joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_OF_COMPARE).
This is fixed in the new code thanks to that we now include
TIME_OF_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
("mat_strategy_cost: %.2f mat_cost: %.2f write_cost: %.2f "
|
|
|
|
"lookup_cost: %.2f",
|
|
|
|
materialize_strategy_cost, materialization_cost, cost.write,
|
|
|
|
cost.lookup));
|
2011-06-21 15:50:07 +03:00
|
|
|
DBUG_PRINT("info",
|
|
|
|
("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
|
|
|
|
in_exists_strategy_cost, inner_read_time_2));
|
|
|
|
DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
|
2010-09-05 18:43:47 +03:00
|
|
|
}
|
2010-09-30 18:32:44 +03:00
|
|
|
|
|
|
|
/*
|
2010-10-05 16:00:31 +03:00
|
|
|
If (1) materialization is a possible strategy based on semantic analysis
|
2010-09-30 18:32:44 +03:00
|
|
|
during the prepare phase, then if
|
|
|
|
(2) it is more expensive than the IN->EXISTS transformation, and
|
|
|
|
(3) it is not possible to create usable indexes for the materialization
|
|
|
|
strategy,
|
|
|
|
fall back to IN->EXISTS.
|
2010-10-05 16:00:31 +03:00
|
|
|
otherwise
|
|
|
|
use materialization.
|
2010-09-30 18:32:44 +03:00
|
|
|
*/
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
if (in_subs->test_strategy(SUBS_MATERIALIZATION) &&
|
2010-09-30 18:32:44 +03:00
|
|
|
in_subs->setup_mat_engine())
|
2010-09-17 13:17:27 +03:00
|
|
|
{
|
|
|
|
/*
|
2010-09-30 18:32:44 +03:00
|
|
|
If materialization was the cheaper or the only user-selected strategy,
|
|
|
|
but it is not possible to execute it due to limitations in the
|
|
|
|
implementation, fall back to IN-TO-EXISTS.
|
2010-09-17 13:17:27 +03:00
|
|
|
*/
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accesses, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculation in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than was
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters cost less than they actually were.
To remedy the above, the following key & row fetch related costs
has been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, The cost copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid).
when doing a index scan and comparing the rowid to the filter.
Before this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. There is tuning
need for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed the EQ_REF and REF takes into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where scan time of very small tables have lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account number of
preceding tables (as have to restore all fields for all previous
table combination when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; It preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that has to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref to be regarded way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide the correct best_cost to the upper level, which
means that the cost for hash joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
|
|
{
|
2022-06-16 13:12:01 +03:00
|
|
|
Json_writer_object trace_wrapper(thd);
|
Update row and key fetch cost models to take into account data copy costs
Before this patch, when calculating the cost of fetching and using a
row/key from the engine, we took into account the cost of finding a
row or key from the engine, but did not consistently take into account
index only accessed, clustered key or covered keys for all access
paths.
The cost of the WHERE clause (TIME_FOR_COMPARE) was not consistently
considered in best_access_path(). TIME_FOR_COMPARE was used in
calculations in other places, like greedy_search(), but was in some
cases (like scans) done on a different number of rows than were
accessed.
The cost calculation of row and index scans didn't take into account
the number of rows that were accessed, only the number of accepted
rows.
When using a filter, the cost of index_only_reads and the cost of
accessing and disregarding 'filtered rows' were not taken into
account, which made filters appear cheaper than they actually were.
To remedy the above, the following key & row fetch related costs
have been added:
- The cost of fetching and using a row is now split into different costs:
- key + Row fetch cost (as before) but multiplied with the variable
'optimizer_cache_cost' (default to 0.5). This allows the user to
tell the optimizer the likelihood of finding the key and row in the
engine cache.
- ROW_COPY_COST, the cost of copying a row from the engine to the
sql layer or creating a row from the join_cache to the record
buffer. Mostly affects table scan costs.
- ROW_LOOKUP_COST, the cost of fetching a row by rowid.
- KEY_COPY_COST, the cost of finding the next key and copying it from
the engine to the SQL layer. This is used when we calculate the cost
of index only reads. It makes index scans more expensive than before if
they cover a lot of rows. (main.index_merge_myisam)
- KEY_LOOKUP_COST, the cost of finding the first key in a range.
This replaces the old define IDX_LOOKUP_COST, but with a higher cost.
- KEY_NEXT_FIND_COST, the cost of finding the next key (and rowid)
when doing an index scan and comparing the rowid to the filter.
Before, this cost was assumed to be 0.
All of the above constants/variables are now tuned to be somewhat in
proportion of executing complexity to each other. More tuning is
needed for these in the future, but that can wait until the above are
made user variables as that will make tuning much easier.
To make the usage of the above easy, there are new (not virtual)
cost calculation functions in handler:
- ha_read_time(), like read_time(), but take optimizer_cache_cost into
account.
- ha_read_and_copy_time(), like ha_read_time() but take into account
ROW_COPY_TIME
- ha_read_and_compare_time(), like ha_read_and_copy_time() but take
TIME_FOR_COMPARE into account.
- ha_rnd_pos_time(). Read row with row id, taking ROW_COPY_COST
into account. This is used with filesort where we don't need
to execute the WHERE clause again.
- ha_keyread_time(), like keyread_time() but take
optimizer_cache_cost into account.
- ha_keyread_and_copy_time(), like ha_keyread_time(), but add
KEY_COPY_COST.
- ha_key_scan_time(), like key_scan_time() but take
optimizer_cache_cost into account.
- ha_key_scan_and_compare_time(), like ha_key_scan_time(), but add
KEY_COPY_COST & TIME_FOR_COMPARE.
I also added some setup costs for doing different types of scans and
creating temporary tables (on disk and in memory). This encourages
the optimizer to not use these for simple 'a few row' lookups if
there are adequate key lookup strategies.
- TABLE_SCAN_SETUP_COST, cost of starting a table scan.
- INDEX_SCAN_SETUP_COST, cost of starting an index scan.
- HEAP_TEMPTABLE_CREATE_COST, cost of creating in memory
temporary table.
- DISK_TEMPTABLE_CREATE_COST, cost of creating an on disk temporary
table.
When calculating cost of fetching ranges, we had a cost of
IDX_LOOKUP_COST (0.125) for doing a key div for a new range. This is
now replaced with 'io_cost * KEY_LOOKUP_COST (1.0) *
optimizer_cache_cost', which matches the cost we use for 'ref' and
other key lookups. The effect is that the cost is now a bit higher
when we have many ranges for a key.
Almost all calculations with TIME_FOR_COMPARE are now done in
best_access_path(). 'JOIN::read_time' now includes the full
cost for finding the rows in the table.
In the result files, many of the changes are now again close to what
they were before the "Update cost for hash and cached joins" commit,
as that commit didn't fix the filter cost (too complex to do
everything in one commit).
The above changes showed a lot of inconsistencies in
optimizer cost calculation. The main objective with the other changes
was to do calculation as similar (and accurate) as possible and to make
different plans more comparable.
Detailed list of changes:
- Calculate index_only_cost consistently and correctly for all scan
and ref accesses. The row fetch_cost and index_only_cost now
takes into account clustered keys, covered keys and index
only accesses.
- cost_for_index_read now returns both full cost and index_only_cost
- Fixed cost calculation of get_sweep_read_cost() to match other
similar costs. This is based on the assumption that data is more
often stored on SSD than a hard disk.
- Replaced constant 2.0 with new define TABLE_SCAN_SETUP_COST.
- Some scan cost estimates did not take into account
TIME_FOR_COMPARE. Now all scan costs takes this into
account. (main.show_explain)
- Added session variable optimizer_cache_hit_ratio (default 50%). By
adjusting this one can reduce or increase the cost of index or direct
record lookups. The effect of the default is that key lookups are now
a bit cheaper than before. See usage of 'optimizer_cache_cost' in
handler.h.
- JOIN_TAB::scan_time() did not take into account index only scans,
which produced a wrong cost when index scan was used. Changed
JOIN_TAB::scan_time() to take into consideration clustered and
covered keys. The values are now cached and we only have to call
this function once. Other calls are changed to use the cached
values. Function renamed to JOIN_TAB::estimate_scan_time().
- Fixed that most index cost calculations are done the same way and
more close to 'range' calculations. The cost is now lower than
before for small data sets and higher for large data sets as we take
into account how many keys are read (main.opt_trace_selectivity,
main.limit_rows_examined).
- Ensured that index_scan_cost() ==
range(scan_of_all_rows_in_table_using_one_range) +
MULTI_RANGE_READ_INFO_CONST. One effect of this is that if there
is choice of doing a full index scan and a range-index scan over
almost the whole table then index scan will be preferred (no
range-read setup cost). (innodb.innodb, main.show_explain,
main.range)
- Fixed that EQ_REF and REF take into account clustered and covered
keys. This changes some plans to use covered or clustered indexes
as these are much cheaper. (main.subselect_mat_cost,
main.state_tables_innodb, main.limit_rows_examined)
- Rowid filter setup cost and filter compare cost now takes into
account fetching and checking the rowid (KEY_NEXT_FIND_COST).
(main.partition_pruning heap.heap_btree main.log_state)
- Added KEY_NEXT_FIND_COST to
Range_rowid_filter_cost_info::lookup_cost to account of the time
to find and check the next key value against the container
- Introduced ha_keyread_time(rows) that takes into account finding
the next row and copying the key value to 'record'
(KEY_COPY_COST).
- Introduced ha_key_scan_time() for calculating an index scan over
all rows.
- Added IDX_LOOKUP_COST to keyread_time() as a startup cost.
- Added index_only_fetch_cost() as a convenience function to
OPT_RANGE.
- keyread_time() cost is slightly reduced to prefer shorter keys.
(main.index_merge_myisam)
- All of the above caused some index_merge combinations to be
rejected because of cost (main.index_intersect). In some cases
'ref' was replaced with index_merge because of the low
cost calculation of get_sweep_read_cost().
- Some index usage moved from PRIMARY to a covering index.
(main.subselect_innodb)
- Changed cost calculation of filter to take KEY_LOOKUP_COST and
TIME_FOR_COMPARE into account. See sql_select.cc::apply_filter().
filter parameters and costs are now written to optimizer_trace.
- Don't use matchings_records_in_range() to try to estimate the number
of filtered rows for ranges. The reason is that we want to ensure
that 'range' is calculated similar to 'ref'. There is also more work
needed to calculate the selectivity when using ranges and ranges and
filtering. This causes filtering column in EXPLAIN EXTENDED to be
100.00 for some cases where range cannot use filtering.
(main.rowid_filter)
- Introduced ha_scan_time() that takes into account the CPU cost of
finding the next row and copying the row from the engine to
'record'. This causes costs of table scan to slightly increase and
some tests to change their plan from ALL to RANGE or ALL to ref.
(innodb.innodb_mysql, main.select_pkeycache)
In a few cases where the scan of a very small table has a lower cost
than a ref or range, things changed from ref/range to ALL.
(main.myisam, main.func_group, main.limit_rows_examined,
main.subselect2)
- Introduced ha_scan_and_compare_time() which is like ha_scan_time()
but also adds the cost of the where clause (TIME_FOR_COMPARE).
- Added small cost for creating temporary table for
materialization. This causes some very small tables to use scan
instead of materialization.
- Added checking of the WHERE clause (TIME_FOR_COMPARE) of the
accepted rows to ROR costs in get_best_ror_intersect()
- Removed '- 0.001' from 'join->best_read' and optimize_straight_join()
to ensure that the 'Last_query_cost' status variable contains the
same value as the one that was calculated by the optimizer.
- Take avg_io_cost() into account in handler::keyread_time() and
handler::read_time(). This should have no effect as it's 1.0 by
default, except for heap that overrides these functions.
- Some 'ref_or_null' accesses changed to 'range' because of cost
adjustments (main.order_by)
- Added scan type "scan_with_join_cache" for optimizer_trace. This is
just to show in the trace what kind of scan was used.
- When using 'scan_with_join_cache' take into account the number of
preceding tables (as we have to restore all fields for all previous
table combinations when checking the where clause)
The new cost added is:
(row_combinations * ROW_COPY_COST * number_of_cached_tables).
This increases the cost of join buffering in proportion of the
number of tables in the join buffer. One effect is that full scans
are now done earlier as the cost is then smaller.
(main.join_outer_innodb, main.greedy_optimizer)
- Removed the usage of 'worst_seeks' in cost_for_index_read as it
caused wrong plans to be created; it preferred JT_EQ_REF even if it
would be much more expensive than a full table scan. A related
issue was that worst_seeks only applied to full lookup, not to
clustered or index only lookups, which is not consistent. This
caused some plans to use index scan instead of eq_ref (main.union)
- Changed federated block size from 4096 to 1500, which is the
typical size of an IO packet.
- Added costs for reading rows to Federated. Needed as there is no
caching of rows in the federated engine.
- Added ha_innobase::rnd_pos_time() cost function.
- A lot of extra things added to optimizer trace
- More costs, especially for materialization and index_merge.
- Make labels more uniform
- Fixed a lot of minor bugs
- Added 'trace_started()' around a lot of trace blocks.
- When calculating ORDER BY with LIMIT cost for using an index
the cost did not take into account the number of row retrievals
that have to be done or the cost of comparing the rows with the
WHERE clause. The cost calculated would be just a fraction of
the real cost. Now we calculate the cost as we do for ranges
and 'ref'.
- 'Using index for group-by' is used a bit more than before as
we now take into account the WHERE clause cost when comparing
with 'ref' and prefer the method with fewer row combinations.
(main.group_min_max).
Bugs fixed:
- Fixed that we don't calculate TIME_FOR_COMPARE twice for some plans,
like in optimize_straight_join() and greedy_search()
- Fixed bug in save_explain_data where we could test for the wrong
index when displaying 'Using index'. This caused some old plans to
show 'Using index'. (main.subselect_innodb, main.subselect2)
- Fixed bug in get_best_ror_intersect() where 'min_cost' was not
updated, and the cost we compared with was not the one that was
used.
- Fixed very wrong cost calculation for priority queues in
check_if_pq_applicable(). (main.order_by now correctly uses priority
queue)
- When calculating cost of EQ_REF or REF, we added the cost of
comparing the WHERE clause with the found rows, not all row
combinations. This made ref and eq_ref be regarded as way too cheap
compared to other access methods.
- FORCE INDEX cost calculation didn't take into account clustered or
covered indexes.
- JT_EQ_REF cost was estimated as avg_io_cost(), which is half the
cost of a JT_REF key. This may be true for InnoDB primary key, but
not for other unique keys or other engines. Now we use handler
function to calculate the cost, which allows us to handle
consistently clustered, covered keys and not covered keys.
- ha_start_keyread() didn't call extra_opt() if keyread was already
enabled but still changed the 'keyread' variable (which is wrong).
Fixed by not doing anything if keyread is already enabled.
- multi_range_read_info_cost() didn't take into account io_cost when
calculating the cost of ranges.
- fix_semijoin_strategies_for_picked_join_order() used the wrong
record_count when calling best_access_path() for SJ_OPT_FIRST_MATCH
and SJ_OPT_LOOSE_SCAN.
- Hash joins didn't provide the correct best_cost to the upper level, which
means that the cost for hash joins was more expensive than calculated
in best_access_path (a difference of 10x * TIME_FOR_COMPARE).
This is fixed in the new code because we now include the
TIME_FOR_COMPARE cost in 'read_time'.
Other things:
- Added some 'if (thd->trace_started())' to speed up code
- Removed not used function Cost_estimate::is_zero()
- Simplified testing of HA_POS_ERROR in get_best_ror_intersect().
(No cost changes)
- Moved ha_start_keyread() from join_read_const_table() to join_read_const()
to enable keyread for all types of JT_CONST tables.
- Made a few very short functions inline in handler.h
Notes:
- In main.rowid_filter the join order of order and lineitem is swapped.
This is because the cost of doing a range fetch of lineitem(98 rows) is
almost as big as the whole join of order,lineitem. The filtering will
also ensure that we only have to do very small key fetches of the rows
in lineitem.
- main.index_merge_myisam had a few changes where we are now using
less keys for index_merge. This is because index scans are now more
expensive than before.
- handler->optimizer_cache_cost is updated in ha_external_lock().
This ensures that it is up to date per statements.
Not an optimal solution (for locked tables), but should be ok for now.
- 'DELETE FROM t1 WHERE t1.a > 0 ORDER BY t1.a' does not take cost of
filesort into consideration when table scan is chosen.
(main.myisam_explain_non_select_all)
- perfschema.table_aggregate_global_* has changed because an update
on a table with 1 row will now use table scan instead of key lookup.
TODO in upcoming commits:
- Fix selectivity calculation for ranges with and without filtering and
when there is a ref access but scan is chosen.
For this we have to store the lowest known value for
'accepted_records' in the OPT_RANGE structure.
- Change that records_read does not include filtered rows.
- test_if_cheaper_ordering() needs to be updated to properly calculate
costs. This will fix tests like main.order_by_innodb,
main.single_delete_update
- Extend get_range_limit_read_cost() to take into consideration
cost_for_index_read() if there were no quick keys. This will reduce
the computed cost for ORDER BY with LIMIT in some cases.
(main.innodb_ext_key)
- Fix that we take into account selectivity when counting the number
of rows we have to read when considering using an index table scan to
resolve ORDER BY.
- Add new calculation for rnd_pos_time() where we take into account the
benefit of reading multiple rows from the same page.
2021-11-01 12:34:24 +02:00
|
|
|
Json_writer_object trace_subquery(thd, "subquery_plan_revert");
|
|
|
|
trace_subquery.add("choosen", "in_to_exists");
|
|
|
|
}
|
2010-09-17 13:17:27 +03:00
|
|
|
}
|
2010-09-05 18:43:47 +03:00
|
|
|
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
if (in_subs->test_strategy(SUBS_MATERIALIZATION))
|
2010-09-05 18:43:47 +03:00
|
|
|
{
|
2010-11-02 15:27:01 +02:00
|
|
|
/* Restore the original query plan used for materialization. */
|
|
|
|
if (reopt_result == REOPT_NEW_PLAN)
|
2010-12-15 12:54:25 +02:00
|
|
|
restore_query_plan(&save_qep);
|
2010-09-30 18:32:44 +03:00
|
|
|
|
2011-05-04 18:08:58 +03:00
|
|
|
in_subs->unit->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
|
|
|
|
select_lex->uncacheable&= ~UNCACHEABLE_DEPENDENT_INJECTED;
|
2010-09-05 18:43:47 +03:00
|
|
|
|
2010-09-16 17:06:58 +03:00
|
|
|
/*
|
|
|
|
Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
|
|
|
|
TODO:
|
|
|
|
Currently we set the subquery LIMIT to infinity, and this is correct
|
|
|
|
because we forbid at parse time LIMIT inside IN subqueries (see
|
|
|
|
Item_in_subselect::test_limit). However, once we allow this, here
|
|
|
|
we should set the correct limit if given in the query.
|
|
|
|
*/
|
2020-12-19 13:59:37 +02:00
|
|
|
in_subs->unit->global_parameters()->limit_params.select_limit= NULL;
|
2014-10-14 09:36:50 -07:00
|
|
|
in_subs->unit->set_limit(unit->global_parameters());
|
2010-09-16 17:06:58 +03:00
|
|
|
/*
|
|
|
|
Set the limit of this JOIN object as well, because normally its being
|
|
|
|
set in the beginning of JOIN::optimize, which was already done.
|
|
|
|
*/
|
2019-09-26 09:49:50 +02:00
|
|
|
select_limit= in_subs->unit->lim.get_select_limit();
|
2010-09-30 18:32:44 +03:00
|
|
|
}
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced an incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artificially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subquery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
else if (in_subs->test_strategy(SUBS_IN_TO_EXISTS))
|
2010-09-30 18:32:44 +03:00
|
|
|
{
|
2010-12-15 12:54:25 +02:00
|
|
|
if (reopt_result == REOPT_NONE && in_to_exists_where &&
|
2011-05-25 19:31:13 +04:00
|
|
|
const_tables != table_count)
|
2010-09-30 18:32:44 +03:00
|
|
|
{
|
2010-10-05 16:00:31 +03:00
|
|
|
/*
|
2011-09-01 23:53:12 +03:00
|
|
|
The subquery was not reoptimized with the newly injected IN-EXISTS
|
|
|
|
conditions either because the user allowed only the IN-EXISTS strategy,
|
|
|
|
or because materialization was not possible based on semantic analysis.
|
2010-10-05 16:00:31 +03:00
|
|
|
*/
|
2011-09-01 23:53:12 +03:00
|
|
|
reopt_result= reoptimize(in_to_exists_where, join_tables, NULL);
|
|
|
|
if (reopt_result == REOPT_ERROR)
|
2010-09-30 18:32:44 +03:00
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (in_subs->inject_in_to_exists_cond(this))
|
|
|
|
return TRUE;
|
2011-05-12 00:14:15 +03:00
|
|
|
/*
|
2012-06-05 17:25:10 +03:00
|
|
|
If the injected predicate is correlated the IN->EXISTS transformation
|
|
|
|
makes the subquery dependent.
|
2011-05-12 00:14:15 +03:00
|
|
|
*/
|
2012-06-05 17:25:10 +03:00
|
|
|
if ((in_to_exists_where &&
|
|
|
|
in_to_exists_where->used_tables() & OUTER_REF_TABLE_BIT) ||
|
|
|
|
(in_to_exists_having &&
|
|
|
|
in_to_exists_having->used_tables() & OUTER_REF_TABLE_BIT))
|
|
|
|
{
|
|
|
|
in_subs->unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
|
|
|
|
select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
|
|
|
|
}
|
2011-05-04 18:08:58 +03:00
|
|
|
select_limit= 1;
|
2010-09-05 18:43:47 +03:00
|
|
|
}
|
|
|
|
else
|
|
|
|
DBUG_ASSERT(FALSE);
|
|
|
|
|
2010-09-30 18:32:44 +03:00
|
|
|
return FALSE;
|
2010-09-05 18:43:47 +03:00
|
|
|
}
|
2010-10-23 21:28:58 +03:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Choose a query plan for a table-less subquery.
|
|
|
|
|
|
|
|
@notes
|
|
|
|
|
|
|
|
@retval FALSE success.
|
|
|
|
@retval TRUE error occurred.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool JOIN::choose_tableless_subquery_plan()
|
|
|
|
{
|
2011-05-25 19:31:13 +04:00
|
|
|
DBUG_ASSERT(!tables_list || !table_count);
|
2011-05-23 10:56:05 +03:00
|
|
|
if (unit->item)
|
2010-10-23 21:28:58 +03:00
|
|
|
{
|
2011-05-23 10:56:05 +03:00
|
|
|
DBUG_ASSERT(unit->item->type() == Item::SUBSELECT_ITEM);
|
|
|
|
Item_subselect *subs_predicate= unit->item;
|
2010-10-23 21:28:58 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
If the optimizer determined that this query has an empty result,
|
|
|
|
in most cases the subquery predicate is a known constant value -
|
Fix bug lp:985667, MDEV-229
Analysis:
The reason for the wrong result is the interaction between constant
optimization (in this case 1-row table) and subquery optimization.
- First the outer query is optimized, and 'make_join_statistics' finds that
table t2 has one row, reads that row, and marks the whole table as constant.
This also means that all fields of t2 are constant.
- Next, we optimize the subquery in the end of the outer 'make_join_statistics'.
The field 'f2' is considered constant, with value '3'. The subquery predicate
is rewritten as the constant TRUE.
- The outer query execution detects early that the whole query result is empty
and calls 'return_zero_rows'. Since the query is with implicit grouping, we
have to produce one row with special values for the aggregates (depending on
each aggregate function), and NULL values for all non-aggregate fields. This
function calls 'no_rows_in_result' to set each aggregate function to the
default value when it aggregates over an empty result, and then calls
'send_data', which in turn evaluates each Item in the SELECT list.
- When evaluation reaches the subquery predicate, it executes the subquery
with field 'f2' having a constant value '3', and the subquery produces the
incorrect result '7'.
Solution:
Implement Item::no_rows_in_result for all subquery predicates. In order to
make this work, it is also needed to make all val_* methods of all subquery
predicates respect the Item_subselect::forced_const flag. Otherwise subqueries
are executed anyways, and override the default value set by no_rows_in_result
with whatever result is produced from the subquery evaluation.
2012-04-27 12:59:17 +03:00
|
|
|
either of TRUE, FALSE or NULL. The implementation of
|
|
|
|
Item_subselect::no_rows_in_result() determines which one.
|
2010-10-23 21:28:58 +03:00
|
|
|
*/
|
|
|
|
if (zero_result_cause)
|
|
|
|
{
|
|
|
|
if (!implicit_grouping)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Both group by queries and non-group by queries without aggregate
|
Fix bug lp:985667, MDEV-229
Analysis:
The reason for the wrong result is the interaction between constant
optimization (in this case 1-row table) and subquery optimization.
- First the outer query is optimized, and 'make_join_statistics' finds that
table t2 has one row, reads that row, and marks the whole table as constant.
This also means that all fields of t2 are constant.
- Next, we optimize the subquery in the end of the outer 'make_join_statistics'.
The field 'f2' is considered constant, with value '3'. The subquery predicate
is rewritten as the constant TRUE.
- The outer query execution detects early that the whole query result is empty
and calls 'return_zero_rows'. Since the query is with implicit grouping, we
have to produce one row with special values for the aggregates (depending on
each aggregate function), and NULL values for all non-aggregate fields. This
function calls 'no_rows_in_result' to set each aggregate function to the
default value when it aggregates over an empty result, and then calls
'send_data', which in turn evaluates each Item in the SELECT list.
- When evaluation reaches the subquery predicate, it executes the subquery
with field 'f2' having a constant value '3', and the subquery produces the
incorrect result '7'.
Solution:
Implement Item::no_rows_in_result for all subquery predicates. In order to
make this work, it is also needed to make all val_* methods of all subquery
predicates respect the Item_subselect::forced_const flag. Otherwise subqueries
are executed anyways, and override the default value set by no_rows_in_result
with whatever result is produced from the subquery evaluation.
2012-04-27 12:59:17 +03:00
|
|
|
functions produce empty subquery result. There is no need to further
|
|
|
|
rewrite the subquery because it will not be executed at all.
|
2010-10-23 21:28:58 +03:00
|
|
|
*/
|
2018-06-11 08:52:26 -07:00
|
|
|
exec_const_cond= 0;
|
2010-10-23 21:28:58 +03:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
Fix bug lp:985667, MDEV-229
Analysis:
The reason for the wrong result is the interaction between constant
optimization (in this case 1-row table) and subquery optimization.
- First the outer query is optimized, and 'make_join_statistics' finds that
table t2 has one row, reads that row, and marks the whole table as constant.
This also means that all fields of t2 are constant.
- Next, we optimize the subquery in the end of the outer 'make_join_statistics'.
The field 'f2' is considered constant, with value '3'. The subquery predicate
is rewritten as the constant TRUE.
- The outer query execution detects early that the whole query result is empty
and calls 'return_zero_rows'. Since the query is with implicit grouping, we
have to produce one row with special values for the aggregates (depending on
each aggregate function), and NULL values for all non-aggregate fields. This
function calls 'no_rows_in_result' to set each aggregate function to the
default value when it aggregates over an empty result, and then calls
'send_data', which in turn evaluates each Item in the SELECT list.
- When evaluation reaches the subquery predicate, it executes the subquery
with field 'f2' having a constant value '3', and the subquery produces the
incorrect result '7'.
Solution:
Implement Item::no_rows_in_result for all subquery predicates. In order to
make this work, it is also needed to make all val_* methods of all subquery
predicates respect the Item_subselect::forced_const flag. Otherwise subqueries
are executed anyways, and override the default value set by no_rows_in_result
with whatever result is produced from the subquery evaluation.
2012-04-27 12:59:17 +03:00
|
|
|
/* @todo
|
2010-10-23 21:28:58 +03:00
|
|
|
A further optimization is possible when a non-group query with
|
|
|
|
MIN/MAX/COUNT is optimized by opt_sum_query. Then, if there are
|
|
|
|
only MIN/MAX functions over an empty result set, the subquery
|
|
|
|
result is a NULL value/row, thus the value of subs_predicate is
|
|
|
|
NULL.
|
|
|
|
*/
|
|
|
|
}
|
2011-12-14 04:39:29 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
For IN subqueries, use IN->EXISTS transformation, unless the subquery
|
|
|
|
has been converted to a JTBM semi-join. In that case, just leave
|
|
|
|
everything as-is, setup_jtbm_semi_joins() has special handling for cases
|
|
|
|
like this.
|
|
|
|
*/
|
2020-06-30 15:20:11 +02:00
|
|
|
Item_in_subselect *in_subs;
|
|
|
|
in_subs= subs_predicate->get_IN_subquery();
|
|
|
|
if (in_subs &&
|
|
|
|
!(subs_predicate->substype() == Item_subselect::IN_SUBS &&
|
|
|
|
in_subs->is_jtbm_merged))
|
2010-10-23 21:28:58 +03:00
|
|
|
{
|
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
******
Fix MySQL BUG#12329653
In MariaDB, when running in ONLY_FULL_GROUP_BY mode,
the server produced in incorrect error message that there
is an aggregate function without GROUP BY, for artificially
created MIN/MAX functions during subquery MIN/MAX optimization.
The fix introduces a way to distinguish between artifially
created MIN/MAX functions as a result of a rewrite, and normal
ones present in the query. The test for ONLY_FULL_GROUP_BY violation
now tests in addition if a MIN/MAX function was part of a MIN/MAX
subquery rewrite.
In order to be able to distinguish these MIN/MAX functions, the
patch introduces an additional flag in Item_in_subselect::in_strategy -
SUBS_STRATEGY_CHOSEN. This flag is set when the optimizer makes its
final choice of a subuqery strategy. In order to make the choice
consistent, access to Item_in_subselect::in_strategy is provided
via new class methods.
2011-11-12 11:29:12 +02:00
|
|
|
in_subs->set_strategy(SUBS_IN_TO_EXISTS);
|
2010-10-23 21:28:58 +03:00
|
|
|
if (in_subs->create_in_to_exists_cond(this) ||
|
|
|
|
in_subs->inject_in_to_exists_cond(this))
|
|
|
|
return TRUE;
|
|
|
|
tmp_having= having;
|
|
|
|
}
|
|
|
|
}
|
2018-06-11 08:52:26 -07:00
|
|
|
exec_const_cond= zero_result_cause ? 0 : conds;
|
2010-10-23 21:28:58 +03:00
|
|
|
return FALSE;
|
|
|
|
}
|
2018-05-15 23:45:59 +02:00
|
|
|
|
|
|
|
|
2018-06-17 19:48:00 +02:00
|
|
|
/**
  @brief
    Check whether this item has a counterpart in the field pairs of
    an IN subquery predicate

  @param arg  the IN subquery predicate, passed as an opaque pointer;
              its corresponding_fields list is the one that is searched

  @retval true   get_corresponding_field_pair() found a pair for this item
  @retval false  otherwise
*/

bool Item::pushable_equality_checker_for_subquery(uchar *arg)
{
  Item_in_subselect *in_subq= ((Item *) arg)->get_IN_subquery();
  Field_pair *matching_pair=
    get_corresponding_field_pair(this, in_subq->corresponding_fields);
  return matching_pair != NULL;
}
|
|
|
|
|
|
|
|
|
2018-06-17 19:48:00 +02:00
|
|
|
/*
|
|
|
|
Checks if 'item' or some item equal to it is equal to the field from
|
|
|
|
some Field_pair of 'pair_list' and returns matching Field_pair or
|
|
|
|
NULL if the matching Field_pair wasn't found.
|
|
|
|
*/
|
|
|
|
|
|
|
|
Field_pair *find_matching_field_pair(Item *item, List<Field_pair> pair_list)
|
2018-05-15 23:45:59 +02:00
|
|
|
{
|
2018-06-17 19:48:00 +02:00
|
|
|
Field_pair *field_pair= get_corresponding_field_pair(item, pair_list);
|
|
|
|
if (field_pair)
|
|
|
|
return field_pair;
|
|
|
|
|
|
|
|
Item_equal *item_equal= item->get_item_equal();
|
2018-05-15 23:45:59 +02:00
|
|
|
if (item_equal)
|
|
|
|
{
|
|
|
|
Item_equal_fields_iterator it(*item_equal);
|
|
|
|
Item *equal_item;
|
|
|
|
while ((equal_item= it++))
|
|
|
|
{
|
|
|
|
if (equal_item->const_item())
|
|
|
|
continue;
|
2018-06-17 19:48:00 +02:00
|
|
|
field_pair= get_corresponding_field_pair(equal_item, pair_list);
|
|
|
|
if (field_pair)
|
|
|
|
return field_pair;
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
}
|
2018-06-17 19:48:00 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  A field item is reported as dependent on the left part of subq_pred
  when find_matching_field_pair() finds a pair for it (directly or through
  a multiple equality) in subq_pred->corresponding_fields.
*/

bool Item_field::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred)
{
  return find_matching_field_pair((Item *) this,
                                  subq_pred->corresponding_fields) != NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  A view reference without a multiple equality simply delegates to the
  item it refers to. With a multiple equality the reference itself is
  first looked up in subq_pred->corresponding_fields; only if that fails
  is the referenced item asked.
*/

bool Item_direct_view_ref::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred)
{
  if (!item_equal)
    return (*ref)->excl_dep_on_in_subq_left_part(subq_pred);

  DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
  Field_pair *own_pair=
    get_corresponding_field_pair((Item *) this,
                                 subq_pred->corresponding_fields);
  return own_pair != NULL ||
         (*ref)->excl_dep_on_in_subq_left_part(subq_pred);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  A multiple equality is reported as dependent on the left part of
  subq_pred when it has two "anchors":
    - if the equality has a constant, the constant is the first anchor and
      one dependent field member is enough;
    - otherwise two dependent field members are required.
  Members are examined in iterator order and the scan stops as soon as
  the answer is known.
*/

bool Item_equal::excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred)
{
  /* The constant of the equality, if any, serves as the first anchor */
  bool have_first_anchor= (get_const() != NULL);
  Item_equal_fields_iterator member_it(*this);

  for (Item *member= member_it++; member; member= member_it++)
  {
    if (!member->excl_dep_on_in_subq_left_part(subq_pred))
      continue;
    if (have_first_anchor)
      return true;
    have_first_anchor= true;
  }
  return false;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Get corresponding item from the select of the right part of IN subquery
|
|
|
|
|
|
|
|
@param thd the thread handle
|
|
|
|
@param item the item from the left part of subq_pred for which
|
|
|
|
corresponding item should be found
|
|
|
|
@param subq_pred the IN subquery predicate
|
|
|
|
|
|
|
|
@details
|
|
|
|
This method looks through the fields of the select of the right part of
|
|
|
|
the IN subquery predicate subq_pred trying to find the corresponding
|
|
|
|
item 'new_item' for item. If item has equal items it looks through
|
|
|
|
the fields of the select of the right part of subq_pred for each equal
|
|
|
|
item trying to find the corresponding item.
|
|
|
|
The method assumes that the given item is either a field item or
|
|
|
|
a reference to a field item.
|
|
|
|
|
|
|
|
@retval <item*> reference to the corresponding item
|
|
|
|
@retval NULL if item was not found
|
|
|
|
*/
|
|
|
|
|
|
|
|
static
|
|
|
|
Item *get_corresponding_item(THD *thd, Item *item,
|
|
|
|
Item_in_subselect *subq_pred)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(item->type() == Item::FIELD_ITEM ||
|
|
|
|
(item->type() == Item::REF_ITEM &&
|
|
|
|
((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF));
|
|
|
|
|
2018-06-17 19:48:00 +02:00
|
|
|
Field_pair *field_pair;
|
2018-05-15 23:45:59 +02:00
|
|
|
Item_equal *item_equal= item->get_item_equal();
|
|
|
|
|
|
|
|
if (item_equal)
|
|
|
|
{
|
|
|
|
Item_equal_fields_iterator it(*item_equal);
|
|
|
|
Item *equal_item;
|
|
|
|
while ((equal_item= it++))
|
|
|
|
{
|
2018-06-17 19:48:00 +02:00
|
|
|
field_pair=
|
|
|
|
get_corresponding_field_pair(equal_item, subq_pred->corresponding_fields);
|
|
|
|
if (field_pair)
|
|
|
|
return field_pair->corresponding_item;
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2018-06-17 19:48:00 +02:00
|
|
|
{
|
|
|
|
field_pair=
|
|
|
|
get_corresponding_field_pair(item, subq_pred->corresponding_fields);
|
|
|
|
if (field_pair)
|
|
|
|
return field_pair->corresponding_item;
|
|
|
|
}
|
|
|
|
return NULL;
|
2018-05-15 23:45:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Transformer used when a condition is pushed into the WHERE clause of
  an IN subquery: replace this field by a clone of the corresponding item
  of the subquery select. If there is no corresponding item the field
  is returned unchanged.

  'arg' is the IN subquery predicate passed as an opaque pointer.
*/

Item *Item_field::in_subq_field_transformer_for_where(THD *thd, uchar *arg)
{
  Item_in_subselect *in_subq= ((Item *) arg)->get_IN_subquery();
  Item *source_item= get_corresponding_item(thd, this, in_subq);
  return source_item ? source_item->build_clone(thd) : this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Transformer used when a condition is pushed into the WHERE clause of
  an IN subquery. A view reference that is not part of a multiple equality
  is left as is. Otherwise it is replaced by a clone of the corresponding
  item of the subquery select; the assertion states the expectation that
  such an item exists.

  'arg' is the IN subquery predicate passed as an opaque pointer.
*/

Item *Item_direct_view_ref::in_subq_field_transformer_for_where(THD *thd,
                                                                uchar *arg)
{
  if (!item_equal)
    return this;

  Item_in_subselect *in_subq= ((Item *) arg)->get_IN_subquery();
  Item *source_item= get_corresponding_item(thd, this, in_subq);
  DBUG_ASSERT (source_item != NULL);
  return source_item->build_clone(thd);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Transforms item so it can be pushed into the IN subquery HAVING clause
|
|
|
|
|
|
|
|
@param thd the thread handle
|
|
|
|
@param in_item the item for which pushable item should be created
|
|
|
|
@param subq_pred the IN subquery predicate
|
|
|
|
|
|
|
|
@details
|
|
|
|
This method finds for in_item that is a field from the left part of the
|
|
|
|
IN subquery predicate subq_pred its corresponding item from the right part
|
|
|
|
of subq_pred.
|
|
|
|
If corresponding item is found, a shell for this item is created.
|
|
|
|
This shell can be pushed into the HAVING part of subq_pred select.
|
|
|
|
|
|
|
|
@retval <item*> reference to the created corresponding item shell for in_item
|
|
|
|
@retval NULL if mistake occurs
|
|
|
|
*/
|
|
|
|
|
|
|
|
static Item*
|
|
|
|
get_corresponding_item_for_in_subq_having(THD *thd, Item *in_item,
|
|
|
|
Item_in_subselect *subq_pred)
|
|
|
|
{
|
|
|
|
Item *new_item= get_corresponding_item(thd, in_item, subq_pred);
|
|
|
|
|
|
|
|
if (new_item)
|
|
|
|
{
|
|
|
|
Item_ref *ref=
|
|
|
|
new (thd->mem_root) Item_ref(thd,
|
|
|
|
&subq_pred->unit->first_select()->context,
|
2019-05-26 06:17:35 +04:00
|
|
|
new_item->name);
|
2018-05-15 23:45:59 +02:00
|
|
|
if (!ref)
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return ref;
|
|
|
|
}
|
|
|
|
return new_item;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Transformer used when a condition is pushed into the HAVING clause of
  an IN subquery: replace this field by the shell built by
  get_corresponding_item_for_in_subq_having().

  'arg' is the IN subquery predicate passed as an opaque pointer.
*/

Item *Item_field::in_subq_field_transformer_for_having(THD *thd, uchar *arg)
{
  DBUG_ASSERT(((Item *)arg)->get_IN_subquery());
  Item_in_subselect *in_subq= (Item_in_subselect *) arg;
  return get_corresponding_item_for_in_subq_having(thd, this, in_subq);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Transformer used when a condition is pushed into the HAVING clause of
  an IN subquery. A view reference that is not part of a multiple equality
  is left as is. Otherwise it is replaced by the shell built by
  get_corresponding_item_for_in_subq_having(); if no shell could be built
  the reference itself is returned.

  'arg' is the IN subquery predicate passed as an opaque pointer.
*/

Item *Item_direct_view_ref::in_subq_field_transformer_for_having(THD *thd,
                                                                 uchar *arg)
{
  if (!item_equal)
    return this;

  DBUG_ASSERT(((Item *)arg)->get_IN_subquery());
  Item_in_subselect *in_subq= (Item_in_subselect *) arg;
  Item *shell= get_corresponding_item_for_in_subq_having(thd, this, in_subq);
  return shell ? shell : this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Find fields that are used in the GROUP BY of the select
|
|
|
|
|
|
|
|
@param thd the thread handle
|
|
|
|
@param sel the select of the IN subquery predicate
|
|
|
|
@param fields fields of the left part of the IN subquery predicate
|
|
|
|
@param grouping_list GROUP BY clause
|
|
|
|
|
|
|
|
@details
|
|
|
|
This method traverses fields which are used in the GROUP BY of
|
|
|
|
sel and saves them with their corresponding items from fields.
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool grouping_fields_in_the_in_subq_left_part(THD *thd,
|
|
|
|
st_select_lex *sel,
|
|
|
|
List<Field_pair> *fields,
|
|
|
|
ORDER *grouping_list)
|
|
|
|
{
|
|
|
|
DBUG_ENTER("grouping_fields_in_the_in_subq_left_part");
|
|
|
|
sel->grouping_tmp_fields.empty();
|
|
|
|
List_iterator<Field_pair> it(*fields);
|
|
|
|
Field_pair *item;
|
|
|
|
while ((item= it++))
|
|
|
|
{
|
|
|
|
for (ORDER *ord= grouping_list; ord; ord= ord->next)
|
|
|
|
{
|
|
|
|
if ((*ord->item)->eq(item->corresponding_item, 0))
|
|
|
|
{
|
|
|
|
if (sel->grouping_tmp_fields.push_back(item, thd->mem_root))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief
|
|
|
|
Extract condition that can be pushed into select of this IN subquery
|
|
|
|
|
|
|
|
@param thd the thread handle
|
|
|
|
@param cond current condition
|
|
|
|
|
|
|
|
@details
|
|
|
|
This function builds the most restrictive condition depending only on
|
|
|
|
the list of fields of the left part of this IN subquery predicate
|
|
|
|
(directly or indirectly through equality) that can be extracted from the
|
|
|
|
given condition cond and pushes it into this IN subquery.
|
|
|
|
|
|
|
|
Example of the transformation:
|
|
|
|
|
|
|
|
SELECT * FROM t1
|
|
|
|
WHERE a>3 AND b>10 AND
|
|
|
|
(a,b) IN (SELECT x,MAX(y) FROM t2 GROUP BY x);
|
|
|
|
|
|
|
|
=>
|
|
|
|
|
|
|
|
SELECT * FROM t1
|
|
|
|
WHERE a>3 AND b>10 AND
|
|
|
|
(a,b) IN (SELECT x,max(y)
|
|
|
|
FROM t2
|
|
|
|
WHERE x>3
|
|
|
|
GROUP BY x
|
|
|
|
HAVING MAX(y)>10);
|
|
|
|
|
|
|
|
|
|
|
|
In details:
|
|
|
|
1. Check what pushable formula can be extracted from cond
|
|
|
|
2. Build a clone PC of the formula that can be extracted
|
|
|
|
(the clone is built only if the extracted formula is a AND subformula
|
|
|
|
of cond or conjunction of such subformulas)
|
|
|
|
3. If there is no HAVING clause prepare PC to be conjuncted with
|
|
|
|
WHERE clause of this subquery. Otherwise do 4-7.
|
|
|
|
4. Check what formula PC_where can be extracted from PC to be pushed
|
|
|
|
into the WHERE clause of the subquery
|
|
|
|
5. Build PC_where and if PC_where is a conjunct(s) of PC remove it from PC
|
|
|
|
getting PC_having
|
|
|
|
6. Prepare PC_where to be conjuncted with the WHERE clause of
|
|
|
|
the IN subquery
|
|
|
|
7. Prepare PC_having to be conjuncted with the HAVING clause of
|
|
|
|
the IN subquery
|
|
|
|
|
|
|
|
@note
|
|
|
|
This method is similar to pushdown_cond_for_derived()
|
|
|
|
|
|
|
|
@retval TRUE if an error occurs
|
|
|
|
@retval FALSE otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
|
|
|
|
{
|
|
|
|
DBUG_ENTER("Item_in_subselect::pushdown_cond_for_in_subquery");
|
|
|
|
Item *remaining_cond= NULL;
|
|
|
|
|
|
|
|
if (!cond)
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
|
|
|
|
st_select_lex *sel = unit->first_select();
|
|
|
|
|
|
|
|
if (is_jtbm_const_tab)
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
|
|
|
|
if (!sel->cond_pushdown_is_allowed())
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Create a list of Field_pair items for this IN subquery.
|
|
|
|
It consists of the pairs of fields from the left part of this IN subquery
|
|
|
|
predicate 'left_part' and the respective fields from the select of the
|
|
|
|
right part of the IN subquery 'sel' (the field from left_part with the
|
|
|
|
corresponding field from the sel projection list).
|
|
|
|
Attach this list to the IN subquery.
|
|
|
|
*/
|
|
|
|
corresponding_fields.empty();
|
|
|
|
List_iterator_fast<Item> it(sel->join->fields_list);
|
|
|
|
Item *item;
|
|
|
|
for (uint i= 0; i < left_expr->cols(); i++)
|
|
|
|
{
|
|
|
|
item= it++;
|
|
|
|
Item *elem= left_expr->element_index(i);
|
|
|
|
|
|
|
|
if (elem->real_item()->type() != Item::FIELD_ITEM)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (corresponding_fields.push_back(
|
|
|
|
new Field_pair(((Item_field *)(elem->real_item()))->field,
|
|
|
|
item)))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 1. Check what pushable formula can be extracted from cond */
|
|
|
|
Item *extracted_cond;
|
|
|
|
cond->check_pushable_cond(&Item::pushable_cond_checker_for_subquery,
|
|
|
|
(uchar *)this);
|
|
|
|
/* 2. Build a clone PC of the formula that can be extracted */
|
|
|
|
extracted_cond=
|
|
|
|
cond->build_pushable_cond(thd,
|
|
|
|
&Item::pushable_equality_checker_for_subquery,
|
|
|
|
(uchar *)this);
|
|
|
|
/* Nothing to push */
|
|
|
|
if (!extracted_cond)
|
|
|
|
{
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Collect fields that are used in the GROUP BY of sel */
|
|
|
|
st_select_lex *save_curr_select= thd->lex->current_select;
|
|
|
|
if (sel->have_window_funcs())
|
|
|
|
{
|
|
|
|
if (sel->group_list.first || sel->join->implicit_grouping)
|
|
|
|
goto exit;
|
|
|
|
ORDER *common_partition_fields=
|
|
|
|
sel->find_common_window_func_partition_fields(thd);
|
|
|
|
if (!common_partition_fields)
|
|
|
|
goto exit;
|
|
|
|
|
|
|
|
if (grouping_fields_in_the_in_subq_left_part(thd, sel, &corresponding_fields,
|
|
|
|
common_partition_fields))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
else if (grouping_fields_in_the_in_subq_left_part(thd, sel,
|
|
|
|
&corresponding_fields,
|
|
|
|
sel->group_list.first))
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
|
|
|
|
/* Do 4-6 */
|
|
|
|
sel->pushdown_cond_into_where_clause(thd, extracted_cond,
|
|
|
|
&remaining_cond,
|
|
|
|
&Item::in_subq_field_transformer_for_where,
|
|
|
|
(uchar *) this);
|
|
|
|
if (!remaining_cond)
|
|
|
|
goto exit;
|
|
|
|
/*
|
|
|
|
7. Prepare PC_having to be conjuncted with the HAVING clause of
|
|
|
|
the IN subquery
|
|
|
|
*/
|
|
|
|
remaining_cond=
|
|
|
|
remaining_cond->transform(thd,
|
|
|
|
&Item::in_subq_field_transformer_for_having,
|
|
|
|
(uchar *)this);
|
2019-03-23 15:28:22 +03:00
|
|
|
if (!remaining_cond ||
|
|
|
|
remaining_cond->walk(&Item::cleanup_excluding_const_fields_processor,
|
|
|
|
0, 0))
|
2018-05-15 23:45:59 +02:00
|
|
|
goto exit;
|
|
|
|
|
2019-03-23 15:28:22 +03:00
|
|
|
mark_or_conds_to_avoid_pushdown(remaining_cond);
|
|
|
|
|
|
|
|
sel->cond_pushed_into_having= remaining_cond;
|
2018-05-15 23:45:59 +02:00
|
|
|
|
|
|
|
exit:
|
|
|
|
thd->lex->current_select= save_curr_select;
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
}
|
2020-07-08 20:43:57 +05:30
|
|
|
|
|
|
|
/*
|
|
|
|
@brief
|
|
|
|
Check if a table is a SJM Scan table
|
|
|
|
|
|
|
|
@retval
|
|
|
|
TRUE SJM scan table
|
|
|
|
FALSE Otherwise
|
|
|
|
*/
|
|
|
|
bool TABLE_LIST::is_sjm_scan_table()
|
|
|
|
{
|
|
|
|
return is_active_sjm() && sj_mat_info->is_sj_scan;
|
|
|
|
}
|