mariadb/sql/optimizer_costs.h
Monty 07df2029a3 Adjust cost for re-creating a row from the JOIN CACHE
Creating a record from the join cache is faster than getting a row from
the engine (less and simpler code to execute).

Added JOIN_CACHE_ROW_COPY_COST_FACTOR (0.5 for now) as the factor to
take this into account. This is multiplied with ROW_COPY_COST.

Other things:
- Added cost of copying rows to hash join, similar to join_cache joins.
2023-02-02 22:58:52 +03:00

186 lines
6.4 KiB
C

#ifndef OPTIMIZER_COSTS_INCLUDED
#define OPTIMIZER_COSTS_INCLUDED
/*
Copyright (c) 2022, MariaDB AB
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
/* This file contains the cost constants and variables used by the optimizer */
/*
The table/index cache hit ratio in %. 0 means that a searched-for key or row
will never be in the cache, while 100 means it is always in the cache.
According to folklore, one needs at least an 80 % hit rate in the cache for
MariaDB to run very well. We set CACHE_HIT_RATIO a bit lower than that,
as there is still a cost involved in finding the row in the B tree, hash
or other seek structure.
Increasing CACHE_HIT_RATIO will make MariaDB prefer key lookups over
table scans, as ROW_COPY_COST and INDEX_COPY cost will
have a larger impact when more rows are examined.
Note that avg_io_cost() is multiplied by this constant!
*/
#define DEFAULT_CACHE_HIT_RATIO 50
/*
Convert a cache hit ratio, given in percent, to a cost:
the result is (100 - ratio) / 100, that is 1.0 for ratio 0 and
0.0 for ratio 100.
NOTE(review): 'ratio' is presumably expected to be in the range 0..100;
larger values are not checked here - confirm that callers guarantee this.
*/
static inline double cache_hit_ratio(uint ratio)
{
  const double miss_percent= (double) (100 - ratio);
  return miss_percent / 100.0;
}
/*
The base cost for finding keys and rows from the engine is 1.0.
All other costs should be proportional to these.
*/
/* Cost of finding the first key in a key scan */
#define KEY_LOOKUP_COST ((double) 1.0)
/*
Cost of finding a key from a row_ID (not used for clustered keys).
NOTE(review): given the macro name, "row" may be what is meant here
rather than "key" - confirm.
*/
#define ROW_LOOKUP_COST ((double) 1.0)
/*
Cost of finding and copying keys from the storage engine index cache to
an internal cache as part of an index scan.
Used in handler::keyread_time().
DEFAULT_INDEX_BLOCK_COPY_COST is the default value (1/5);
INDEX_BLOCK_COPY_COST(THD) reads the current value from the
optimizer_index_block_copy_cost member of (THD)->variables.
*/
#define DEFAULT_INDEX_BLOCK_COPY_COST ((double) 1 / 5.0)
#define INDEX_BLOCK_COPY_COST(THD) ((THD)->variables.optimizer_index_block_copy_cost)
/*
Cost of finding the next row during a table scan and copying it to
'table->record'.
If this is too small, then table scans will be preferred over 'ref',
as with table scans there are no key reads (KEY_LOOKUP_COST) and fewer
disk reads, but more record copying and row comparisons. If it's
too big, then MariaDB will use key lookups even when a table scan is
better.
ROW_COPY_COST expands to the plain identifier optimizer_row_copy_cost, so
something with that name must be in scope wherever the macro is used.
ROW_COPY_COST_THD(THD) reads the value from (THD)->variables instead.
*/
#define DEFAULT_ROW_COPY_COST ((double) 1.0 / 20.0)
#define ROW_COPY_COST optimizer_row_copy_cost
#define ROW_COPY_COST_THD(THD) ((THD)->variables.optimizer_row_copy_cost)
/*
Creating a record from the join cache is faster than getting a row from
the engine. JOIN_CACHE_ROW_COPY_COST_FACTOR is the factor used to
take this into account. It is multiplied with ROW_COPY_COST.
NOTE(review): the commit description that introduced this factor mentions
a value of 0.5, while the value defined here is 0.75 - confirm which one
is intended.
*/
#define JOIN_CACHE_ROW_COPY_COST_FACTOR 0.75
/*
Cost of finding the next key during an index scan and copying it to
'table->record'.
If this is too small, then index scans will be preferred over 'ref',
as with table scans there are no key reads (KEY_LOOKUP_COST) and
fewer disk reads.
KEY_COPY_COST expands to the plain identifier optimizer_key_copy_cost, so
something with that name must be in scope wherever the macro is used.
KEY_COPY_COST_THD(THD) reads the value from (THD)->variables instead.
*/
#define DEFAULT_KEY_COPY_COST ((double) 1.0 / 40.0)
#define KEY_COPY_COST optimizer_key_copy_cost
#define KEY_COPY_COST_THD(THD) ((THD)->variables.optimizer_key_copy_cost)
/*
Cost of finding the next index entry and checking it against a filter.
This cost is very low as it's done inside the storage engine.
It should be smaller than KEY_COPY_COST (the default here is 1/80,
compared to 1/40 for DEFAULT_KEY_COPY_COST).
KEY_NEXT_FIND_COST expands to the plain identifier optimizer_next_find_cost.
*/
#define DEFAULT_KEY_NEXT_FIND_COST ((double) 1.0 / 80.0)
#define KEY_NEXT_FIND_COST optimizer_next_find_cost
/**
The following is used to decide if MariaDB should use table scanning
instead of reading with keys. The number says how many evaluations of the
WHERE clause are comparable to reading one extra row from a table.
WHERE_COST expands to the plain identifier optimizer_where_cost;
WHERE_COST_THD(THD) reads the value from (THD)->variables instead.
*/
#define DEFAULT_WHERE_COST (1 / 5.0)
#define WHERE_COST optimizer_where_cost
#define WHERE_COST_THD(THD) ((THD)->variables.optimizer_where_cost)
/*
Key comparison cost.
DEFAULT_KEY_COMPARE_COST is the default value; KEY_COMPARE_COST expands to
the plain identifier optimizer_key_cmp_cost, so something with that name
must be in scope wherever the macro is used.
*/
#define DEFAULT_KEY_COMPARE_COST (1 / 20.0)
#define KEY_COMPARE_COST optimizer_key_cmp_cost
/*
Cost of comparing two rowids. This is set relative to KEY_COMPARE_COST.
This is usually just a memcmp!
The expansion is fully parenthesized so that the macro behaves as a single
operand inside a larger expression (for example when used as a divisor).
*/
#define ROWID_COMPARE_COST (KEY_COMPARE_COST / 10.0)
/*
Same as ROWID_COMPARE_COST, but reads the key compare cost from
(THD)->variables. The member is named explicitly instead of relying on
KEY_COMPARE_COST being macro-expanded into a member name.
*/
#define ROWID_COMPARE_COST_THD(THD) ((THD)->variables.optimizer_key_cmp_cost / 10.0)
/*
Setup cost for different operations
*/
/*
Extra cost for doing a range scan. Used to prefer 'ref' over range.
The whole expansion is parenthesized so that the macro behaves as a single
operand wherever it is used.
*/
#define MULTI_RANGE_READ_SETUP_COST ((double) (1.0 / 50.0))
/*
These costs are mainly there to handle small tables, like the ones we have
in the mtr test suite.
*/
/* Extra cost for a full table scan. Used to prefer range over table scans */
#define TABLE_SCAN_SETUP_COST 1.0
/* Extra cost for a full index scan. Used to prefer range over index scans */
#define INDEX_SCAN_SETUP_COST 1.0
/*
The lower bound of accepted rows when using a filter.
This is used to ensure that filters are not too aggressive.
*/
#define MIN_ROWS_AFTER_FILTERING 1.0
/*
cost1 is better than cost2 only if cost1 + COST_EPS < cost2.
The main purpose of this is to ensure that we use the first index or plan
when there are identical plans. Without COST_EPS some plans in the
test suite would vary depending on floating point calculations done
in different paths.
*/
#define COST_EPS 0.0001
/*
For sequential disk seeks the cost formula is:
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
The cost of an average seek is
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * BLOCKS_IN_AVG_SEEK = 1.0
(with the values below: 0.9 + (0.1 / 128) * 128).
*/
#define DISK_SEEK_BASE_COST ((double)0.9)
#define BLOCKS_IN_AVG_SEEK 128
#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)
/*
Subquery materialization-related constants.
NOTE(review): the HEAP_ and DISK_ prefixes presumably distinguish in-memory
(heap) temporary tables from on-disk ones - confirm against the users of
these constants.
*/
/* HEAP_TEMPTABLE_LOOKUP_COST should match ha_heap::read_time() */
#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
#define HEAP_TEMPTABLE_CREATE_COST 1.0
#define DISK_TEMPTABLE_LOOKUP_COST 1.0
#define DISK_TEMPTABLE_CREATE_COST 4.0 /* Creating and deleting 2 temp tables */
#define DISK_TEMPTABLE_BLOCK_SIZE 8192
/* NOTE(review): undocumented in this file; the unit and users need confirming */
#define SORT_INDEX_CMP_COST 0.02
/*
Largest cost value used by the helpers below; slightly smaller than DBL_MAX.
DBL_MAX and DBL_EPSILON come from <float.h>, which must have been included
wherever these macros are expanded.
*/
#define COST_MAX ((1.0 - DBL_EPSILON) * DBL_MAX)
/*
Saturating helpers: yield the sum (COST_ADD) or product (COST_MULT) of the
arguments when the first argument is below COST_MAX - d (respectively
COST_MAX / f), and COST_MAX otherwise.
Both macros evaluate their arguments more than once, so do not pass
expressions with side effects.
*/
#define COST_ADD(c,d) ((c) < COST_MAX - (d) ? (c) + (d) : COST_MAX)
#define COST_MULT(c,f) ((c) < COST_MAX / (f) ? (c) * (f) : COST_MAX)
#endif /* OPTIMIZER_COSTS_INCLUDED */