mariadb/storage/csv/ha_tina.h
Monty d9d0e78039 Add limits for how many IO operations a table access will do
This solves the current problem in the optimizer
- SELECT FROM big_table
  - SELECT from small_table where small_table.eq_ref_key=big_table.id

The old code assumed that each eq_ref access will cause an IO.
As the cost of IO is high, this dominated the cost for the later table
which caused the optimizer to prefer table scans + join cache over
index reads.

This patch fixes this issue by limit the number of expected IO calls,
for rows and index separately, to the size of the table or index or
the number of accesses that we except in a range for the index.

The major changes are:

- Adding a new structure ALL_READ_COST that is mainly used in
  best_access_path() to hold the costs parts of the cost we are
  calculating. This allows us to limit the number of IO when multiplying
  the cost with the previous row combinations.
- All storage engine cost functions are changed to return IO_AND_CPU_COST.
  The virtual cost functions should now return in IO_AND_CPU_COST.io
  the number of disk blocks that will be accessed instead of the cost
  of the access.
- We are not limiting the io_blocks for table or index scans as we
  assume that engines may not store these in the 'hot' part of the
  cache. Table and index scan also uses much less IO blocks than
  key accesses, so the original issue is not as critical with scans.

Other things:
  OPT_RANGE now holds a 'Cost_estimate cost' instead a lot of different
  costs. All the old costs, like index_only_read, can be extracted
  from 'cost'.
- Added to the start of some functions 'handler *file= table->file'
  to shorten the code that is using the handler.
- handler->cost() is used to change a ALL_READ_COST or IO_AND_CPU_COST
  to 'cost in milliseconds'
- New functions:  handler::index_blocks() and handler::row_blocks()
  which are used to limit the IO.
- Added index_cost and row_cost to Cost_estimate and removed all not
  needed members.
- Removed cost coefficients from Cost_estimate as these don't make sense
  when costs (except IO_BLOCKS) are in milliseconds.
- Removed handler::avg_io_cost() and replaced it with DISK_READ_COST.
- Renamed best_range_rowid_filter_for_partial_join() to
  best_range_rowid_filter() as using the old name made rows too long.
- Changed all SJ_MATERIALIZATION_INFO 'Cost_estimate' variables to
  'double' as Cost_estimate power was not used for these and thus
  just caused storage and performance overhead.
- Changed cost_for_index_read() to use 'worst_seeks' to only limit
  IO, not number of table accesses. With this patch worst_seeks is
  probably not needed anymore, but I kept it around just in case.
- Applying cost for filter got to be much shorter and easier thanks
  to the API changes.
- Adjusted cost for fulltext keys in collaboration with Sergei Golubchik.
- Most test changes caused by this patch is that table scans are changed
  to use indexes.
- Added ha_seq::keyread_time() and ha_seq::key_scan_time() to get
  make checking number of potential IO blocks easier during debugging.
2023-02-02 23:57:30 +03:00

185 lines
6 KiB
C++

/* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#include <sys/types.h>
#include <sys/stat.h>
#include <my_dir.h>
#include "transparent_file.h"
#define DEFAULT_CHAIN_LENGTH 512
/*
Version for file format.
1 - Initial Version. That is, the version when the metafile was introduced.
*/
#define TINA_VERSION 1
typedef struct st_tina_share {
char *table_name;
char data_file_name[FN_REFLEN];
uint table_name_length, use_count;
/*
Below flag is needed to make log tables work with concurrent insert.
For more details see comment to ha_tina::update_status.
*/
my_bool is_log_table;
/*
Here we save the length of the file for readers. This is updated by
inserts, updates and deletes. The var is initialized along with the
share initialization.
*/
my_off_t saved_data_file_length;
mysql_mutex_t mutex;
THR_LOCK lock;
bool update_file_opened;
bool tina_write_opened;
File meta_file; /* Meta file we use */
File tina_write_filedes; /* File handler for readers */
bool crashed; /* Meta file is crashed */
ha_rows rows_recorded; /* Number of rows in tables */
uint data_file_version; /* Version of the data file used */
} TINA_SHARE;
struct tina_set {
my_off_t begin;
my_off_t end;
};
class ha_tina final : public handler
{
THR_LOCK_DATA lock; /* MySQL lock */
TINA_SHARE *share; /* Shared lock info */
my_off_t current_position; /* Current position in the file during a file scan */
my_off_t next_position; /* Next position in the file scan */
my_off_t local_saved_data_file_length; /* save position for reads */
my_off_t temp_file_length;
uchar byte_buffer[IO_SIZE];
Transparent_file *file_buff;
File data_file; /* File handler for readers */
File update_temp_file;
String buffer;
/*
The chain contains "holes" in the file, occurred because of
deletes/updates. It is used in rnd_end() to get rid of them
in the end of the query.
*/
tina_set chain_buffer[DEFAULT_CHAIN_LENGTH];
tina_set *chain;
tina_set *chain_ptr;
uchar chain_alloced;
uint32 chain_size;
uint local_data_file_version; /* Saved version of the data file used */
bool records_is_known, found_end_of_file;
MEM_ROOT blobroot;
private:
int curr_lock_type;
bool get_write_pos(my_off_t *end_pos, tina_set *closest_hole);
int open_update_temp_file_if_needed();
int init_tina_writer();
int init_data_file();
public:
ha_tina(handlerton *hton, TABLE_SHARE *table_arg);
~ha_tina()
{
if (chain_alloced)
my_free(chain);
if (file_buff)
delete file_buff;
free_root(&blobroot, MYF(0));
}
const char *index_type(uint inx) { return "NONE"; }
ulonglong table_flags() const
{
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_EXPORT |
HA_CAN_REPAIR | HA_SLOW_RND_POS);
}
ulong index_flags(uint idx, uint part, bool all_parts) const
{
/*
We will never have indexes so this will never be called(AKA we return
zero)
*/
return 0;
}
uint max_record_length() const { return HA_MAX_REC_LENGTH; }
uint max_keys() const { return 0; }
uint max_key_parts() const { return 0; }
uint max_key_length() const { return 0; }
/*
Called in test_quick_select to determine if indexes should be used.
*/
virtual IO_AND_CPU_COST scan_time()
{
return
{ (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE,
(stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
}
/* The next method will never be called */
virtual bool fast_key_read() { return 1;}
/*
TODO: return actual upper bound of number of records in the table.
(e.g. save number of records seen on full table scan and/or use file size
as upper bound)
*/
ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; }
int open(const char *name, int mode, uint open_options);
int close(void);
int write_row(const uchar * buf);
int update_row(const uchar * old_data, const uchar * new_data);
int delete_row(const uchar * buf);
int rnd_init(bool scan=1);
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
bool check_and_repair(THD *thd);
int check(THD* thd, HA_CHECK_OPT* check_opt);
bool is_crashed() const;
int rnd_end();
int repair(THD* thd, HA_CHECK_OPT* check_opt);
/* This is required for SQL layer to know that we support autorepair */
bool auto_repair(int error) const
{ return error == HA_ERR_CRASHED_ON_USAGE; }
bool auto_repair() const { return 1; }
void position(const uchar *record);
int info(uint);
int reset();
int extra(enum ha_extra_function operation);
int delete_all_rows(void);
int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info);
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
int external_lock(THD *thd, int lock_type);
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
/*
These functions used to get/update status of the handler.
Needed to enable concurrent inserts.
*/
void get_status();
void update_status();
/* The following methods were added just for TINA */
int encode_quote(const uchar *buf);
int find_current_row(uchar *buf);
int chain_append();
};