mariadb/sql/opt_trace_ddl_info.cc
/*
Copyright (c) 2025, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335
USA */
#include "opt_trace_ddl_info.h"
#include "sql_show.h"
#include "my_json_writer.h"
#include "sql_table.h"
#include "mysql.h"
#include "hash.h"
#include "sql_select.h"
#include "sql_explain.h"
/**
  @file

  @brief
  Stores the context (i.e. DDLs and basic stats) of the tables and
  views used in SELECT, INSERT, DELETE, and UPDATE queries into the
  optimizer trace. All the contexts are stored in one place, as a
  JSON array named "list_contexts". The current database name is
  also included in the trace.

  The JSON structure looks like:
  {
    "current_database": "db_name",
    "list_contexts": [
      {
        "name": "table_name",
        "ddl": "create table/view definition",
        "num_of_records": n,
        "indexes": [
          {
            "index_name": "index_name1",
            "rec_per_key": [n1, n2, ...]
          }, ...
        ],
        "list_ranges": [
          {
            "index_name": "index_name1",
            "ranges": ["(NULL) < (key1) < (foo)", ...],
            "num_rows": n
          }, ...
        ]
      }, ...
    ]
  }
*/
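/*
  Usage sketch (assuming the user-visible variable names match the
  THD fields checked in this file, and that the trace is read back
  through the usual INFORMATION_SCHEMA table):

    SET optimizer_trace='enabled=on';
    SET optimizer_record_context=ON;
    SELECT * FROM t1 WHERE a < 10;
    SELECT trace FROM information_schema.OPTIMIZER_TRACE;
*/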
static void store_full_table_name(THD *thd, TABLE_LIST *tbl, String *buf);
struct DDL_Key
{
char *name; //full name of the table or view
size_t name_len;
};
/*
  Helper function returning the key portion of a DDL_Key record
  stored in the hash.
*/
static const uchar *get_rec_key(const void *entry_, size_t *length,
my_bool flags)
{
auto entry= static_cast<const DDL_Key *>(entry_);
*length= entry->name_len;
return reinterpret_cast<const uchar *>(entry->name);
}
/*
@brief
Check whether a table is a regular base table (for which we should
dump the ddl) or not.
@detail
Besides base tables, the query may have:
- Table functions (Currently it's only JSON_TABLE)
- INFORMATION_SCHEMA tables
- Tables in PERFORMANCE_SCHEMA and mysql database
- Internal temporary ("work") tables
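  For example (illustrative queries), neither the table function in
  "SELECT * FROM JSON_TABLE(...)" nor the INFORMATION_SCHEMA table in
  "SELECT * FROM information_schema.TABLES" passes this check.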
*/
static bool is_base_table(TABLE_LIST *tbl)
{
return
(tbl->table &&
tbl->table->s &&
!tbl->table_function &&
!tbl->schema_table &&
get_table_category(tbl->get_db_name(), tbl->get_table_name()) ==
TABLE_CATEGORY_USER &&
tbl->table->s->tmp_table != INTERNAL_TMP_TABLE &&
tbl->table->s->tmp_table != SYSTEM_TMP_TABLE);
}
static bool dump_name_ddl_to_trace(THD *thd, DDL_Key *ddl_key, String *stmt,
Json_writer_object &ctx_wrapper)
{
ctx_wrapper.add("name", ddl_key->name);
size_t non_esc_stmt_len= stmt->length();
/*
  Make the escaped buffer 4 times the size of the unescaped
  statement. 4 is chosen as a worst-case factor, although 3 should
  suffice: e.g. "'" would be escaped to "\'".
*/
const size_t len_multiplier= 4;
size_t escape_stmt_len= len_multiplier * non_esc_stmt_len;
char *escaped_stmt= (char *) thd->alloc(escape_stmt_len + 1);
if (!escaped_stmt)
return true;
int act_escape_stmt_len=
json_escape_string(stmt->c_ptr(), stmt->c_ptr() + non_esc_stmt_len,
escaped_stmt, escaped_stmt + escape_stmt_len);
if (act_escape_stmt_len < 0)
return true;
escaped_stmt[act_escape_stmt_len]= 0;
ctx_wrapper.add("ddl", escaped_stmt);
return false;
}
static void dump_index_range_stats_to_trace(THD *thd, uchar *tbl_name,
size_t tbl_name_len)
{
if (!thd->stats_ctx_recorder || !thd->stats_ctx_recorder->has_records())
return;
trace_table_index_range_context *context=
thd->stats_ctx_recorder->search(tbl_name, tbl_name_len);
if (!context)
return;
Json_writer_array list_ranges_wrapper(thd, "list_ranges");
List_iterator<Multi_range_read_const_call_record> irc_li(context->index_list);
while (Multi_range_read_const_call_record *irc= irc_li++)
{
Json_writer_object irc_wrapper(thd);
irc_wrapper.add("index_name", irc->idx_name);
List_iterator<Range_record> rc_li(irc->range_list);
Json_writer_array ranges_wrapper(thd, "ranges");
while (Range_record *rc= rc_li++)
{
ranges_wrapper.add(rc->range, strlen(rc->range));
}
ranges_wrapper.end();
irc_wrapper.add("num_rows", irc->num_records);
}
}
/*
  Dump the following table stats to the trace:
  1. the total number of records in the table
  2. if the table has any indexes: their names and the number of
     records per key
  3. range stats on the indexes
*/
static void dump_table_stats_to_trace(THD *thd, TABLE_LIST *tbl,
uchar *tbl_name,
size_t tbl_name_len,
Json_writer_object &ctx_wrapper)
{
TABLE *table= tbl->table;
ctx_wrapper.add("num_of_records", tbl->table->stat_records());
if (!table->key_info)
return;
Json_writer_array indexes_wrapper(thd, "indexes");
for (uint idx= 0; idx < table->s->keys; idx++)
{
KEY *key= &table->key_info[idx];
uint num_key_parts= key->user_defined_key_parts;
Json_writer_object index_wrapper(thd);
index_wrapper.add("index_name", key->name);
Json_writer_array rpk_wrapper(thd, "rec_per_key");
for (uint i= 0; i < num_key_parts; i++)
{
rpk_wrapper.add(key->actual_rec_per_key(i));
}
}
indexes_wrapper.end();
dump_index_range_stats_to_trace(thd, tbl_name, tbl_name_len);
}
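/*
  An illustrative fragment of what this emits for a table with one
  two-part index (values invented):

    "num_of_records": 1000,
    "indexes": [
      {
        "index_name": "ix1",
        "rec_per_key": [20, 2]
      }
    ]
*/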
static void create_view_def(THD *thd, TABLE_LIST *table, String *name,
String *buf)
{
buf->append(STRING_WITH_LEN("CREATE "));
view_store_options(thd, table, buf);
buf->append(STRING_WITH_LEN("VIEW "));
buf->append(*name);
buf->append(STRING_WITH_LEN(" AS "));
buf->append(table->select_stmt.str, table->select_stmt.length);
}
/*
  @brief
  Dump the definitions and basic stats of all tables and views used
  by the statement into the optimizer trace. The goal is to
  eventually save everything that is needed to reproduce the query
  execution.

  @detail
  Stores the DDLs and stats of the tables and views that are used in
  SELECT, INSERT, DELETE, or UPDATE queries into the optimizer
  trace. The global query_tables are read from thd->lex in reverse
  order, and a record with the table name and the DDL of the table
  is created for each. The records are stored in a hash keyed by
  "db_name.table_name", so that duplicates are discarded; a newly
  created record is dumped into the trace only if it is not already
  present in the hash.

  @return
  false when no error occurred during the computation
*/
bool store_tables_context_in_trace(THD *thd)
{
LEX *lex= thd->lex;
if (!(thd->variables.optimizer_trace &&
thd->variables.optimizer_record_context &&
(lex->sql_command == SQLCOM_SELECT ||
lex->sql_command == SQLCOM_INSERT_SELECT ||
lex->sql_command == SQLCOM_DELETE ||
lex->sql_command == SQLCOM_UPDATE ||
lex->sql_command == SQLCOM_DELETE_MULTI ||
lex->sql_command == SQLCOM_UPDATE_MULTI)))
return false;
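/* The query uses no tables: there is nothing to record */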
if (lex->query_tables == *(lex->query_tables_last))
return false;
Json_writer_object main_wrapper(thd);
main_wrapper.add("current_database", thd->get_db());
Json_writer_array context_list(thd, "list_contexts");
HASH hash;
List<TABLE_LIST> tables_list;
/*
lex->query_tables lists the VIEWs before their underlying tables.
Create a list in the reverse order.
*/
for (TABLE_LIST *tbl= lex->query_tables; tbl; tbl= tbl->next_global)
{
if (!tbl->is_view() && !is_base_table(tbl))
continue;
if (tables_list.push_front(tbl))
return true;
}
if (tables_list.is_empty())
return false;
List_iterator<TABLE_LIST> li(tables_list);
my_hash_init(key_memory_trace_ddl_info, &hash, system_charset_info, 16, 0, 0,
get_rec_key, NULL, HASH_UNIQUE);
bool res= false;
for (TABLE_LIST *tbl= li++; tbl; li.remove(), tbl= li++)
{
String ddl;
String name;
DDL_Key *ddl_key;
store_full_table_name(thd, tbl, &name);
/*
A query can use the same table multiple times. Do not dump the DDL
multiple times.
*/
if (my_hash_search(&hash, (uchar *) name.c_ptr(), name.length()))
continue;
if (!(ddl_key= (DDL_Key *) thd->alloc(sizeof(DDL_Key))))
{
res= true;
break;
}
if (tbl->is_view())
create_view_def(thd, tbl, &name, &ddl);
else
{
if (show_create_table(thd, tbl, &ddl, NULL, WITH_DB_NAME))
{
res= true;
break;
}
}
ddl_key->name= strdup_root(thd->mem_root, name.c_ptr());
ddl_key->name_len= name.length();
if (my_hash_insert(&hash, (uchar *) ddl_key))
{
res= true;
break;
}
Json_writer_object ctx_wrapper(thd);
if (dump_name_ddl_to_trace(thd, ddl_key, &ddl, ctx_wrapper))
{
res= true;
break;
}
if (!tbl->is_view())
{
dump_table_stats_to_trace(thd, tbl, (uchar *) ddl_key->name,
ddl_key->name_len, ctx_wrapper);
}
}
my_hash_free(&hash);
return res;
}
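/*
  For example (an illustrative scenario), given
  "CREATE VIEW v1 AS SELECT * FROM t1" and the query
  "SELECT * FROM v1", lex->query_tables lists v1 before t1; the
  push_front() reversal above makes the trace list t1's context
  before v1's, and the hash ensures each context appears only once.
*/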
Optimizer_Stats_Context_Recorder::Optimizer_Stats_Context_Recorder()
{
my_hash_init(key_memory_trace_ddl_info, &tbl_trace_ctx_hash,
system_charset_info, 16, 0, 0,
&Optimizer_Stats_Context_Recorder::get_tbl_trace_ctx_key, 0,
HASH_UNIQUE);
}
Optimizer_Stats_Context_Recorder::~Optimizer_Stats_Context_Recorder()
{
my_hash_free(&tbl_trace_ctx_hash);
}
bool Optimizer_Stats_Context_Recorder::has_records()
{
return tbl_trace_ctx_hash.records > 0;
}
trace_table_index_range_context *
Optimizer_Stats_Context_Recorder::search(uchar *tbl_name, size_t tbl_name_len)
{
return (trace_table_index_range_context *) my_hash_search(
&tbl_trace_ctx_hash, tbl_name, tbl_name_len);
}
/*
@detail
Do not use thd->mem_root, allocate memory only on the passed mem_root.
*/
void Range_list_recorder::add_range(MEM_ROOT *mem_root, const char *range)
{
Range_record *trc= new (mem_root) Range_record;
trc->range= strdup_root(mem_root, range);
static_cast<Multi_range_read_const_call_record*>(this)->range_list.push_back(trc, mem_root);
}
/*
@brief
Start recording a range list for tbl.index_name
@return
Pointer one can use to add ranges.
*/
Range_list_recorder*
Optimizer_Stats_Context_Recorder::start_range_list_record(
THD *thd, MEM_ROOT *mem_root,
TABLE_LIST *tbl, size_t found_records, const char *index_name)
{
String tbl_name;
Multi_range_read_const_call_record *index_ctx=
new (mem_root) Multi_range_read_const_call_record;
/*
  Create a new table context if it is not already present in the
  hash. Store the ranges of every index of the table into the
  table context. The table context is also persisted in the hash,
  which is used later for dumping all the context information into
  the trace.
*/
store_full_table_name(thd, tbl, &tbl_name);
trace_table_index_range_context *table_ctx=
search((uchar *) tbl_name.c_ptr(), tbl_name.length());
if (!table_ctx)
{
table_ctx= new (mem_root) trace_table_index_range_context;
table_ctx->name= strdup_root(mem_root, tbl_name.c_ptr());
table_ctx->name_len= tbl_name.length();
table_ctx->index_list.empty();
my_hash_insert(&tbl_trace_ctx_hash, (uchar *) table_ctx);
}
index_ctx->idx_name= strdup_root(mem_root, index_name);
index_ctx->num_records= found_records;
table_ctx->index_list.push_back(index_ctx, mem_root);
return index_ctx;
}
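/*
  A sketch of the intended recording flow from range analysis code
  (e.g. opt_range.cc); the call site, the mem_root choice, and the
  names below are illustrative:

    if (Optimizer_Stats_Context_Recorder *recorder=
          get_current_stats_recorder(thd))
    {
      Range_list_recorder *rlr=
        recorder->start_range_list_record(thd, mem_root, tbl,
                                          found_records,
                                          key->name.str);
      rlr->add_range(mem_root, "(NULL) < (key1) < (foo)");
    }
*/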
/*
  Helper function returning the key portion of a trace table context
  stored in the hash.
*/
const uchar *Optimizer_Stats_Context_Recorder::get_tbl_trace_ctx_key(
const void *entry_, size_t *length, my_bool flags)
{
auto entry= static_cast<const trace_table_index_range_context *>(entry_);
*length= entry->name_len;
return reinterpret_cast<const uchar *>(entry->name);
}
/*
  Store the full table name, i.e. "db_name.table_name", into the
  supplied buffer buf.
  TODO: why does this need THD???
*/
static void store_full_table_name(THD *thd, TABLE_LIST *tbl, String *buf)
{
buf->append(tbl->get_db_name().str, tbl->get_db_name().length);
buf->append(STRING_WITH_LEN("."));
buf->append(tbl->get_table_name().str, tbl->get_table_name().length);
}
/*
  Return a new C-style string with the contents of buf copied into
  it. The memory is allocated on thd's mem_root.
*/
char *create_new_copy(THD *thd, const char *buf)
{
char *name_copy= (char *) thd->calloc(strlen(buf) + 1);
if (name_copy)
strcpy(name_copy, buf);
return name_copy;
}
Optimizer_Stats_Context_Recorder *get_current_stats_recorder(THD *thd)
{
if (thd->variables.optimizer_record_context &&
!thd->lex->explain->is_query_plan_ready())
{
if (!thd->stats_ctx_recorder)
thd->stats_ctx_recorder= new Optimizer_Stats_Context_Recorder();
return thd->stats_ctx_recorder;
}
return nullptr;
}