mirror of
https://github.com/MariaDB/server.git
synced 2025-02-13 08:55:35 +01:00
3bd23b76c5
For InnoDB tables, parse the CREATE TABLE statement to defer index and constraint creation until after data loading. For other storage engines, the DISABLE/ENABLE KEYS commands achieve similar optimization. This behavior is controlled by a new option, innodb-optimize-keys (default: ON), compatible with mydumper. Additionally, this commit separates the table creation phase from data loading. Running DDL statements (such as DROP IF EXISTS) in a single thread avoids the "table not locked" issue from MDEV-34741. As a bonus, view creation no longer requires a separate step.
217 lines
6.4 KiB
C++
217 lines
6.4 KiB
C++
/*
|
|
Copyright (c) 2024, MariaDB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
|
|
*/
|
|
|
|
/*
|
|
This file contains some routines to do client-side parsing of CREATE TABLE
|
|
statements. The goal is to extract the primary key, constraints, and
|
|
secondary key. This is useful for optimizing the import process, to delay
|
|
secondary index creation until after the data has been loaded.
|
|
*/
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include <pcre2posix.h>
|
|
|
|
#include "import_util.h"
|
|
#include <assert.h>
|
|
|
|
/**
 * Extract the first CREATE TABLE statement from a script.
 *
 * The statement must start with the keywords CREATE TABLE (matched
 * case-sensitively, no REG_ICASE is passed), runs up to and including the
 * first ';' and has to be followed by optional whitespace and a newline.
 *
 * @param script The input script containing SQL statements.
 * @return std::string The first CREATE TABLE statement found (terminating
 *         ';' included), or an empty string if there is none or the
 *         pattern could not be compiled.
 */
std::string extract_first_create_table(const std::string &script)
{
  static const char pattern[]= "(CREATE\\s+TABLE\\s+[^;]+;)\\s*\\n";

  regex_t create_table_regex;
  /*
    A regex_t that failed to compile must not be handed to regexec() or
    regfree(), so bail out with the "not found" result instead.
  */
  if (regcomp(&create_table_regex, pattern, REG_EXTENDED) != 0)
    return "";

  std::string result;
  regmatch_t match[2];
  if (regexec(&create_table_regex, script.c_str(), 2, match, 0) == 0)
  {
    /* match[1] is the statement itself, without the trailing whitespace. */
    result= script.substr(match[1].rm_so, match[1].rm_eo - match[1].rm_so);
  }

  regfree(&create_table_regex);
  return result;
}
|
|
|
|
namespace
{
/**
  Owns one compiled regular expression and frees it on scope exit.

  A pattern that fails to compile is remembered as such and simply never
  matches, so callers do not have to check the result of regcomp().
*/
class CompiledRegex
{
public:
  explicit CompiledRegex(const char *pattern)
      : compiled(regcomp(&re, pattern, REG_EXTENDED) == 0)
  {
  }
  ~CompiledRegex()
  {
    if (compiled)
      regfree(&re);
  }
  CompiledRegex(const CompiledRegex &)= delete;
  CompiledRegex &operator=(const CompiledRegex &)= delete;

  /** @return true if the pattern compiled and matches subject. */
  bool exec(const char *subject, size_t nmatch, regmatch_t *matches) const
  {
    return compiled && regexec(&re, subject, nmatch, matches, 0) == 0;
  }

private:
  regex_t re;
  bool compiled;
};
} // namespace

/**
  Parse a CREATE TABLE statement and record its keys and constraints.

  Fills in primary_key, constraints, secondary_indexes, storage_engine and
  table_name from the text of the statement. Key and constraint clauses are
  only recognised when each sits on a line of its own (the patterns are
  anchored on the surrounding newlines). If the table has no PRIMARY KEY and
  its engine is InnoDB, the name of the first UNIQUE index is stored in
  non_pk_clustering_key_name.

  @param create_table_stmt Text of a single CREATE TABLE statement.
*/
TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt)
{
  constexpr size_t MAX_MATCHES= 10;
  regmatch_t match[MAX_MATCHES];

  const CompiledRegex primary_key_regex(
      "\\n\\s*(PRIMARY\\s+KEY\\s+(.*?)),?\\n");
  const CompiledRegex constraint_regex(
      "\\n\\s*(CONSTRAINT\\s+(`?(?:[^`]|``)+`?)\\s+.*?),?\\n");
  const CompiledRegex index_regex(
      "\\n\\s*(((?:UNIQUE|FULLTEXT|VECTOR|SPATIAL)\\s+)?(INDEX|KEY)\\s+(`(?:[^`]|``)+`)\\s+.*?),?\\n");
  const CompiledRegex engine_regex("\\bENGINE\\s*=\\s*(\\w+)");
  const CompiledRegex table_name_regex(
      "CREATE\\s+TABLE\\s+(`?(?:[^`]|``)+`?)\\s*\\(");

  const char *stmt= create_table_stmt.c_str();

  /* Copy capture group `group` of the latest match; offsets are from base. */
  const auto captured= [&match](const char *base, size_t group) {
    return std::string(base + match[group].rm_so,
                       match[group].rm_eo - match[group].rm_so);
  };

  // Extract primary key
  if (primary_key_regex.exec(stmt, MAX_MATCHES, match))
    primary_key= {captured(stmt, 1), "PRIMARY"};

  // Extract constraints and foreign keys
  const char *search_start= stmt;
  while (constraint_regex.exec(search_start, MAX_MATCHES, match))
  {
    assert(match[2].rm_so != -1);
    assert(match[1].rm_so != -1);
    constraints.push_back(
        {captured(search_start, 1), captured(search_start, 2)});
    /*
      Resume on the newline that ended this match: the pattern also starts
      with a newline, so the next clause can reuse it.
    */
    search_start+= match[0].rm_eo - 1;
  }

  // Extract secondary indexes
  search_start= stmt;
  while (index_regex.exec(search_start, MAX_MATCHES, match))
  {
    assert(match[4].rm_so != -1);
    secondary_indexes.push_back(
        {captured(search_start, 1), captured(search_start, 4)});
    search_start+= match[0].rm_eo - 1;
  }

  // Extract storage engine
  if (engine_regex.exec(stmt, MAX_MATCHES, match))
    storage_engine= captured(stmt, 1);

  // Extract table name
  if (table_name_regex.exec(stmt, MAX_MATCHES, match))
    table_name= captured(stmt, 1);

  if (primary_key.definition.empty() && storage_engine == "InnoDB")
  {
    static const char unique_keyword[]= "UNIQUE";
    for (const auto &index : secondary_indexes)
    {
      /*
        A captured index definition always begins with its optional type
        keyword, so test the prefix. Searching the whole definition would
        also hit index names, column names or comments containing "UNIQUE".
      */
      if (index.definition.compare(0, sizeof(unique_keyword) - 1,
                                   unique_keyword) == 0)
      {
        non_pk_clustering_key_name= index.name;
        break;
      }
    }
  }
}
|
|
|
|
/**
|
|
Convert a KeyOrConstraintDefinitionType enum value to its
|
|
corresponding string representation.
|
|
|
|
@param type The KeyOrConstraintDefinitionType enum value.
|
|
@return std::string The string representation of the
|
|
KeyOrConstraintDefinitionType.
|
|
*/
|
|
static std::string to_string(KeyOrConstraintType type)
|
|
{
|
|
switch (type)
|
|
{
|
|
case KeyOrConstraintType::CONSTRAINT:
|
|
return "CONSTRAINT";
|
|
case KeyOrConstraintType::INDEX:
|
|
return "INDEX";
|
|
default:
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
std::string TableDDLInfo::generate_alter_add(
|
|
const std::vector<KeyDefinition> &definitions,
|
|
KeyOrConstraintType type) const
|
|
{
|
|
if (definitions.empty() ||
|
|
(type == KeyOrConstraintType::INDEX && definitions.size() == 1
|
|
&& !non_pk_clustering_key_name.empty()))
|
|
{
|
|
return "";
|
|
}
|
|
|
|
std::string sql= "ALTER TABLE " + table_name + " ";
|
|
bool need_comma= false;
|
|
for (const auto &definition : definitions)
|
|
{
|
|
/*
|
|
Do not add or drop clustering secondary index
|
|
*/
|
|
if (type == KeyOrConstraintType::INDEX &&
|
|
definition.name == non_pk_clustering_key_name)
|
|
continue;
|
|
|
|
if (need_comma)
|
|
sql+= ", ";
|
|
else
|
|
need_comma= true;
|
|
sql+= "ADD " + definition.definition;
|
|
}
|
|
return sql;
|
|
}
|
|
|
|
std::string TableDDLInfo::generate_alter_drop(
|
|
const std::vector<KeyDefinition> &definitions, KeyOrConstraintType type) const
|
|
{
|
|
if (definitions.empty() ||
|
|
(type == KeyOrConstraintType::INDEX && definitions.size() == 1 &&
|
|
!non_pk_clustering_key_name.empty()))
|
|
{
|
|
return "";
|
|
}
|
|
|
|
std::string sql= "ALTER TABLE " + table_name + " ";
|
|
bool need_comma= false;
|
|
for (const auto &definition : definitions)
|
|
{
|
|
if (type == KeyOrConstraintType::INDEX &&
|
|
definition.name == non_pk_clustering_key_name)
|
|
continue;
|
|
|
|
if (need_comma)
|
|
sql+= ", ";
|
|
else
|
|
need_comma= true;
|
|
sql+= "DROP " + to_string(type) + " " +
|
|
definition.name;
|
|
}
|
|
return sql;
|
|
}
|