mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-04 12:56:14 +01:00 
			
		
		
		
	For InnoDB tables, parse the CREATE TABLE statement to defer index and constraint creation until after data loading. For other storage engines, the DISABLE/ENABLE KEYS commands achieve similar optimization. This behavior is controlled by a new option, innodb-optimize-keys (default: ON), compatible with mydumper. Additionally, this commit separates the table creation phase from data loading. Running DDL statements (such as DROP IF EXISTS) in a single thread avoids the "table not locked" issue from MDEV-34741. As a bonus, view creation no longer requires a separate step.
		
			
				
	
	
		
			217 lines
		
	
	
	
		
			6.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			217 lines
		
	
	
	
		
			6.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
   Copyright (c) 2024, MariaDB
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or modify
 | 
						|
   it under the terms of the GNU General Public License as published by
 | 
						|
   the Free Software Foundation; version 2 of the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA
 | 
						|
*/
 | 
						|
 | 
						|
/*
 | 
						|
  This file contains some routines to do client-side parsing of CREATE TABLE
 | 
						|
  statements. The goal is to extract the primary key, constraints, and
 | 
						|
  secondary keys. This is useful for optimizing the import process, to delay
 | 
						|
  secondary index creation until after the data has been loaded.
 | 
						|
*/
 | 
						|
 | 
						|
#include <string>
 | 
						|
#include <vector>
 | 
						|
#include <pcre2posix.h>
 | 
						|
 | 
						|
#include "import_util.h"
 | 
						|
#include <assert.h>
 | 
						|
 | 
						|
/**
 * Extract the first CREATE TABLE statement from a script.
 *
 * A statement is recognized as "CREATE TABLE" followed by text that runs up
 * to the next ';', where that ';' must be followed by optional whitespace
 * and a newline. Only the statement itself (including the ';') is returned;
 * the trailing whitespace/newline is not part of the result.
 *
 * @param script The input script containing SQL statements.
 * @return std::string The first CREATE TABLE statement found, or an empty
 * string if none is found or the search pattern could not be compiled.
 */
std::string extract_first_create_table(const std::string &script)
{
  static const char pattern[]= "(CREATE\\s+TABLE\\s+[^;]+;)\\s*\\n";

  regex_t create_table_regex;
  /*
    A regex_t that failed to compile must not be handed to regexec() or
    regfree(), so bail out before touching it.
  */
  if (regcomp(&create_table_regex, pattern, REG_EXTENDED) != 0)
    return "";

  regmatch_t match[2];
  const bool found=
      regexec(&create_table_regex, script.c_str(), 2, match, 0) == 0;

  /*
    The match offsets are plain integers that stay valid after regfree().
    Releasing the regex first means an allocation failure while building
    the result cannot leak the compiled pattern.
  */
  regfree(&create_table_regex);

  if (!found)
    return "";
  return script.substr(match[1].rm_so, match[1].rm_eo - match[1].rm_so);
}
 | 
						|
 | 
						|
namespace
{
/**
  Owns a regex compiled with regcomp(..., REG_EXTENDED) and releases it with
  regfree() when it goes out of scope, so that early exits and exceptions
  cannot leak it. A pattern that fails to compile simply never matches.
*/
class ScopedRegex
{
  regex_t re;
  bool compiled;

public:
  explicit ScopedRegex(const char *pattern)
      : compiled(regcomp(&re, pattern, REG_EXTENDED) == 0)
  {
  }
  ~ScopedRegex()
  {
    if (compiled)
      regfree(&re);
  }
  ScopedRegex(const ScopedRegex &)= delete;
  ScopedRegex &operator=(const ScopedRegex &)= delete;

  /**
    @param subject  NUL-terminated text to search.
    @param nmatch   Number of entries available in `groups`.
    @param groups   Receives the match offsets, relative to `subject`.
    @return true if the pattern compiled and matched `subject`.
  */
  bool match(const char *subject, size_t nmatch, regmatch_t *groups) const
  {
    return compiled && regexec(&re, subject, nmatch, groups, 0) == 0;
  }
};
} // namespace

/**
  Copy one capture group out of the text that was searched.

  @param subject The same pointer that was passed to the regex search;
                 offsets in `group` are relative to it.
  @param group   A capture group that took part in the match.
*/
static std::string submatch_text(const char *subject, const regmatch_t &group)
{
  return std::string(subject + group.rm_so, group.rm_eo - group.rm_so);
}

/**
  Parse a CREATE TABLE statement and record its pieces.

  Fills in, when found in the statement:
  - primary_key: the "PRIMARY KEY ..." line, stored under the name "PRIMARY"
  - constraints: every "CONSTRAINT <name> ..." line
  - secondary_indexes: every [UNIQUE|FULLTEXT|VECTOR|SPATIAL] INDEX/KEY line
  - storage_engine: the word following "ENGINE="
  - table_name: the identifier between "CREATE TABLE" and the opening '('
  - non_pk_clustering_key_name: for an InnoDB table without a primary key,
    the name of the first UNIQUE secondary index

  Key and constraint lines are only recognized when each sits on its own
  line (the patterns are anchored on the surrounding newlines).

  @param create_table_stmt Text of a single CREATE TABLE statement.
*/
TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt)
{
  constexpr size_t MAX_MATCHES= 10;
  regmatch_t match[MAX_MATCHES];

  const ScopedRegex primary_key_regex(
      "\\n\\s*(PRIMARY\\s+KEY\\s+(.*?)),?\\n");
  const ScopedRegex constraint_regex(
      "\\n\\s*(CONSTRAINT\\s+(`?(?:[^`]|``)+`?)\\s+.*?),?\\n");
  const ScopedRegex index_regex(
      "\\n\\s*(((?:UNIQUE|FULLTEXT|VECTOR|SPATIAL)\\s+)?(INDEX|KEY)\\s+(`(?:[^`]|``)+`)\\s+.*?),?\\n");
  const ScopedRegex engine_regex("\\bENGINE\\s*=\\s*(\\w+)");
  const ScopedRegex table_name_regex(
      "CREATE\\s+TABLE\\s+(`?(?:[^`]|``)+`?)\\s*\\(");

  const char *stmt= create_table_stmt.c_str();

  // Extract primary key
  if (primary_key_regex.match(stmt, MAX_MATCHES, match))
    primary_key= {submatch_text(stmt, match[1]), "PRIMARY"};

  // Extract constraints and foreign keys
  const char *search_start= stmt;
  while (constraint_regex.match(search_start, MAX_MATCHES, match))
  {
    assert(match[2].rm_so != -1);
    assert(match[1].rm_so != -1);
    std::string name= submatch_text(search_start, match[2]);
    std::string definition= submatch_text(search_start, match[1]);
    constraints.push_back({definition, name});
    /*
      Resume on the newline that ended this match: the pattern needs a
      leading newline, so the terminator of one line doubles as the
      start marker of the next.
    */
    search_start+= match[0].rm_eo - 1;
  }

  // Extract secondary indexes
  search_start= stmt;
  while (index_regex.match(search_start, MAX_MATCHES, match))
  {
    assert(match[4].rm_so != -1);
    std::string name= submatch_text(search_start, match[4]);
    std::string definition= submatch_text(search_start, match[1]);
    secondary_indexes.push_back({definition, name});
    // Same one-character step back as in the constraint loop.
    search_start+= match[0].rm_eo - 1;
  }

  // Extract storage engine
  if (engine_regex.match(stmt, MAX_MATCHES, match))
    storage_engine= submatch_text(stmt, match[1]);

  // Extract table name
  if (table_name_regex.match(stmt, MAX_MATCHES, match))
    table_name= submatch_text(stmt, match[1]);

  /*
    Without a primary key, remember the first UNIQUE index of an InnoDB
    table; generate_alter_add()/generate_alter_drop() leave that index
    alone. An index definition starts at the optional UNIQUE/FULLTEXT/...
    keyword, so test the prefix rather than searching the whole text,
    which would also hit index or column names containing "UNIQUE".
    NOTE(review): nullability of the indexed columns is not examined here.
  */
  if (primary_key.definition.empty() && storage_engine == "InnoDB")
  {
    for (const auto &index : secondary_indexes)
    {
      if (index.definition.compare(0, 6, "UNIQUE") == 0)
      {
        non_pk_clustering_key_name= index.name;
        break;
      }
    }
  }
}
 | 
						|
 | 
						|
/**
 | 
						|
 Convert a KeyOrConstraintDefinitionType enum value to its
 | 
						|
 corresponding string representation.
 | 
						|
 | 
						|
 @param type The KeyOrConstraintDefinitionType enum value.
 | 
						|
 @return std::string The string representation of the
 | 
						|
  KeyOrConstraintDefinitionType.
 | 
						|
*/
 | 
						|
static std::string to_string(KeyOrConstraintType type)
 | 
						|
{
 | 
						|
  switch (type)
 | 
						|
  {
 | 
						|
  case KeyOrConstraintType::CONSTRAINT:
 | 
						|
    return "CONSTRAINT";
 | 
						|
  case KeyOrConstraintType::INDEX:
 | 
						|
    return "INDEX";
 | 
						|
  default:
 | 
						|
    return "UNKNOWN";
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
std::string TableDDLInfo::generate_alter_add(
 | 
						|
    const std::vector<KeyDefinition> &definitions,
 | 
						|
    KeyOrConstraintType type) const
 | 
						|
{
 | 
						|
  if (definitions.empty() ||
 | 
						|
      (type == KeyOrConstraintType::INDEX && definitions.size() == 1
 | 
						|
      && !non_pk_clustering_key_name.empty()))
 | 
						|
  {
 | 
						|
    return "";
 | 
						|
  }
 | 
						|
 | 
						|
  std::string sql= "ALTER TABLE " + table_name + " ";
 | 
						|
  bool need_comma= false;
 | 
						|
  for (const auto &definition : definitions)
 | 
						|
  {
 | 
						|
    /*
 | 
						|
      Do not add or drop clustering secondary index
 | 
						|
    */
 | 
						|
    if (type == KeyOrConstraintType::INDEX &&
 | 
						|
        definition.name == non_pk_clustering_key_name)
 | 
						|
      continue;
 | 
						|
 | 
						|
    if (need_comma)
 | 
						|
      sql+= ", ";
 | 
						|
    else
 | 
						|
      need_comma= true;
 | 
						|
    sql+= "ADD " + definition.definition;
 | 
						|
  }
 | 
						|
  return sql;
 | 
						|
}
 | 
						|
 | 
						|
std::string TableDDLInfo::generate_alter_drop(
 | 
						|
    const std::vector<KeyDefinition> &definitions, KeyOrConstraintType type) const
 | 
						|
{
 | 
						|
  if (definitions.empty() ||
 | 
						|
      (type == KeyOrConstraintType::INDEX && definitions.size() == 1 &&
 | 
						|
       !non_pk_clustering_key_name.empty()))
 | 
						|
  {
 | 
						|
    return "";
 | 
						|
  }
 | 
						|
 | 
						|
  std::string sql= "ALTER TABLE " + table_name + " ";
 | 
						|
  bool need_comma= false;
 | 
						|
  for (const auto &definition : definitions)
 | 
						|
  {
 | 
						|
    if (type == KeyOrConstraintType::INDEX &&
 | 
						|
        definition.name == non_pk_clustering_key_name)
 | 
						|
      continue;
 | 
						|
 | 
						|
    if (need_comma)
 | 
						|
      sql+= ", ";
 | 
						|
    else
 | 
						|
      need_comma= true;
 | 
						|
    sql+= "DROP " + to_string(type) + " " +
 | 
						|
          definition.name;
 | 
						|
  }
 | 
						|
  return sql;
 | 
						|
}
 |