mariadb/storage/maria/s3_func.c
Sergei Golubchik 2ca3861b55 MDEV-28106 S3 tries to include thread.h while compiling on Windows
S3 engine and libmarias3 don't compile on Windows

also fixes:
MDEV-28104 Typo in storage/maria/s3_func.c: 'FN_DECVHAR
2022-03-23 17:42:15 +01:00

1625 lines
46 KiB
C

/* Copyright (C) 2019 MariaDB Corporation Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
/*
Interface function used by S3 storage engine and aria_copy_for_s3
*/
#include "maria_def.h"
#include "s3_func.h"
#include <aria_backup.h>
#include <mysqld_error.h>
#include <sql_const.h>
#include <mysys_err.h>
#include <mysql_com.h>
#include <zlib.h>
/* number of '.' to print during a copy in verbose mode */
#define DISPLAY_WITH 79
static void convert_index_to_s3_format(uchar *header, ulong block_size,
int compression);
static void convert_index_to_disk_format(uchar *header);
static void convert_frm_to_s3_format(uchar *header);
static void convert_frm_to_disk_format(uchar *header);
static int s3_read_file_from_disk(const char *filename, uchar **to,
size_t *to_size, my_bool print_error);
/*
  Names of the selectable S3 protocol variants.
  Used by ha_s3.cc and tools to define different protocol options.
  The name array is NullS terminated; the terminator is not counted in the
  TYPELIB element count (hence the -1 below).
*/
static const char *protocol_types[]= {"Auto", "Original", "Amazon", NullS};
TYPELIB s3_protocol_typelib= {array_elements(protocol_types)-1,"",
protocol_types, NULL};
/******************************************************************************
Allocations handler for libmarias3
To be removed when we do the init allocation in mysqld.cc
******************************************************************************/
/* malloc() hook for libmarias3: forwards to my_malloc() with MY_WME */
static void *s3_wrap_malloc(size_t size)
{
  void *mem;

  mem= my_malloc(PSI_NOT_INSTRUMENTED, size, MYF(MY_WME));
  return mem;
}
/*
  calloc() hook for libmarias3.

  Allocates a zero-filled area of nmemb * size bytes through my_malloc().
  Returns NULL if the multiplication would overflow size_t (as calloc()
  does) instead of silently allocating a too small buffer.
*/
static void *s3_wrap_calloc(size_t nmemb, size_t size)
{
  /* (size_t) -1 is the largest size_t value; guard nmemb * size */
  if (size && nmemb > ((size_t) -1) / size)
    return NULL;
  return my_malloc(PSI_NOT_INSTRUMENTED, nmemb * size,
                   MYF(MY_WME | MY_ZEROFILL));
}
/*
  realloc() hook for libmarias3: forwards to my_realloc().
  MY_ALLOW_ZERO_PTR is passed so that a null 'ptr' is accepted.
*/
static void *s3_wrap_realloc(void *ptr, size_t size)
{
  void *resized;

  resized= my_realloc(PSI_NOT_INSTRUMENTED, ptr, size,
                      MYF(MY_WME | MY_ALLOW_ZERO_PTR));
  return resized;
}
/* strdup() hook for libmarias3: forwards to my_strdup() with MY_WME */
static char *s3_wrap_strdup(const char *str)
{
  char *copy;

  copy= my_strdup(PSI_NOT_INSTRUMENTED, str, MYF(MY_WME));
  return copy;
}
/* free() hook for libmarias3 */
static void s3_wrap_free(void *ptr)
{
  /* Null pointers are filtered out here to avoid tracing of null */
  if (!ptr)
    return;
  my_free(ptr);
}
/*
  Initialize libmarias3 and register the s3_wrap_* functions as its
  memory allocation handlers.
*/
void s3_init_library()
{
  ms3_library_init_malloc(s3_wrap_malloc,
                          s3_wrap_free,
                          s3_wrap_realloc,
                          s3_wrap_strdup,
                          s3_wrap_calloc);
}
/* Counterpart of s3_init_library(): shut down libmarias3 */
void s3_deinit_library()
{
  ms3_library_deinit();
}
/******************************************************************************
Functions on S3_INFO and S3_BLOCK
******************************************************************************/
/*
  Free memory allocated by s3_get_object

  Only data->alloc_ptr is released and reset; data->str and data->length
  are left untouched.
*/
void s3_free(S3_BLOCK *data)
{
  void *owned= data->alloc_ptr;

  data->alloc_ptr= 0;
  my_free(owned);
}
/*
  Copy a S3_INFO structure

  The structure and the string buffers listed below are allocated with one
  my_multi_malloc() call. Members that are not given a buffer of their own
  below keep the value copied from 'old' (shallow copy).

  @return  Pointer to the new S3_INFO
  @return  0 if the allocation failed
*/
S3_INFO *s3_info_copy(S3_INFO *old)
{
  S3_INFO *copy, work;

  /* Start from a plain copy; this carries over all lengths */
  work= *old;

  /* Get the structure and all new string buffers in one allocation */
  if (!my_multi_malloc(PSI_NOT_INSTRUMENTED, MY_WME,
                       &copy, sizeof(S3_INFO),
                       &work.access_key.str, old->access_key.length+1,
                       &work.secret_key.str, old->secret_key.length+1,
                       &work.region.str, old->region.length+1,
                       &work.bucket.str, old->bucket.length+1,
                       &work.database.str, old->database.length+1,
                       &work.table.str, old->table.length+1,
                       &work.base_table.str, old->base_table.length+1,
                       NullS))
    return 0;

  /* 'work' now holds the lengths and the new buffer pointers */
  *copy= work;

  /* Fill the new buffers */
  strmov((char*) copy->access_key.str, old->access_key.str);
  strmov((char*) copy->secret_key.str, old->secret_key.str);
  strmov((char*) copy->region.str, old->region.str);
  strmov((char*) copy->bucket.str, old->bucket.str);
  /* Database may not be null terminated, so copy it by length */
  strmake((char*) copy->database.str, old->database.str,
          old->database.length);
  strmov((char*) copy->table.str, old->table.str);
  strmov((char*) copy->base_table.str, old->base_table.str);

  return copy;
}
/**
  Open a connection to s3

  @param s3   Connection information (keys, region, host and the optional
              protocol version / port / http settings)

  @return  New libmarias3 connection handle
  @return  0 on failure. An error is printed and my_errno is set to
             HA_ERR_NO_SUCH_TABLE
*/
ms3_st *s3_open_connection(S3_INFO *s3)
{
  ms3_st *s3_client;

  if (!(s3_client= ms3_init(s3->access_key.str,
                            s3->secret_key.str,
                            s3->region.str,
                            s3->host_name.str)))
  {
    my_printf_error(HA_ERR_NO_SUCH_TABLE,
                    "Can't open connection to S3, error: %d %s", MYF(0),
                    errno, ms3_error(errno));
    my_errno= HA_ERR_NO_SUCH_TABLE;
    /* Don't try to set options on a connection that doesn't exist */
    return 0;
  }

  /* Options are only sent to the library when they are set to non-zero */
  if (s3->protocol_version)
    ms3_set_option(s3_client, MS3_OPT_FORCE_PROTOCOL_VERSION,
                   &s3->protocol_version);
  if (s3->port)
    ms3_set_option(s3_client, MS3_OPT_PORT_NUMBER, &s3->port);
  if (s3->use_http)
    ms3_set_option(s3_client, MS3_OPT_USE_HTTP, NULL);

  return s3_client;
}
/**
  Close a connection to s3

  @param s3_client  Connection handle to release
*/
void s3_deinit(ms3_st *s3_client)
{
  /* Avoid tracing free calls while the library releases the handle */
  DBUG_PUSH("");
  ms3_deinit(s3_client);
  DBUG_POP();
}
/******************************************************************************
High level functions to copy tables to and from S3
******************************************************************************/
/**
  Create suffix for object name

  @param to_end  end of suffix (from previous call or 000000 at start)
  @param nr      block number to store in the suffix

  The suffix is a 6 length '0' prefixed number. If the number
  gets longer than 6, then it's extended to 7 and more digits
  (the caller's buffer must have room for that).
*/
static void fix_suffix(char *to_end, ulong nr)
{
  /*
    Room for all digits of a 64 bit number, a possible sign and the
    terminating null. (11 bytes is only enough for a 32 bit ulong.)
  */
  char buff[22];
  uint length= (uint) (int10_to_str(nr, buff, 10) - buff);

  /* Overwrite at most the 6 last characters of the previous suffix */
  set_if_smaller(length, 6);
  strmov(to_end - length, buff);
}
/**
  Copy file to 'aws_path' in blocks of block_size

  @param s3_client    connection to S3
  @param aws_bucket   bucket to store the objects in
  @param aws_path     object name; its end is updated with the block number
                      by fix_suffix() for every block
  @param file         file to read from
  @param start        file position of first byte to copy
  @param file_end     file position where the copy stops
  @param block        buffer of at least block_size bytes
  @param block_size   number of bytes to read for each object
  @param compression  passed on to s3_put_object()
  @param display      if set, print progress as '.' on stdout

  @return 0 ok
  @return 1 error. Error message is printed to stderr

  Notes:
  file is always closed before return
*/
static my_bool copy_from_file(ms3_st *s3_client, const char *aws_bucket,
                              char *aws_path,
                              File file, my_off_t start, my_off_t file_end,
                              uchar *block, size_t block_size,
                              my_bool compression, my_bool display)
{
  char *suffix_end= strend(aws_path);
  my_off_t offset= start;
  ulong block_nr= 1;
  my_bool dots_printed= 0;
  size_t got;

  while (offset < file_end)
  {
    got= my_pread(file, block, block_size, offset, MYF(MY_WME));
    if (got == MY_FILE_ERROR)
      goto failed;
    if (got == 0)
    {
      /* File is shorter than file_end */
      my_error(EE_EOFERR, MYF(0), my_filename(file), my_errno);
      goto failed;
    }

    fix_suffix(suffix_end, block_nr);
    if (s3_put_object(s3_client, aws_bucket, aws_path, block, got,
                      compression))
      goto failed;

    /* Write up to DISPLAY_WITH number of '.' during copy */
    if (display)
    {
      if (((offset + block_size) * DISPLAY_WITH / file_end) >
          (offset * DISPLAY_WITH/file_end))
      {
        fputc('.', stdout);
        fflush(stdout);
        dots_printed= 1;
      }
    }

    offset+= got;
    block_nr++;
  }

  if (dots_printed)
  {
    fputc('\n', stdout);
    fflush(stdout);
  }
  my_close(file, MYF(MY_WME));
  return 0;

failed:
  my_close(file, MYF(MY_WME));
  if (dots_printed)
  {
    fputc('\n', stdout);
    fflush(stdout);
  }
  return 1;
}
/**
  Copy an Aria table to S3

  @param s3_client    connection to S3
  @param aws_bucket   Aws bucket
  @param path         Path for Aria table (can be temp table)
  @param database     database name
  @param table_name   table name
  @param block_size   Block size in s3. If 0 then use block size
                      and compression as specified in the .MAI file as
                      specified as part of open.
  @param compression  Compression algorithm (0 = none, 1 = zip)
                      If block size is 0 then use .MAI file.
  @param force        If set, an already existing table in S3 is deleted
                      first. If not set, an existing table is an error.
  @param display      If set, print progress information on stdout
  @param copy_frm     If set, also copy the .frm file (if it exists)

  @return 0   ok
  @return 1   error
  @return EE_CANTCREATEFILE  table exists in S3 and force is not set
  @return #   error from aria_delete_from_s3() when force is set

  The table will be copied in S3 into the following locations:

  frm file (for discovery):
  aws_bucket/database/table/frm

  First index block (contains description of the Aria file):
  aws_bucket/database/table/aria

  Rest of the index file:
  aws_bucket/database/table/index/block_number

  Data file:
  aws_bucket/database/table/data/block_number

  block_number is 6 digits decimal number, prefixed with 0
  (Can be larger than 6 numbers, the prefix is just for nice output)

  frm and base blocks are small (just the needed data).
  index and blocks are of size 's3_block_size'

  If compression is used, then original block size is s3_block_size
  but the stored block will be the size of the compressed block.

  On error after the frm object was created, the frm object is removed
  from S3 again and all local resources are released.
*/
int aria_copy_to_s3(ms3_st *s3_client, const char *aws_bucket,
                    const char *path,
                    const char *database, const char *table_name,
                    ulong block_size, my_bool compression,
                    my_bool force, my_bool display, my_bool copy_frm)
{
  ARIA_TABLE_CAPABILITIES cap;
  char aws_path[FN_REFLEN+100];
  char filename[FN_REFLEN];
  char *aws_path_end, *end;
  uchar *alloc_block= 0, *block;
  ms3_status_st status;
  File file= -1;
  my_off_t file_size;
  size_t frm_length;
  int error;
  my_bool frm_created= 0;
  DBUG_ENTER("aria_copy_to_s3");
  DBUG_PRINT("enter",("from: %s database: %s table: %s",
                      path, database, table_name));

  aws_path_end= strxmov(aws_path, database, "/", table_name, NullS);
  strmov(aws_path_end, "/aria");

  /* ms3_status() returns 0 if the object exists */
  if (!ms3_status(s3_client, aws_bucket, aws_path, &status))
  {
    if (!force)
    {
      my_printf_error(EE_CANTCREATEFILE, "File %s exists in s3", MYF(0),
                      aws_path);
      DBUG_RETURN(EE_CANTCREATEFILE);
    }
    if ((error= aria_delete_from_s3(s3_client, aws_bucket, database,
                                    table_name, display)))
      DBUG_RETURN(error);
  }

  if (copy_frm)
  {
    /*
      Copy frm file if it exists
      We do this first to ensure that .frm always exists. This is needed to
      ensure that discovery of the table will work.
    */
    fn_format(filename, path, "", ".frm", MY_REPLACE_EXT);
    if (!s3_read_file_from_disk(filename, &alloc_block, &frm_length,0))
    {
      if (display)
        printf("Copying frm file %s\n", filename);

      strmov(aws_path_end,"/frm");
      convert_frm_to_s3_format(alloc_block);

      /* Note that frm is not compressed! */
      if (s3_put_object(s3_client, aws_bucket, aws_path, alloc_block,
                        frm_length, 0))
        goto err;

      frm_created= 1;
      my_free(alloc_block);
      alloc_block= 0;
    }
  }

  if (display)
    printf("Copying aria table: %s.%s to s3\n", database, table_name);

  /* Index file name */
  fn_format(filename, path, "", ".MAI", MY_REPLACE_EXT);
  if ((file= my_open(filename,
                     O_RDONLY | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
                     MYF(MY_WME))) < 0)
    goto err;                                   /* Removes frm from S3 */

  if ((error= aria_get_capabilities(file, &cap)))
  {
    fprintf(stderr, "Got error %d when reading Aria header from %s\n",
            error, path);
    goto err;
  }
  if (cap.transactional || cap.data_file_type != BLOCK_RECORD ||
      cap.encrypted)
  {
    fprintf(stderr,
            "Aria table %s doesn't match criteria to be copied to S3.\n"
            "It should be non-transactional and should have row_format page\n",
            path);
    goto err;
  }

  /*
    If block size is not specified, use the values specified as part of
    create
  */
  if (block_size == 0)
  {
    block_size= cap.s3_block_size;
    compression= cap.compression;
  }

  /* Align S3_BLOCK size with table block size */
  block_size= (block_size/cap.block_size)*cap.block_size;

  /* Allocate block for data + flag for compress header */
  if (!(alloc_block= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
                                        block_size+ALIGN_SIZE(1),
                                        MYF(MY_WME))))
    goto err;
  /* Read/write data here, but with prefix space for compression flag */
  block= alloc_block+ ALIGN_SIZE(1);

  if (my_pread(file, block, cap.header_size, 0, MYF(MY_WME | MY_FNABP)))
    goto err;

  strmov(aws_path_end, "/aria");

  if (display)
    printf("Creating aria table information %s\n", aws_path);

  convert_index_to_s3_format(block, block_size, compression);

  /*
    The first page is not compressed as we need it to know if the rest is
    compressed
  */
  if (s3_put_object(s3_client, aws_bucket, aws_path, block, cap.header_size,
                    0 /* no compression */ ))
    goto err;

  file_size= my_seek(file, 0L, MY_SEEK_END, MYF(0));

  end= strmov(aws_path_end,"/index");

  if (display)
    printf("Copying index information %s\n", aws_path);

  /* The 000000 will be update with block number by fix_suffix() */
  strmov(end, "/000000");

  error= copy_from_file(s3_client, aws_bucket, aws_path, file, cap.header_size,
                        file_size, block, block_size, compression, display);
  file= -1;                                     /* Closed by copy_from_file */
  if (error)
    goto err;

  /* Copy data file */
  fn_format(filename, path, "", ".MAD", MY_REPLACE_EXT);
  if ((file= my_open(filename,
                     O_RDONLY | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
                     MYF(MY_WME))) < 0)
    goto err;                                   /* Frees alloc_block */

  file_size= my_seek(file, 0L, MY_SEEK_END, MYF(0));

  end= strmov(aws_path_end, "/data");

  if (display)
    printf("Copying data information %s\n", aws_path);

  /* The 000000 will be update with block number by fix_suffix() */
  strmov(end, "/000000");

  error= copy_from_file(s3_client, aws_bucket, aws_path, file, 0, file_size,
                        block, block_size, compression, display);
  file= -1;                                     /* Closed by copy_from_file */
  if (error)
    goto err;

  my_free(alloc_block);
  DBUG_RETURN(0);

err:
  if (frm_created)
  {
    /* Remove the frm again so that the table is not discovered */
    strmov(aws_path_end,"/frm");
    (void) s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_NOTE));
  }
  if (file >= 0)
    my_close(file, MYF(0));
  my_free(alloc_block);
  DBUG_RETURN(1);
}
/**
  Copy the numbered objects of 'aws_path' from S3 to a file

  @param s3_client    connection to S3
  @param aws_bucket   bucket to read the objects from
  @param aws_path     object name; its end is updated with the block number
                      by fix_suffix() for every block
  @param file         file to write to (written sequentially with my_write)
  @param start        logical file position of the first block
  @param file_end     logical file position where the copy stops
  @param compression  passed on to s3_get_object()
  @param display      if set, print progress as '.' on stdout

  @return 0 ok
  @return 1 error. Error message is printed to stderr

  Notes:
  file is always closed before return
*/
static my_bool copy_to_file(ms3_st *s3_client, const char *aws_bucket,
                            char *aws_path, File file, my_off_t start,
                            my_off_t file_end, my_bool compression,
                            my_bool display)
{
  char *suffix_end= strend(aws_path);
  my_off_t offset= start;
  ulong block_nr= 1;
  my_bool dots_printed= 0;
  size_t write_res;
  S3_BLOCK block;
  DBUG_ENTER("copy_to_file");
  DBUG_PRINT("enter", ("path: %s start: %llu end: %llu",
                       aws_path, (ulonglong) start, (ulonglong) file_end));

  while (offset < file_end)
  {
    fix_suffix(suffix_end, block_nr);
    if (s3_get_object(s3_client, aws_bucket, aws_path, &block, compression, 1))
      goto failed;

    write_res= my_write(file, block.str, block.length,
                        MYF(MY_WME | MY_FNABP));
    /* Only the buffer is released; block.length is still used below */
    s3_free(&block);
    if (write_res == MY_FILE_ERROR)
      goto failed;

    /* Write up to DISPLAY_WITH number of '.' during copy */
    if (display)
    {
      if (((offset + block.length) * DISPLAY_WITH /file_end) >
          (offset * DISPLAY_WITH/file_end))
      {
        fputc('.', stdout);
        fflush(stdout);
        dots_printed= 1;
      }
    }

    offset+= block.length;
    block_nr++;
  }

  if (dots_printed)
  {
    fputc('\n', stdout);
    fflush(stdout);
  }
  my_close(file, MYF(MY_WME));
  DBUG_RETURN(0);

failed:
  my_close(file, MYF(MY_WME));
  if (dots_printed)
  {
    fputc('\n', stdout);
    fflush(stdout);
  }
  DBUG_RETURN(1);
}
/**
  Copy a table from S3 to current directory

  @param s3_client    connection to S3
  @param aws_bucket   bucket the table is stored in
  @param path         path of the local table to create (the extension is
                      replaced with .MAI, .MAD and .frm)
  @param database     database name in S3. The table name is taken from
                      the file name part of 'path'
  @param compression  passed on to s3_get_object() for index and data blocks
  @param force        if not set, it's an error if the .MAI file already
                      exists on disk
  @param display      if set, print progress information on stdout

  @return 0   ok
  @return 1   error
  @return EE_CANTCREATEFILE  table exists on disk and force is not set

  Notes:
  A missing frm object in S3, or a failure to create the local .frm file,
  is not treated as an error.
*/
int aria_copy_from_s3(ms3_st *s3_client, const char *aws_bucket,
                      const char *path, const char *database,
                      my_bool compression, my_bool force, my_bool display)
{
  MARIA_STATE_INFO state;
  MY_STAT stat_info;
  char table_name[FN_REFLEN], aws_path[FN_REFLEN+100];
  char filename[FN_REFLEN];
  char *aws_path_end, *end;
  File file= -1;
  S3_BLOCK block;
  my_off_t index_file_size, data_file_size;
  uint offset;
  int error;
  DBUG_ENTER("aria_copy_from_s3");

  /* Check if index file exists */
  fn_format(filename, path, "", ".MAI", MY_REPLACE_EXT);
  if (!force && my_stat(filename, &stat_info, MYF(0)))
  {
    my_printf_error(EE_CANTCREATEFILE, "Table %s already exists on disk",
                    MYF(0), filename);
    DBUG_RETURN(EE_CANTCREATEFILE);
  }

  fn_format(table_name, path, "", "", MY_REPLACE_DIR | MY_REPLACE_EXT);
  block.str= 0;

  aws_path_end= strxmov(aws_path, database, "/", table_name, NullS);
  strmov(aws_path_end, "/aria");

  if (s3_get_object(s3_client, aws_bucket, aws_path, &block, 0, 0))
  {
    my_printf_error(EE_FILENOTFOUND, "File %s/%s doesn't exist in s3", MYF(0),
                    database,filename);
    goto err;
  }
  if (block.length < MARIA_STATE_INFO_SIZE)
  {
    fprintf(stderr, "Wrong block length for first block: %lu\n",
            (ulong) block.length);
    goto err_with_free;
  }

  if (display)
    printf("Copying aria table: %s.%s from s3\n", database, table_name);

  /* For offset positions, check _ma_state_info_readlength() */
  offset= sizeof(state.header) + 4+ LSN_STORE_SIZE*3 + 8*5;
  index_file_size= mi_sizekorr(block.str + offset);
  data_file_size=  mi_sizekorr(block.str + offset+8);

  if ((file= my_create(filename, 0,
                       O_WRONLY | O_TRUNC | O_NOFOLLOW, MYF(MY_WME))) < 0)
    goto err_with_free;

  convert_index_to_disk_format(block.str);

  if (my_write(file, block.str, block.length, MYF(MY_WME | MY_FNABP)))
    goto err_with_free;

  if (display)
    printf("Copying index information %s\n", aws_path);

  /* The 000000 will be update with block number by fix_suffix() */
  strmov(aws_path_end,"/index/000000");

  error= copy_to_file(s3_client, aws_bucket, aws_path, file, block.length,
                      index_file_size, compression, display);
  file= -1;                                     /* Closed by copy_to_file */
  if (error)
    goto err_with_free;

  /* Copy data file */
  fn_format(filename, path, "", ".MAD", MY_REPLACE_EXT);
  if ((file= my_create(filename, 0,
                       O_WRONLY | O_TRUNC | O_NOFOLLOW, MYF(MY_WME))) < 0)
    goto err_with_free;                         /* Don't leak 'block' */

  end= strmov(aws_path_end, "/data");

  if (display)
    printf("Copying data information %s\n", aws_path);

  /* The 000000 will be update with block number by fix_suffix() */
  strmov(end, "/000000");

  error= copy_to_file(s3_client, aws_bucket, aws_path, file, 0, data_file_size,
                      compression, display);
  file= -1;                                     /* Closed by copy_to_file */
  s3_free(&block);
  block.str= 0;
  if (error)
    goto err;

  /* Copy frm file if it exists */
  strmov(aws_path_end, "/frm");
  if (!s3_get_object(s3_client, aws_bucket, aws_path, &block, 0, 0))
  {
    fn_format(filename, path, "", ".frm", MY_REPLACE_EXT);
    if ((file= my_create(filename, 0,
                         O_WRONLY | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
                         MYF(0))) >= 0)
    {
      if (display)
        printf("Copying frm file %s\n", filename);

      convert_frm_to_disk_format(block.str);

      if (my_write(file, block.str, block.length, MYF(MY_WME | MY_FNABP)))
        goto err_with_free;

      /* Only close the file if it was really created */
      my_close(file, MYF(MY_WME));
      file= -1;
    }
    s3_free(&block);
  }

  DBUG_RETURN(0);

err_with_free:
  s3_free(&block);
err:
  if (file >= 0)
    my_close(file, MYF(0));
  DBUG_RETURN(1);
}
/**
  Drop all files related to a table from S3

  @param s3_client   connection to S3
  @param aws_bucket  bucket the table is stored in
  @param database    database name
  @param table       table name
  @param display     if set, print progress information on stdout

  @return 0   ok
  @return HA_ERR_NO_SUCH_TABLE  neither the aria nor the frm object exists
  @return #   other error (non zero)
*/
int aria_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
                        const char *database, const char *table,
                        my_bool display)
{
  ms3_status_st status;
  char aws_path[FN_REFLEN+100];
  char *table_end;
  int error;
  DBUG_ENTER("aria_delete_from_s3");

  table_end= strxmov(aws_path, database, "/", table, NullS);

  /* Check if either /aria or /frm exists */
  strmov(table_end, "/aria");
  if (ms3_status(s3_client, aws_bucket, aws_path, &status))
  {
    strmov(table_end, "/frm");
    if (ms3_status(s3_client, aws_bucket, aws_path, &status))
    {
      my_printf_error(HA_ERR_NO_SUCH_TABLE,
                      "Table %s.%s doesn't exist in s3", MYF(0),
                      database, table);
      my_errno= HA_ERR_NO_SUCH_TABLE;
      DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
    }
  }

  if (display)
    printf("Delete of aria table: %s.%s\n", database, table);

  /* Index blocks */
  strmov(table_end,"/index");
  if (display)
    printf("Delete of index information %s\n", aws_path);
  error= s3_delete_directory(s3_client, aws_bucket, aws_path);

  /* Data blocks */
  strmov(table_end,"/data");
  if (display)
    printf("Delete of data information %s\n", aws_path);
  error|= s3_delete_directory(s3_client, aws_bucket, aws_path);

  if (display)
    printf("Delete of base information and frm\n");

  strmov(table_end,"/aria");
  if (s3_delete_object(s3_client, aws_bucket, aws_path, MYF(MY_WME)))
    error= 1;

  /*
    Delete .frm last as this is used by discovery to check if a s3 table
    exists
  */
  strmov(table_end,"/frm");
  /* Ignore error if .frm file doesn't exist */
  s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_NOTE));

  DBUG_RETURN(error);
}
/**
  Rename a table in s3

  @param s3_client      connection to S3
  @param aws_bucket     bucket the table is stored in
  @param from_database  current database name
  @param from_table     current table name
  @param to_database    new database name
  @param to_table       new table name
  @param rename_frm     if set, also rename the frm object. The result of
                        that rename is not part of the return value.

  @return 0   ok
  @return HA_ERR_NO_SUCH_TABLE  the aria object of the source doesn't exist
  @return #   other error (non zero)
*/
int aria_rename_s3(ms3_st *s3_client, const char *aws_bucket,
                   const char *from_database, const char *from_table,
                   const char *to_database, const char *to_table,
                   my_bool rename_frm)
{
  ms3_status_st status;
  char src_path[FN_REFLEN+100], dst_path[FN_REFLEN+100];
  char *src_end, *dst_end;
  int error;
  DBUG_ENTER("aria_rename_s3");

  src_end= strxmov(src_path, from_database, "/", from_table, NullS);
  dst_end= strxmov(dst_path, to_database, "/", to_table, NullS);

  /* The source table must exist */
  strmov(src_end, "/aria");
  if (ms3_status(s3_client, aws_bucket, src_path, &status))
  {
    my_printf_error(HA_ERR_NO_SUCH_TABLE,
                    "Table %s.%s doesn't exist in s3", MYF(0), from_database,
                    from_table);
    my_errno= HA_ERR_NO_SUCH_TABLE;
    DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
  }

  /* Index blocks */
  strmov(src_end,"/index");
  strmov(dst_end,"/index");
  error= s3_rename_directory(s3_client, aws_bucket, src_path, dst_path,
                             MYF(MY_WME));

  /* Data blocks */
  strmov(src_end,"/data");
  strmov(dst_end,"/data");
  error|= s3_rename_directory(s3_client, aws_bucket, src_path, dst_path,
                              MYF(MY_WME));

  if (rename_frm)
  {
    strmov(src_end, "/frm");
    strmov(dst_end, "/frm");
    s3_rename_object(s3_client, aws_bucket, src_path, dst_path,
                     MYF(MY_WME));
  }

  /* Table description */
  strmov(src_end,"/aria");
  strmov(dst_end,"/aria");
  if (s3_rename_object(s3_client, aws_bucket, src_path, dst_path,
                       MYF(MY_WME)))
    error= 1;

  DBUG_RETURN(error);
}
/**
  Copy all partition files related to a table to S3 (.frm and .par)

  @param s3_client   s3 client connection
  @param aws_bucket  bucket to use
  @param path        The path to the partitioned table files (no extension)
  @param old_path    In some cases the partitioned files are not yet renamed.
                     This points to the temporary files that will later
                     be renamed to the partitioned table. If 0, 'path' is
                     used.
  @param database    Database for the partitioned table
  @param table_name  Table name for the partitioned table

  @return 0  ok
  @return #  error (non zero)
*/
int partition_copy_to_s3(ms3_st *s3_client, const char *aws_bucket,
                         const char *path, const char *old_path,
                         const char *database, const char *table_name)
{
  char aws_path[FN_REFLEN+100];
  char filename[FN_REFLEN];
  char *ext_pos;
  uchar *image= 0;
  ms3_status_st status;
  size_t image_length;
  int error;
  DBUG_ENTER("partition_copy_to_s3");
  DBUG_PRINT("enter",("from: %s database: %s table: %s",
                      path, database, table_name));

  if (!old_path)
    old_path= path;

  ext_pos= strxmov(aws_path, database, "/", table_name, "/", NullS);
  strmov(ext_pos, "frm");
  fn_format(filename, old_path, "", ".frm", MY_REPLACE_EXT);

  /* Just to be safe, delete any conflicting object */
  if (!ms3_status(s3_client, aws_bucket, aws_path, &status))
  {
    error= s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_FATAL));
    if (error)
      DBUG_RETURN(error);
  }

  error= s3_read_file_from_disk(filename, &image, &image_length, 0);
  if (error)
  {
    /*
      In case of ADD PARTITION the .frm file is already renamed.
      Copy the renamed file if it exists.
    */
    fn_format(filename, path, "", ".frm", MY_REPLACE_EXT);
    error= s3_read_file_from_disk(filename, &image, &image_length, 1);
    if (error)
      goto end;
  }

  error= s3_put_object(s3_client, aws_bucket, aws_path, image,
                       image_length, 0);
  if (error)
    goto end;

  /*
    Note that because ha_partition::rename_table() is called before
    this function, the .par table already has its final name!
  */
  fn_format(filename, path, "", ".par", MY_REPLACE_EXT);
  strmov(ext_pos, "par");

  if (!ms3_status(s3_client, aws_bucket, aws_path, &status))
  {
    error= s3_delete_object(s3_client, aws_bucket, aws_path, MYF(ME_FATAL));
    if (error)
      goto end;
  }

  /* The frm image is not needed anymore; reuse the variables for .par */
  my_free(image);
  image= 0;

  error= s3_read_file_from_disk(filename, &image, &image_length, 1);
  if (error)
    goto end;

  error= s3_put_object(s3_client, aws_bucket, aws_path, image,
                       image_length, 0);
  if (error)
  {
    /* Delete the .frm file created above */
    strmov(ext_pos, "frm");
    (void) s3_delete_object(s3_client, aws_bucket, aws_path,
                            MYF(ME_FATAL));
    goto end;
  }
  error= 0;

end:
  my_free(image);
  DBUG_RETURN(error);
}
/**
  Drop all partition files related to a table from S3

  @param s3_client    connection to S3
  @param aws_bucket   bucket the table is stored in
  @param database     database name
  @param table        table name
  @param error_flags  flags passed on to s3_delete_object()

  @return 0  ok
  @return #  result of the last s3_delete_object() call that failed
*/
int partition_delete_from_s3(ms3_st *s3_client, const char *aws_bucket,
                             const char *database, const char *table,
                             myf error_flags)
{
  char aws_path[FN_REFLEN+100];
  char *table_end;
  int error= 0, res;
  DBUG_ENTER("partition_delete_from_s3");

  table_end= strxmov(aws_path, database, "/", table, NullS);

  strmov(table_end, "/par");
  res= s3_delete_object(s3_client, aws_bucket, aws_path, error_flags);
  if (res)
    error= res;

  /*
    Delete .frm last as this is used by discovery to check if a s3 table
    exists
  */
  strmov(table_end, "/frm");
  res= s3_delete_object(s3_client, aws_bucket, aws_path, error_flags);
  if (res)
    error= res;

  DBUG_RETURN(error);
}
/******************************************************************************
Low level functions interfacing with libmarias3
******************************************************************************/
/**
  Create an object for index or data information

  @param s3_client    connection to S3
  @param aws_bucket   bucket to store the object in
  @param name         object name
  @param data         data to store
  @param length       length of data
  @param compression  if set, try to compress the data and prefix it with
                      a header of COMPRESS_HEADER bytes

  @return 0         ok
  @return EE_WRITE  error; an error message has been printed

  Note that if compression is used, the data may be overwritten and
  there must be COMPRESS_HEADER length of free space before the data!
*/
int s3_put_object(ms3_st *s3_client, const char *aws_bucket,
                  const char *name, uchar *data, size_t length,
                  my_bool compression)
{
  uint8_t error;
  const char *errmsg;
  DBUG_ENTER("s3_put_object");
  DBUG_PRINT("enter", ("name: %s", name));

  if (compression)
  {
    size_t comp_len;
    uchar *header= data - COMPRESS_HEADER;

    /* First header byte: 0 = not compressed, 1 = compressed package */
    header[0]= 0;
    if (!my_compress(data, &length, &comp_len))
      header[0]= 1;

    /* From here on the object to store starts at the header */
    data= header;
    length+= COMPRESS_HEADER;
    int3store(data+1, comp_len);                /* Original length or 0 */
  }

  error= ms3_put(s3_client, aws_bucket, name, data, length);
  if (likely(!error))
    DBUG_RETURN(0);

  /* Prefer the error message from the server if there is one */
  errmsg= ms3_server_error(s3_client);
  if (!errmsg)
    errmsg= ms3_error(error);

  my_printf_error(EE_WRITE, "Got error from put_object(%s): %d %s", MYF(0),
                  name, error, errmsg);
  DBUG_RETURN(EE_WRITE);
}
/**
  Read an object for index or data information

  @param s3_client    connection to S3
  @param aws_bucket   bucket to read from
  @param name         object name
  @param block        Result. On success block->str / block->length
                      describe the data and block->alloc_ptr is the
                      buffer to release with s3_free(). On error the
                      buffer has already been released.
  @param compression  if set, the object is expected to start with a
                      COMPRESS_HEADER byte header, which is removed (and
                      the data uncompressed if the header says so)
  @param print_error  0 Don't print error
  @param print_error  1 Print error that object doesn't exists
  @param print_error  2 Print error that table doesn't exists

  @return 0  ok
  @return #  error code (EE_FILENOTFOUND, HA_ERR_NO_SUCH_TABLE, EE_READ,
             HA_ERR_NOT_A_TABLE, EE_OUTOFMEMORY or ER_NET_UNCOMPRESS_ERROR)

  NOTE(review): with compression, the object is assumed to be at least
  COMPRESS_HEADER bytes long; this is not verified here.
*/
int s3_get_object(ms3_st *s3_client, const char *aws_bucket,
                  const char *name, S3_BLOCK *block,
                  my_bool compression, int print_error)
{
  uint8_t error;
  int result= 0;
  uchar *data;
  ulong length;
  DBUG_ENTER("s3_get_object");
  DBUG_PRINT("enter", ("name: %s compression: %d", name, compression));

  block->str= block->alloc_ptr= 0;
  error= ms3_get(s3_client, aws_bucket, name,
                 (uint8_t**) &block->alloc_ptr, &block->length);

  if (error)
  {
    if (error == 9)
    {
      /* Error 9 is reported as 'object / table does not exist' */
      result= my_errno= (print_error == 1 ? EE_FILENOTFOUND :
                         HA_ERR_NO_SUCH_TABLE);
      if (print_error)
        my_printf_error(my_errno, "Expected object '%s' didn't exist",
                        MYF(0), name);
    }
    else
    {
      result= my_errno= EE_READ;
      if (print_error)
      {
        const char *errmsg;
        if (!(errmsg= ms3_server_error(s3_client)))
          errmsg= ms3_error(error);

        my_printf_error(EE_READ, "Got error from get_object(%s): %d %s", MYF(0),
                        name, error, errmsg);
      }
    }
    s3_free(block);
    DBUG_RETURN(result);
  }

  block->str= block->alloc_ptr;
  if (!compression)
    DBUG_RETURN(0);

  /* Header byte 0: the block was stored without compression */
  if (!block->str[0])
  {
    block->length-= COMPRESS_HEADER;
    block->str+= COMPRESS_HEADER;

    /* Simple check to ensure that it's a correct block */
    if (block->length % 1024)
    {
      s3_free(block);
      my_printf_error(HA_ERR_NOT_A_TABLE,
                      "Block '%s' is not compressed", MYF(0), name);
      DBUG_RETURN(HA_ERR_NOT_A_TABLE);
    }
    DBUG_RETURN(0);
  }

  /* Only 0 and 1 are valid values for the first header byte */
  if (((uchar*)block->str)[0] > 1)
  {
    s3_free(block);
    my_printf_error(HA_ERR_NOT_A_TABLE,
                    "Block '%s' is not compressed", MYF(0), name);
    DBUG_RETURN(HA_ERR_NOT_A_TABLE);
  }

  /* The 3 bytes after the flag hold the length s3_put_object() stored */
  length= uint3korr(block->str+1);

  data= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
                           length, MYF(MY_WME | MY_THREAD_SPECIFIC));
  if (!data)
  {
    s3_free(block);
    DBUG_RETURN(EE_OUTOFMEMORY);
  }

  if (uncompress(data, &length, block->str + COMPRESS_HEADER,
                 block->length - COMPRESS_HEADER))
  {
    my_printf_error(ER_NET_UNCOMPRESS_ERROR,
                    "Got error uncompressing s3 packet", MYF(0));
    s3_free(block);
    my_free(data);
    DBUG_RETURN(ER_NET_UNCOMPRESS_ERROR);
  }

  /* Replace the downloaded buffer with the uncompressed data */
  s3_free(block);
  block->str= block->alloc_ptr= data;
  block->length= length;
  DBUG_RETURN(0);
}
/**
  Delete an object from S3

  @param s3_client    connection to S3
  @param aws_bucket   bucket the object is stored in
  @param name         object name
  @param error_flags  Flags for my_printf_error() (MY_WME is removed before
                      use). If 0, no error is printed and 0 is returned
                      even if the delete failed.

  @return 0                ok (or error_flags is 0)
  @return EE_FILENOTFOUND  the delete failed with error 9
  @return EE_READ          other error
*/
int s3_delete_object(ms3_st *s3_client, const char *aws_bucket,
                     const char *name, myf error_flags)
{
  uint8_t error;
  int result;
  DBUG_ENTER("s3_delete_object");
  DBUG_PRINT("enter", ("name: %s", name));

  error= ms3_delete(s3_client, aws_bucket, name);
  if (likely(!error))
    DBUG_RETURN(0);

  /* Without flags the failure is silently ignored */
  if (!error_flags)
    DBUG_RETURN(0);

  error_flags&= ~MY_WME;
  if (error == 9)
  {
    result= EE_FILENOTFOUND;
    my_printf_error(result,
                    "Expected object '%s' didn't exist",
                    error_flags, name);
  }
  else
  {
    const char *errmsg= ms3_server_error(s3_client);
    if (!errmsg)
      errmsg= ms3_error(error);

    result= EE_READ;
    my_printf_error(result,
                    "Got error from delete_object(%s): %d %s",
                    error_flags, name, error, errmsg);
  }
  DBUG_RETURN(result);
}
/*
  Drop all files in a 'directory' in s3

  All objects returned by ms3_list() for 'path' are deleted. The delete
  continues with the next object if one delete fails.

  @return 0                ok
  @return EE_FILENOTFOUND  could not get the list of objects
  @return 1                at least one object could not be deleted
*/
int s3_delete_directory(ms3_st *s3_client, const char *aws_bucket,
                        const char *path)
{
  ms3_list_st *entries= 0, *cur;
  my_bool res;
  DBUG_ENTER("delete_directory");
  DBUG_PRINT("enter", ("path: %s", path));

  res= ms3_list(s3_client, aws_bucket, path, &entries);
  if (res)
  {
    const char *errmsg= ms3_server_error(s3_client);
    if (!errmsg)
      errmsg= ms3_error(res);

    my_printf_error(EE_FILENOTFOUND,
                    "Can't get list of files from %s. Error: %d %s", MYF(0),
                    path, res, errmsg);
    DBUG_RETURN(EE_FILENOTFOUND);
  }

  cur= entries;
  while (cur)
  {
    if (s3_delete_object(s3_client, aws_bucket, cur->key, MYF(MY_WME)))
      res= 1;
    cur= cur->next;
  }

  if (entries)
    ms3_list_free(entries);
  DBUG_RETURN(res);
}
/**
  Rename (move) an object inside a bucket

  @param s3_client    connection to S3
  @param aws_bucket   bucket the object is stored in
  @param from_name    current object name
  @param to_name      new object name
  @param error_flags  Flags for my_printf_error() (MY_WME is removed before
                      use). If 0, no error message is printed.

  @return FALSE  ok
  @return TRUE   error
*/
my_bool s3_rename_object(ms3_st *s3_client, const char *aws_bucket,
                         const char *from_name, const char *to_name,
                         myf error_flags)
{
  uint8_t error;
  DBUG_ENTER("s3_rename_object");
  DBUG_PRINT("enter", ("from: %s to: %s", from_name, to_name));

  if (likely(!(error= ms3_move(s3_client,
                               aws_bucket, from_name,
                               aws_bucket, to_name))))
    DBUG_RETURN(FALSE);

  if (error_flags)
  {
    error_flags&= ~MY_WME;
    if (error == 9)
    {
      my_printf_error(EE_FILENOTFOUND, "Expected object '%s' didn't exist",
                      error_flags, from_name);
    }
    else
    {
      const char *errmsg;
      /* Prefer the error message from the server if there is one */
      if (!(errmsg= ms3_server_error(s3_client)))
        errmsg= ms3_error(error);

      /* Note: the format must end with %s for errmsg to be printed */
      my_printf_error(EE_READ, "Got error from move_object(%s -> %s): %d %s",
                      error_flags,
                      from_name, to_name, error, errmsg);
    }
  }
  DBUG_RETURN(TRUE);
}
/**
  Rename all objects in a 'directory' in s3

  Every object returned by ms3_list() for 'from_name' is renamed to
  'to_name' followed by the last '/'-separated component of its key.
  The rename continues with the next object if one rename fails.

  @param s3_client    connection to S3
  @param aws_bucket   bucket the objects are stored in
  @param from_name    current 'directory' name
  @param to_name      new 'directory' name
  @param error_flags  flags passed on to s3_rename_object() and used
                      (without MY_WME) when printing a list error

  @return 0                ok
  @return EE_FILENOTFOUND  could not get the list of objects
  @return 1                at least one object could not be renamed
*/
int s3_rename_directory(ms3_st *s3_client, const char *aws_bucket,
                        const char *from_name, const char *to_name,
                        myf error_flags)
{
  ms3_list_st *list, *org_list= 0;
  my_bool error= 0;
  char name[AWS_PATH_LENGTH], *end;
  DBUG_ENTER("s3_rename_directory");

  if ((error= ms3_list(s3_client, aws_bucket, from_name, &org_list)))
  {
    const char *errmsg;
    if (!(errmsg= ms3_server_error(s3_client)))
      errmsg= ms3_error(error);

    my_printf_error(EE_FILENOTFOUND,
                    "Can't get list of files from %s. Error: %d %s",
                    MYF(error_flags & ~MY_WME),
                    from_name, error, errmsg);
    DBUG_RETURN(EE_FILENOTFOUND);
  }

  /* Bounded copy; to_name may come from a buffer larger than name[] */
  end= strmake(name, to_name, sizeof(name) - 1);

  for (list= org_list ; list ; list= list->next)
  {
    const char *sep= strrchr(list->key, '/');
    if (sep)                                    /* Safety */
    {
      /* Append "/<last component>" to the new directory name */
      strmake(end, sep, (sizeof(name) - (end-name) - 1));
      if (s3_rename_object(s3_client, aws_bucket, list->key, name,
                           error_flags))
        error= 1;
    }
  }
  if (org_list)
    ms3_list_free(org_list);
  DBUG_RETURN(error);
}
/******************************************************************************
Converting index and frm files to and from the S3 storage engine format
******************************************************************************/
/**
  Change index information to be of type s3

  @param header       Copy of header in index file (updated in place)
  @param block_size   S3 block size
  @param compression  Compression algorithm to use

  The position are from _ma_base_info_write()
*/
static void convert_index_to_s3_format(uchar *header, ulong block_size,
                                       int compression)
{
  MARIA_STATE_INFO state;
  uint offset;
  uchar *base;

  /* The start of the base information is stored in the state header */
  memcpy(state.header.file_version, header, sizeof(state.header));
  offset= mi_uint2korr(state.header.base_pos);
  base= header + offset;

  base[107]= (uchar) compression;
  mi_int3store(base + 119, block_size);
}
/**
  Change index information to be a normal disk based table

  @param header  Copy of header in index file (updated in place)

  Resets the two fields set by convert_index_to_s3_format() to 0.
*/
static void convert_index_to_disk_format(uchar *header)
{
  MARIA_STATE_INFO state;
  uint offset;
  uchar *base;

  /* The start of the base information is stored in the state header */
  memcpy(state.header.file_version, header, sizeof(state.header));
  offset= mi_uint2korr(state.header.base_pos);
  base= header + offset;

  base[107]= 0;
  mi_int3store(base + 119, 0);
}
/**
  Change storage engine in the .frm file from Aria to s3

  @param header  Start of the frm image (updated in place)

  For information about engine types, see legacy_db_type
*/
static void convert_frm_to_s3_format(uchar *header)
{
  uchar *engine_type= header + 3;

  /* Must be Aria (42) or already S3 (41) */
  DBUG_ASSERT(*engine_type == 42 || *engine_type == 41);
  *engine_type= 41;                             /* S3 */
}
/**
  Change storage engine in the .frm file from S3 to Aria

  @param header  Start of the frm image (updated in place)

  For information about engine types, see legacy_db_type
*/
static void convert_frm_to_disk_format(uchar *header)
{
  uchar *engine_type= header + 3;

  /* Must be S3 (41) */
  DBUG_ASSERT(*engine_type == 41);
  *engine_type= 42;                             /* Aria */
}
/******************************************************************************
Helper functions
******************************************************************************/
/**
  Set database and table name from path

  @param s3    s3->table and s3->database are updated
  @param path  path to the table, of the form .../database/table

  s3->database and s3->table will be pointed into path
  Note that s3->database will not be null terminated!

  @return 0  ok
  @return 1  Can't find database. Note that s3->table may have been
             updated also in this case.
*/
my_bool set_database_and_table_from_path(S3_INFO *s3, const char *path)
{
  size_t dir_length= dirname_length(path);
  size_t db_start;

  if (dir_length == 0)
    return 1;

  /* Everything after the directory part is the table name */
  s3->table.str= path + dir_length;
  s3->table.length= strlen(s3->table.str);

  /* Skip the last character of the directory part (the separator) */
  dir_length--;

  /* Search backwards for the separator before the database name */
  db_start= dir_length;
  while (db_start > 0)
  {
    char prev= path[db_start - 1];
    if (prev == FN_LIBCHAR || prev == '/')
      break;
#ifdef FN_DEVCHAR
    if (prev == FN_DEVCHAR)
      break;
#endif
    db_start--;
  }

  if (db_start == 0)
    return 1;                                   /* Can't find database */

  /* A directory consisting of only FN_CURLIB is not a database */
  if (path[db_start] == FN_CURLIB && dir_length - db_start == 1)
    return 1;                                   /* Can't find database */

  s3->database.str= path + db_start;
  s3->database.length= dir_length - db_start;
  return 0;
}
/**
  Read a whole file (for example a .frm) from disk into memory

  @param filename     File to read
  @param to           Out: buffer allocated with my_malloc() that holds the
                      file contents. Set to 0 if the function fails
  @param to_size      Out: number of bytes in *to. Only set on success
  @param print_error  If set, my_open() reports a failed open (MY_WME)

  @return 0  ok
  @return #  my_errno from the call that failed
*/
static int s3_read_file_from_disk(const char *filename, uchar **to,
                                  size_t *to_size, my_bool print_error)
{
  File file;
  uchar *alloc_block= 0;        /* Must be 0 so that 'err' can free it */
  size_t file_size;
  int error;
  *to= 0;

  if ((file= my_open(filename,
                     O_RDONLY | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
                     MYF(print_error ? MY_WME: 0))) < 0)
    return(my_errno);

  /*
    Seek to the end to find the file size. my_seek() reports failure as
    MY_FILEPOS_ERROR (all bits set), which is (size_t) -1 after the cast.
    Without this check a failed seek would turn into a huge allocation
    request and a misleading out of memory error.
  */
  file_size= (size_t) my_seek(file, 0L, MY_SEEK_END, MYF(MY_WME));
  if (file_size == (size_t) -1)
    goto err;

  if (!(alloc_block= my_malloc(PSI_NOT_INSTRUMENTED, file_size, MYF(MY_WME))))
    goto err;

  /* MY_FNABP: anything less than file_size bytes read is an error */
  if (my_pread(file, alloc_block, file_size, 0, MYF(MY_WME | MY_FNABP)))
    goto err;

  *to= alloc_block;
  *to_size= file_size;
  my_close(file, MYF(0));
  return 0;

err:
  /* Save my_errno first; the cleanup calls below may change it */
  error= my_errno;
  my_free(alloc_block);
  my_close(file, MYF(0));
  return error;
}
/**
  Get .frm or par from S3

  The object fetched is "<database>/<table>/<ext>" in s3_info->bucket.

  @param s3_client   S3 connection
  @param s3_info     Gives bucket, database and table names
  @param block       Out: the fetched object
  @param ext         Last component of the object name

  @return 0  ok
  @return 1  error
*/
my_bool s3_get_def(ms3_st *s3_client, S3_INFO *s3_info, S3_BLOCK *block,
                   const char *ext)
{
  char object_key[AWS_PATH_LENGTH];

  strxnmov(object_key, sizeof(object_key) - 1,
           s3_info->database.str, "/",
           s3_info->table.str, "/",
           ext, NullS);

  /*
    NOTE(review): the trailing 0, 0 are presumably "no compression" and
    "don't print errors" - confirm against s3_get_object()
  */
  return s3_get_object(s3_client, s3_info->bucket.str, object_key, block,
                       0, 0);
}
/**
  Check if .frm exists in S3

  Asks for the status of the object "<database>/<table>/frm" in
  s3_info->bucket and returns the result of ms3_status() as is.

  @param s3_client   S3 connection
  @param s3_info     Gives bucket, database and table names

  @return 0  frm exists
  @return 1  error
*/
my_bool s3_frm_exists(ms3_st *s3_client, S3_INFO *s3_info)
{
  ms3_status_st frm_status;
  char frm_key[AWS_PATH_LENGTH];

  strxnmov(frm_key, sizeof(frm_key) - 1,
           s3_info->database.str, "/",
           s3_info->table.str, "/frm",
           NullS);

  /* Only the return code is used; the status details are ignored */
  return ms3_status(s3_client, s3_info->bucket.str, frm_key, &frm_status);
}
/* Offset in the frm image where the extra2 segment starts */
#define FRM_HEADER_SIZE 64
/* Type of the extra2 entry that holds the table definition version */
#define EXTRA2_TABLEDEF_VERSION 0

/**
  Check if an image starts with a binary frm header

  Reads head[0], head[1] and head[2]; the caller must ensure that the
  image is at least 3 bytes long.

  @return 1  The first bytes are 254, 1 and a version in the range
             FRM_VER .. FRM_VER_CURRENT
  @return 0  Otherwise
*/
static inline my_bool is_binary_frm_header(const uchar *head)
{
  return head[0] == 254
      && head[1] == 1
      && head[2] >= FRM_VER
      && head[2] <= FRM_VER_CURRENT;
}

/**
  Get version from frm file

  Walks the entries of the extra2 segment, which starts at
  FRM_HEADER_SIZE, until the EXTRA2_TABLEDEF_VERSION entry is found.
  Each entry is a type byte, a length and then 'length' bytes of data.

  @param out         Store the table_version_here. It's of size MY_UUID_SIZE
  @param frm_image   Frm image
  @param frm_length  size of image

  @return 0 Was able to read table version
  @return 1 Wrong information in frm file
*/
static my_bool get_tabledef_version_from_frm(char *out, const uchar *frm_image,
                                             size_t frm_length)
{
  uint segment_len;
  const uchar *extra, *extra_end;

  /*
    Check the length before looking at the header bytes, so that a too
    short image is never read
  */
  if (frm_length <= FRM_HEADER_SIZE || !is_binary_frm_header(frm_image))
    return 1;

  /* Length of the MariaDB extra2 segment in the form file. */
  segment_len= uint2korr(frm_image + 4);
  if (frm_length < FRM_HEADER_SIZE + segment_len)
    return 1;

  extra= frm_image + FRM_HEADER_SIZE;
  if (*extra == '/')                            /* old frm had '/' there */
    return 1;

  extra_end= extra + segment_len;

  /*
    Bounds are checked with pointer differences so that no pointer past
    the end of the segment is ever formed.
    More than 4 bytes left guarantees that the type, the length byte and
    an optional 2 byte length can be read.
  */
  while (extra_end - extra > 4)
  {
    uchar type= *extra++;
    size_t length= *extra++;

    if (!length)
    {
      /* A length byte of 0 means that the real length is in next 2 bytes */
      length= uint2korr(extra);
      extra+= 2;
      if (length < 256)
        return 1;                               /* Something is wrong */
    }
    if (length > (size_t) (extra_end - extra))
      return 1;                                 /* Entry doesn't fit */

    if (type == EXTRA2_TABLEDEF_VERSION)
    {
      if (length != MY_UUID_SIZE)
        return 1;
      memcpy(out, extra, length);
      return 0;                                 /* Found it */
    }
    extra+= length;
  }
  return 1;                                     /* No version entry */
}
/**
  Check if version in frm file matches what the server expects

  Fetches "<database>/<base_table>/frm" from S3 and compares the table
  definition version stored in it with s3_info->tabledef_version.

  @param s3_client   S3 connection
  @param s3_info     Gives bucket, database, base table and the expected
                     table definition version

  @return 0 table definitions matches
  @return 1 table definitions doesn't match
  @return 2 The frm object could not be fetched from S3
  @return 3 No usable version in the fetched frm, or the expected version
            is not of length MY_UUID_SIZE
*/
int s3_check_frm_version(ms3_st *s3_client, S3_INFO *s3_info)
{
  char frm_key[AWS_PATH_LENGTH];
  char frm_uuid[MY_UUID_SIZE];
  S3_BLOCK frm;
  my_bool differs= 0;
  DBUG_ENTER("s3_check_frm_version");

  strxnmov(frm_key, sizeof(frm_key) - 1,
           s3_info->database.str, "/",
           s3_info->base_table.str, "/frm",
           NullS);

  if (s3_get_object(s3_client, s3_info->bucket.str, frm_key, &frm, 0, 0))
  {
    DBUG_PRINT("exit", ("No object found"));
    DBUG_RETURN(2);                             /* Ignore check, use old frm */
  }

  if (get_tabledef_version_from_frm(frm_uuid, (uchar*) frm.str, frm.length) ||
      s3_info->tabledef_version.length != MY_UUID_SIZE)
  {
    s3_free(&frm);
    DBUG_PRINT("error", ("Wrong definition"));
    DBUG_RETURN(3);                             /* Wrong definition */
  }

  /* differs is set to 1 if the two versions are not identical */
  differs= bcmp(s3_info->tabledef_version.str, frm_uuid, MY_UUID_SIZE) != 0;
  s3_free(&frm);

  if (differs)
  {
    DBUG_PRINT("error", ("Wrong table version"));
  }
  else
  {
    DBUG_PRINT("exit", ("Version strings matches"));
  }
  DBUG_RETURN(differs);
}
/******************************************************************************
Reading blocks from index or data from S3
******************************************************************************/
/*
  Read the index header (first page) from the index file

  The object fetched is "<database>/<table>/aria" in s3->bucket.
  In case of error, my_error() is called

  Returns the result of s3_get_object()
*/
my_bool read_index_header(ms3_st *client, S3_INFO *s3, S3_BLOCK *block)
{
  char header_key[AWS_PATH_LENGTH];
  my_bool error;
  DBUG_ENTER("read_index_header");

  strxnmov(header_key, sizeof(header_key) - 1,
           s3->database.str, "/",
           s3->table.str, "/aria",
           NullS);

  error= s3_get_object(client, s3->bucket.str, header_key, block, 0, 2);
  DBUG_RETURN(error);
}
#ifdef FOR_FUTURE_IF_NEEDED_FOR_DEBUGGING_WITHOUT_S3
/**
  Read a big block from disk

  Debugging replacement for the S3 version of s3_block_read(), only
  compiled if FOR_FUTURE_IF_NEEDED_FOR_DEBUGGING_WITHOUT_S3 is defined.

  NOTE(review): the S3 version of s3_block_read() is defined
  unconditionally later in this file, so the two definitions will clash
  if this macro is defined without disabling the other one.

  @param pagecache  Page cache; its shift converts page numbers to offsets
  @param args       Hook arguments; args->pageno is the first page to read
  @param file       Page cache file; big_block_size bytes are read from it
  @param data       Out: allocated buffer and the number of bytes read.
                    Reset to 0/0 on error

  @return FALSE  ok
  @return TRUE   error
*/
my_bool s3_block_read(struct st_pagecache *pagecache,
                      PAGECACHE_IO_HOOK_ARGS *args,
                      struct st_pagecache_file *file,
                      LEX_STRING *data)
{
  MARIA_SHARE *share= (MARIA_SHARE*) file->callback_data;
  my_bool datafile= file != &share->kfile;

  DBUG_ASSERT(file->big_block_size > 0);
  /* The requested page must be the first page of a big block */
  DBUG_ASSERT(((((my_off_t) args->pageno - file->head_blocks) <<
                pagecache->shift) %
               file->big_block_size) == 0);

  /* my_malloc() takes a PSI key as first argument, as elsewhere in file */
  if (!(data->str= (char *) my_malloc(PSI_NOT_INSTRUMENTED,
                                      file->big_block_size, MYF(MY_WME))))
    return TRUE;

  data->length= mysql_file_pread(file->file,
                                 (unsigned char *)data->str,
                                 file->big_block_size,
                                 ((my_off_t) args->pageno << pagecache->shift),
                                 MYF(MY_WME));
  if (data->length == 0 || data->length == MY_FILE_ERROR)
  {
    if (data->length == 0)
    {
      /* Nothing was read; report it as an unexpected end of file */
      LEX_STRING *file_name= (datafile ?
                              &share->data_file_name :
                              &share->index_file_name);
      my_error(EE_EOFERR, MYF(0), file_name->str, my_errno);
    }
    my_free(data->str);
    data->length= 0;
    data->str= 0;
    return TRUE;
  }
  return FALSE;
}
#endif
/**
  Read a block from S3 to page cache

  The object fetched is "<database>/<table>/data/<number>" for the data
  file and "<database>/<table>/index/<number>" for the index file.
  Block numbers start from 1.

  @param pagecache  Page cache; its shift converts page numbers to offsets
  @param args       Hook arguments; args->pageno is the first page to read
  @param file       Page cache file that the block belongs to
  @param block      Out: the fetched block

  @return Result of s3_get_object()
*/
my_bool s3_block_read(struct st_pagecache *pagecache,
                      PAGECACHE_IO_HOOK_ARGS *args,
                      struct st_pagecache_file *file,
                      S3_BLOCK *block)
{
  char object_key[AWS_PATH_LENGTH];
  char *key_end;
  ulong block_no;
  const char *subdir;
  MARIA_SHARE *share= (MARIA_SHARE*) file->callback_data;
  S3_INFO *s3= share->s3_path;
  /* The S3 connection comes from the handler stored in the thread var */
  MARIA_HA *handler= (MARIA_HA*) my_thread_var->keycache_file;
  ms3_st *client= handler->s3;
  DBUG_ENTER("s3_block_read");

  DBUG_ASSERT(file->big_block_size > 0);
  /* The requested page must be the first page of a big block */
  DBUG_ASSERT(((((my_off_t) args->pageno - file->head_blocks) <<
                pagecache->shift) %
               file->big_block_size) == 0);

  /* Everything that is not the index file is the data file */
  if (file->file != share->kfile.file)
    subdir= "/data/";
  else
    subdir= "/index/";

  /* Offset after the head blocks, in units of big blocks, counted from 1 */
  block_no= (((args->pageno - file->head_blocks) << pagecache->shift) /
             file->big_block_size) + 1;

  /*
    NOTE(review): "000000" looks like a placeholder that fix_suffix()
    replaces with block_no, and the -12 presumably leaves room for
    block numbers with more than 6 digits - confirm against fix_suffix()
  */
  key_end= strxnmov(object_key, sizeof(object_key) - 12,
                    s3->database.str, "/",
                    s3->table.str, subdir, "000000", NullS);
  fix_suffix(key_end, block_no);

  DBUG_RETURN(s3_get_object(client, s3->bucket.str, object_key, block,
                            share->base.compression_algorithm, 1));
}
/*
  Start file numbers from 1000 to more easily find bugs when the file number
  could be mistaken for a real file
*/
static volatile int32 unique_file_number= 1000;

/**
  Get a new unique file number

  Atomically adds 1 to unique_file_number.

  @return Result of my_atomic_add32_explicit()
          NOTE(review): presumably the value before the increment, which
          makes the first number returned 1000 - confirm
*/
int32 s3_unique_file_number(void)
{
  return my_atomic_add32_explicit(&unique_file_number, 1,
                                  MY_MEMORY_ORDER_RELAXED);
}