mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
MDEV-29015/MDEV-29260/MDEV-34938: os_file_get_size() WSL work-around
When MariaDB Server is run in a container under Windows Subsystem for Linux, the fstat(2) system calls that InnoDB invokes in os_file_set_size() or os_file_get_size() fail if the file was renamed earlier while the file handle was open. This affects at least ALTER TABLE and OPTIMIZE TABLE. os_file_get_size(): Invoke lseek(2) instead of fstat(2). We do not mind if the file pointer moves to the end of the file, because InnoDB exclusively invokes positioned reads and writes, or in some rare cases, appends to an existing file. os_file_set_size(): Invoke os_file_get_size() instead of fstat(2). Define the POSIX and Windows versions separately. Formerly, the Windows version was called os_file_change_size_win32(). fil_node_t::read_page0(): Use os_file_get_size() to determine the size, and do not crash on error. fil_node_t::find_metadata(): Remove the non-Windows stat* parameter and always invoke fstat(2) outside Windows, but do tolerate errors. Because fstat(2) is more likely to fail than lseek(2), and this is not time-critical code, we can afford the extra lseek(2) system call. Reviewed by: Vladislav Vaintroub
This commit is contained in:
parent
3cd706b107
commit
decdd4bf49
4 changed files with 159 additions and 265 deletions
|
@ -352,7 +352,7 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
|
|||
this->size += size;
|
||||
UT_LIST_ADD_LAST(chain, node);
|
||||
if (node->is_open()) {
|
||||
node->find_metadata(node->handle);
|
||||
node->find_metadata();
|
||||
n_pending.fetch_and(~CLOSING, std::memory_order_relaxed);
|
||||
if (++fil_system.n_open >= srv_max_n_open_files) {
|
||||
reacquire();
|
||||
|
@ -1197,7 +1197,7 @@ err_exit:
|
|||
|
||||
if (create_new_db)
|
||||
{
|
||||
node->find_metadata(node->handle);
|
||||
node->find_metadata();
|
||||
continue;
|
||||
}
|
||||
if (skip_read)
|
||||
|
|
|
@ -1060,17 +1060,12 @@ struct fil_node_t final
|
|||
return(handle != OS_FILE_CLOSED);
|
||||
}
|
||||
|
||||
/** Read the first page of a data file.
|
||||
@return whether the page was found valid */
|
||||
bool read_page0();
|
||||
/** Read the first page of a data file.
|
||||
@return whether the page was found valid */
|
||||
bool read_page0() noexcept;
|
||||
|
||||
/** Determine some file metadata when creating or reading the file.
|
||||
@param file the file that is being created, or OS_FILE_CLOSED */
|
||||
void find_metadata(os_file_t file = OS_FILE_CLOSED
|
||||
#ifndef _WIN32
|
||||
, struct stat* statbuf = NULL
|
||||
#endif
|
||||
);
|
||||
/** Determine some file metadata when creating or reading the file. */
|
||||
void find_metadata() noexcept;
|
||||
|
||||
/** Close the file handle. */
|
||||
void close();
|
||||
|
|
|
@ -905,35 +905,13 @@ os_file_get_size(
|
|||
const char* filename)
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
/** Gets a file size.
|
||||
@param[in] file handle to a file
|
||||
@return file size, or (os_offset_t) -1 on failure */
|
||||
os_offset_t
|
||||
os_file_get_size(
|
||||
os_file_t file)
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
/** Extend a file.
|
||||
|
||||
On Windows, extending a file allocates blocks for the file,
|
||||
unless the file is sparse.
|
||||
|
||||
On Unix, we will extend the file with ftruncate(), if
|
||||
file needs to be sparse. Otherwise posix_fallocate() is used
|
||||
when available, and if not, binary zeroes are added to the end
|
||||
of file.
|
||||
|
||||
@param[in] name file name
|
||||
@param[in] file file handle
|
||||
@param[in] size desired file size
|
||||
@param[in] sparse whether to create a sparse file (no preallocating)
|
||||
@return whether the operation succeeded */
|
||||
bool
|
||||
os_file_set_size(
|
||||
const char* name,
|
||||
os_file_t file,
|
||||
os_offset_t size,
|
||||
bool is_sparse = false)
|
||||
/** Determine the logical size of a file.
|
||||
This may change the current write position of the file to the end of the file.
|
||||
(Not currently a problem; InnoDB typically uses positioned I/O.)
|
||||
@param file handle to an open file
|
||||
@return file size, in octets
|
||||
@retval -1 on failure */
|
||||
os_offset_t os_file_get_size(os_file_t file) noexcept
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
/** Truncates a file at its current position.
|
||||
|
@ -1189,11 +1167,25 @@ If file is normal, file system allocates storage.
|
|||
@param[in] size size to preserve in bytes
|
||||
@return true if success */
|
||||
bool
|
||||
os_file_change_size_win32(
|
||||
os_file_set_size(
|
||||
const char* pathname,
|
||||
os_file_t file,
|
||||
os_offset_t size);
|
||||
|
||||
inline bool
|
||||
os_file_set_size(const char* name, os_file_t file, os_offset_t size, bool)
|
||||
{
|
||||
return os_file_set_size(name, file, size);
|
||||
}
|
||||
#else
|
||||
/** Extend a file by appending NUL.
|
||||
@param[in] name file name
|
||||
@param[in] file file handle
|
||||
@param[in] size desired file size
|
||||
@param[in] is_sparse whether to create a sparse file with ftruncate()
|
||||
@return whether the operation succeeded */
|
||||
bool os_file_set_size(const char *name, os_file_t file, os_offset_t size,
|
||||
bool is_sparse= false) noexcept;
|
||||
#endif /*_WIN32 */
|
||||
|
||||
/** Free storage space associated with a section of the file.
|
||||
|
|
|
@ -46,9 +46,6 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#include "srv0start.h"
|
||||
#include "fil0fil.h"
|
||||
#include "fsp0fsp.h"
|
||||
#ifdef HAVE_LINUX_UNISTD_H
|
||||
#include "unistd.h"
|
||||
#endif
|
||||
#include "os0event.h"
|
||||
#include "os0thread.h"
|
||||
|
||||
|
@ -74,10 +71,12 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#ifdef _WIN32
|
||||
#include <winioctl.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
// my_test_if_atomic_write()
|
||||
#include <my_sys.h>
|
||||
#endif
|
||||
|
||||
#include "log.h"
|
||||
#include "buf0dblwr.h"
|
||||
|
||||
#include <thread>
|
||||
|
@ -1505,16 +1504,9 @@ bool os_file_close_func(os_file_t file)
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Gets a file size.
|
||||
@param[in] file handle to an open file
|
||||
@return file size, or (os_offset_t) -1 on failure */
|
||||
os_offset_t
|
||||
os_file_get_size(os_file_t file)
|
||||
os_offset_t os_file_get_size(os_file_t file) noexcept
|
||||
{
|
||||
struct stat statbuf;
|
||||
if (fstat(file, &statbuf)) return os_offset_t(-1);
|
||||
MSAN_STAT_WORKAROUND(&statbuf);
|
||||
return statbuf.st_size;
|
||||
return lseek(file, 0, SEEK_END);
|
||||
}
|
||||
|
||||
/** Gets a file size.
|
||||
|
@ -1653,6 +1645,110 @@ os_file_set_eof(
|
|||
return(!ftruncate(fileno(file), ftell(file)));
|
||||
}
|
||||
|
||||
bool os_file_set_size(const char *name, os_file_t file, os_offset_t size,
|
||||
bool is_sparse) noexcept
|
||||
{
|
||||
ut_ad(!(size & 4095));
|
||||
|
||||
if (is_sparse) {
|
||||
bool success = !ftruncate(file, size);
|
||||
if (!success) {
|
||||
sql_print_error("InnoDB: ftruncate of file %s"
|
||||
" to %llu bytes failed with error %d",
|
||||
name, size, errno);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
# ifdef HAVE_POSIX_FALLOCATE
|
||||
int err;
|
||||
os_offset_t current_size;
|
||||
do {
|
||||
current_size = os_file_get_size(file);
|
||||
if (current_size == os_offset_t(-1)) {
|
||||
err = errno;
|
||||
} else {
|
||||
if (current_size >= size) {
|
||||
return true;
|
||||
}
|
||||
current_size &= ~4095ULL;
|
||||
# ifdef __linux__
|
||||
if (!fallocate(file, 0, current_size,
|
||||
size - current_size)) {
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
err = errno;
|
||||
# else
|
||||
err = posix_fallocate(file, current_size,
|
||||
size - current_size);
|
||||
# endif
|
||||
}
|
||||
} while (err == EINTR
|
||||
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
|
||||
|
||||
switch (err) {
|
||||
case 0:
|
||||
return true;
|
||||
default:
|
||||
sql_print_error("InnoDB: preallocating %llu"
|
||||
" bytes for file %s failed with error %d",
|
||||
size, name, err);
|
||||
/* fall through */
|
||||
case EINTR:
|
||||
errno = err;
|
||||
return false;
|
||||
case EINVAL:
|
||||
case EOPNOTSUPP:
|
||||
/* fall back to the code below */
|
||||
break;
|
||||
}
|
||||
# else /* HAVE_POSIX_ALLOCATE */
|
||||
os_offset_t current_size = os_file_get_size(file);
|
||||
# endif /* HAVE_POSIX_ALLOCATE */
|
||||
|
||||
current_size &= ~4095ULL;
|
||||
|
||||
if (current_size >= size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Write up to 1 megabyte at a time. */
|
||||
ulint buf_size = std::min<ulint>(64,
|
||||
ulint(size >> srv_page_size_shift))
|
||||
<< srv_page_size_shift;
|
||||
|
||||
/* Align the buffer for possible raw i/o */
|
||||
byte* buf = static_cast<byte*>(aligned_malloc(buf_size,
|
||||
srv_page_size));
|
||||
/* Write buffer full of zeros */
|
||||
memset(buf, 0, buf_size);
|
||||
|
||||
while (current_size < size
|
||||
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
|
||||
ulint n_bytes;
|
||||
|
||||
if (size - current_size < (os_offset_t) buf_size) {
|
||||
n_bytes = (ulint) (size - current_size);
|
||||
} else {
|
||||
n_bytes = buf_size;
|
||||
}
|
||||
|
||||
if (os_file_write(IORequestWrite, name,
|
||||
file, buf, current_size, n_bytes) !=
|
||||
DB_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
||||
current_size += n_bytes;
|
||||
}
|
||||
|
||||
aligned_free(buf);
|
||||
|
||||
return current_size >= size && os_file_flush(file);
|
||||
}
|
||||
|
||||
#else /* !_WIN32 */
|
||||
|
||||
#include <WinIoCtl.h>
|
||||
|
@ -2556,21 +2652,12 @@ bool os_file_close_func(os_file_t file)
|
|||
return true;
|
||||
}
|
||||
|
||||
/** Gets a file size.
|
||||
@param[in] file Handle to a file
|
||||
@return file size, or (os_offset_t) -1 on failure */
|
||||
os_offset_t
|
||||
os_file_get_size(
|
||||
os_file_t file)
|
||||
os_offset_t os_file_get_size(os_file_t file) noexcept
|
||||
{
|
||||
DWORD high;
|
||||
DWORD low = GetFileSize(file, &high);
|
||||
|
||||
if (low == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
|
||||
return((os_offset_t) -1);
|
||||
}
|
||||
|
||||
return(os_offset_t(low | (os_offset_t(high) << 32)));
|
||||
DWORD high, low= GetFileSize(file, &high);
|
||||
if (low == 0xFFFFFFFF && GetLastError() != NO_ERROR)
|
||||
return os_offset_t(-1);
|
||||
return os_offset_t{low} | os_offset_t{high} << 32;
|
||||
}
|
||||
|
||||
/** Gets a file size.
|
||||
|
@ -2712,24 +2799,8 @@ bool os_file_set_sparse_win32(os_file_t file, bool is_sparse)
|
|||
FSCTL_SET_SPARSE, &sparse_buffer, sizeof(sparse_buffer), 0, 0,&temp);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Change file size on Windows.
|
||||
|
||||
If file is extended, the bytes between old and new EOF
|
||||
are zeros.
|
||||
|
||||
If file is sparse, "virtual" block is added at the end of
|
||||
allocated area.
|
||||
|
||||
If file is normal, file system allocates storage.
|
||||
|
||||
@param[in] pathname file path
|
||||
@param[in] file file handle
|
||||
@param[in] size size to preserve in bytes
|
||||
@return true if success */
|
||||
bool
|
||||
os_file_change_size_win32(
|
||||
os_file_set_size(
|
||||
const char* pathname,
|
||||
os_file_t file,
|
||||
os_offset_t size)
|
||||
|
@ -3184,149 +3255,6 @@ IF_WIN(static,) bool os_is_sparse_file_supported(os_file_t fh)
|
|||
#endif /* _WIN32 */
|
||||
}
|
||||
|
||||
/** Extend a file.
|
||||
|
||||
On Windows, extending a file allocates blocks for the file,
|
||||
unless the file is sparse.
|
||||
|
||||
On Unix, we will extend the file with ftruncate(), if
|
||||
file needs to be sparse. Otherwise posix_fallocate() is used
|
||||
when available, and if not, binary zeroes are added to the end
|
||||
of file.
|
||||
|
||||
@param[in] name file name
|
||||
@param[in] file file handle
|
||||
@param[in] size desired file size
|
||||
@param[in] sparse whether to create a sparse file (no preallocating)
|
||||
@return whether the operation succeeded */
|
||||
bool
|
||||
os_file_set_size(
|
||||
const char* name,
|
||||
os_file_t file,
|
||||
os_offset_t size,
|
||||
bool is_sparse)
|
||||
{
|
||||
ut_ad(!(size & 4095));
|
||||
|
||||
#ifdef _WIN32
|
||||
/* On Windows, changing file size works well and as expected for both
|
||||
sparse and normal files. */
|
||||
return os_file_change_size_win32(name, file, size);
|
||||
#else
|
||||
struct stat statbuf;
|
||||
|
||||
if (is_sparse) {
|
||||
bool success = !ftruncate(file, size);
|
||||
if (!success) {
|
||||
ib::error() << "ftruncate of file " << name << " to "
|
||||
<< size << " bytes failed with error "
|
||||
<< errno;
|
||||
}
|
||||
return(success);
|
||||
}
|
||||
|
||||
# ifdef HAVE_POSIX_FALLOCATE
|
||||
int err;
|
||||
do {
|
||||
if (fstat(file, &statbuf)) {
|
||||
err = errno;
|
||||
} else {
|
||||
MSAN_STAT_WORKAROUND(&statbuf);
|
||||
os_offset_t current_size = statbuf.st_size;
|
||||
if (current_size >= size) {
|
||||
return true;
|
||||
}
|
||||
current_size &= ~4095ULL;
|
||||
# ifdef __linux__
|
||||
if (!fallocate(file, 0, current_size,
|
||||
size - current_size)) {
|
||||
err = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
err = errno;
|
||||
# else
|
||||
err = posix_fallocate(file, current_size,
|
||||
size - current_size);
|
||||
# endif
|
||||
}
|
||||
} while (err == EINTR
|
||||
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
|
||||
|
||||
switch (err) {
|
||||
case 0:
|
||||
return true;
|
||||
default:
|
||||
ib::error() << "preallocating "
|
||||
<< size << " bytes for file " << name
|
||||
<< " failed with error " << err;
|
||||
/* fall through */
|
||||
case EINTR:
|
||||
errno = err;
|
||||
return false;
|
||||
case EINVAL:
|
||||
case EOPNOTSUPP:
|
||||
/* fall back to the code below */
|
||||
break;
|
||||
}
|
||||
# endif /* HAVE_POSIX_ALLOCATE */
|
||||
#endif /* _WIN32*/
|
||||
|
||||
#ifdef _WIN32
|
||||
os_offset_t current_size = os_file_get_size(file);
|
||||
FILE_STORAGE_INFO info;
|
||||
if (GetFileInformationByHandleEx(file, FileStorageInfo, &info,
|
||||
sizeof info)) {
|
||||
if (info.LogicalBytesPerSector) {
|
||||
current_size &= ~os_offset_t(info.LogicalBytesPerSector
|
||||
- 1);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (fstat(file, &statbuf)) {
|
||||
return false;
|
||||
}
|
||||
os_offset_t current_size = statbuf.st_size & ~4095ULL;
|
||||
#endif
|
||||
if (current_size >= size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Write up to 1 megabyte at a time. */
|
||||
ulint buf_size = ut_min(ulint(64),
|
||||
ulint(size >> srv_page_size_shift))
|
||||
<< srv_page_size_shift;
|
||||
|
||||
/* Align the buffer for possible raw i/o */
|
||||
byte* buf = static_cast<byte*>(aligned_malloc(buf_size,
|
||||
srv_page_size));
|
||||
/* Write buffer full of zeros */
|
||||
memset(buf, 0, buf_size);
|
||||
|
||||
while (current_size < size
|
||||
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
|
||||
ulint n_bytes;
|
||||
|
||||
if (size - current_size < (os_offset_t) buf_size) {
|
||||
n_bytes = (ulint) (size - current_size);
|
||||
} else {
|
||||
n_bytes = buf_size;
|
||||
}
|
||||
|
||||
if (os_file_write(IORequestWrite, name,
|
||||
file, buf, current_size, n_bytes) !=
|
||||
DB_SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
||||
current_size += n_bytes;
|
||||
}
|
||||
|
||||
aligned_free(buf);
|
||||
|
||||
return(current_size >= size && os_file_flush(file));
|
||||
}
|
||||
|
||||
/** Truncate a file to a specified size in bytes.
|
||||
@param[in] pathname file path
|
||||
@param[in] file file to be truncated
|
||||
|
@ -3351,7 +3279,7 @@ os_file_truncate(
|
|||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
return(os_file_change_size_win32(pathname, file, size));
|
||||
return os_file_set_size(pathname, file, size);
|
||||
#else /* _WIN32 */
|
||||
return(os_file_truncate_posix(pathname, file, size));
|
||||
#endif /* _WIN32 */
|
||||
|
@ -4096,18 +4024,10 @@ static bool is_file_on_ssd(HANDLE handle, char *file_path)
|
|||
|
||||
#endif
|
||||
|
||||
/** Determine some file metadata when creating or reading the file.
|
||||
@param file the file that is being created, or OS_FILE_CLOSED */
|
||||
void fil_node_t::find_metadata(os_file_t file
|
||||
#ifndef _WIN32
|
||||
, struct stat* statbuf
|
||||
#endif
|
||||
)
|
||||
void fil_node_t::find_metadata() noexcept
|
||||
{
|
||||
if (file == OS_FILE_CLOSED) {
|
||||
file = handle;
|
||||
ut_ad(is_open());
|
||||
}
|
||||
ut_ad(is_open());
|
||||
os_file_t file = handle;
|
||||
|
||||
#ifdef _WIN32 /* FIXME: make this unconditional */
|
||||
if (space->punch_hole) {
|
||||
|
@ -4139,20 +4059,17 @@ void fil_node_t::find_metadata(os_file_t file
|
|||
block_size = 512;
|
||||
}
|
||||
#else
|
||||
struct stat sbuf;
|
||||
if (!statbuf && !fstat(file, &sbuf)) {
|
||||
MSAN_STAT_WORKAROUND(&sbuf);
|
||||
statbuf = &sbuf;
|
||||
}
|
||||
if (statbuf) {
|
||||
block_size = statbuf->st_blksize;
|
||||
}
|
||||
on_ssd = space->atomic_write_supported
|
||||
on_ssd = space->atomic_write_supported;
|
||||
struct stat statbuf;
|
||||
if (!fstat(file, &statbuf)) {
|
||||
MSAN_STAT_WORKAROUND(&statbuf);
|
||||
block_size = statbuf.st_blksize;
|
||||
# ifdef __linux__
|
||||
|| (statbuf && fil_system.is_ssd(statbuf->st_dev))
|
||||
on_ssd = on_ssd || fil_system.is_ssd(statbuf.st_dev);
|
||||
# endif
|
||||
;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!space->atomic_write_supported) {
|
||||
space->atomic_write_supported = atomic_write
|
||||
&& srv_use_atomic_writes
|
||||
|
@ -4176,21 +4093,14 @@ void fil_node_t::find_metadata(os_file_t file
|
|||
|
||||
/** Read the first page of a data file.
|
||||
@return whether the page was found valid */
|
||||
bool fil_node_t::read_page0()
|
||||
bool fil_node_t::read_page0() noexcept
|
||||
{
|
||||
ut_ad(mutex_own(&fil_system.mutex));
|
||||
const unsigned psize = space->physical_size();
|
||||
#ifndef _WIN32
|
||||
struct stat statbuf;
|
||||
if (fstat(handle, &statbuf)) {
|
||||
os_offset_t size_bytes = os_file_get_size(handle);
|
||||
if (size_bytes == os_offset_t(-1)) {
|
||||
return false;
|
||||
}
|
||||
MSAN_STAT_WORKAROUND(&statbuf);
|
||||
os_offset_t size_bytes = statbuf.st_size;
|
||||
#else
|
||||
os_offset_t size_bytes = os_file_get_size(handle);
|
||||
ut_a(size_bytes != (os_offset_t) -1);
|
||||
#endif
|
||||
const uint32_t min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
|
||||
|
||||
if (size_bytes < min_size) {
|
||||
|
@ -4258,11 +4168,8 @@ invalid:
|
|||
return false;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
find_metadata(handle, &statbuf);
|
||||
#else
|
||||
find_metadata();
|
||||
#endif
|
||||
|
||||
/* Truncate the size to a multiple of extent size. */
|
||||
ulint mask = psize * FSP_EXTENT_SIZE - 1;
|
||||
|
||||
|
|
Loading…
Reference in a new issue