branches/innodb+

Provide support for native AIO on Linux.

rb://46 approved by: Marko
This commit is contained in:
inaam 2009-01-13 18:20:49 +00:00
parent 44e408b9c5
commit 2f9fb41b05
9 changed files with 853 additions and 59 deletions

View file

@ -4436,11 +4436,14 @@ fil_aio_wait(
ut_ad(fil_validate());
if (os_aio_use_native_aio) {
if (srv_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type);
#elif defined(LINUX_NATIVE_AIO)
ret = os_aio_linux_handle(segment, &fil_node,
&message, &type);
#else
ret = 0; /* Eliminate compiler warning */
ut_error;

View file

@ -9573,6 +9573,11 @@ static MYSQL_SYSVAR_STR(version, innodb_version_str,
PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
"InnoDB version", NULL, NULL, INNODB_VERSION_STR);
/* Boolean system variable "use_native_aio", backed by the global
srv_use_native_aio. It is declared with PLUGIN_VAR_NOCMDARG |
PLUGIN_VAR_READONLY; the last macro argument (TRUE) is presumably the
default value. NOTE(review): the startup code may overwrite
srv_use_native_aio depending on the platform and on whether native AIO
support was compiled in. */
static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment),
@ -9619,6 +9624,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(version),
MYSQL_SYSVAR(use_native_aio),
NULL
};

View file

@ -51,12 +51,6 @@ typedef int os_file_t;
extern ulint os_innodb_umask;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
extern ibool os_aio_use_native_aio;
#define OS_FILE_SECTOR_SIZE 512
/* The next value should be smaller or equal to the smallest sector size used
@ -98,6 +92,7 @@ log. */
to become available again */
#define OS_FILE_SHARING_VIOLATION 76
#define OS_FILE_ERROR_NOT_SPECIFIED 77
#define OS_FILE_AIO_INTERRUPTED 78
/* Types for aio operations */
#define OS_FILE_READ 10
@ -556,9 +551,10 @@ in the three first aio arrays is the parameter n_segments given to the
function. The caller must create an i/o handler thread for each segment in
the four first arrays, but not for the sync aio array. */
UNIV_INTERN
void
ibool
os_aio_init(
/*========*/
/* out: TRUE on success. */
ulint n, /* in: maximum number of pending aio operations
allowed; n must be divisible by n_segments */
ulint n_segments, /* in: combined number of segments in the four
@ -737,4 +733,32 @@ innobase_mysql_tmpfile(void);
/* out: temporary file descriptor, or < 0 on error */
#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
#if defined(LINUX_NATIVE_AIO)
/**************************************************************************
This function is only used in Linux native asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait for
the completed requests. The aio array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
therefore no other thread is allowed to do the freeing! */
UNIV_INTERN
ibool
os_aio_linux_handle(
/*================*/
/* out: TRUE if the IO was successful */
ulint global_seg, /* in: segment number in the aio array
to wait for; segment 0 is the ibuf
i/o thread, segment 1 is log i/o thread,
then follow the non-ibuf read threads,
and the last are the non-ibuf write
threads. */
fil_node_t**message1, /* out: the messages passed with the */
void** message2, /* aio request; note that in case the
aio operation failed, these output
parameters are valid and can be used to
restart the operation. */
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
#endif /* LINUX_NATIVE_AIO */
#endif

View file

@ -68,6 +68,11 @@ extern ulint srv_check_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on Windows and Linux. */
extern my_bool srv_use_native_aio;
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;

View file

@ -162,6 +162,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */
#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
more often */
#define UNIV_AIO_DEBUG /* prints info about
submitted and reaped AIO
requests to the log. */
#endif
#define UNIV_BTR_DEBUG /* check B-tree links */

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,14 @@ MYSQL_PLUGIN_ACTIONS(innobase, [
AC_C_BIGENDIAN
case "$target_os" in
lin*)
AC_CHECK_HEADER(libaio.h,
AC_CHECK_LIB(aio, io_setup,
LIBS="$LIBS -laio"
AC_DEFINE(LINUX_NATIVE_AIO, [1],
[Linux native async I/O support]),
AC_MSG_WARN([No Linux native async I/O])),
AC_MSG_WARN([No Linux native async I/O]))
CFLAGS="$CFLAGS -DUNIV_LINUX";;
hpux10*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;

View file

@ -102,6 +102,12 @@ UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on Windows and Linux. */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */

View file

@ -969,6 +969,7 @@ innobase_start_or_create_for_mysql(void)
ibool log_file_created;
ibool log_created = FALSE;
ibool log_opened = FALSE;
ibool success;
ib_uint64_t min_flushed_lsn;
ib_uint64_t max_flushed_lsn;
#ifdef UNIV_LOG_ARCHIVE
@ -1071,7 +1072,6 @@ innobase_start_or_create_for_mysql(void)
srv_is_being_started = TRUE;
srv_startup_is_before_trx_rollback_phase = TRUE;
os_aio_use_native_aio = FALSE;
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
@ -1083,12 +1083,30 @@ innobase_start_or_create_for_mysql(void)
but when run in conjunction with InnoDB Hot Backup, it seemed
to corrupt the data files. */
os_aio_use_native_aio = FALSE;
srv_use_native_aio = FALSE;
} else {
/* On Win 2000 and XP use async i/o */
os_aio_use_native_aio = TRUE;
srv_use_native_aio = TRUE;
}
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Using Linux native AIO\n");
}
#else
/* Currently native AIO is supported only on Windows and Linux,
and only when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
/* TODO: comment this out after internal testing. */
fprintf(stderr, "Ignoring innodb_use_native_aio\n");
srv_use_native_aio = FALSE;
#endif
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
@ -1113,11 +1131,11 @@ innobase_start_or_create_for_mysql(void)
#else
} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
os_aio_use_native_aio = FALSE;
srv_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
os_aio_use_native_aio = FALSE;
srv_use_native_aio = FALSE;
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
@ -1210,19 +1228,38 @@ innobase_start_or_create_for_mysql(void)
srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
}
if (!os_aio_use_native_aio) {
if (!srv_use_native_aio) {
/* With simulated aio we currently have a use for only 4 threads */
srv_n_file_io_threads = 4;
os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
success = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD *
srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
if (!success) {
return(DB_ERROR);
}
} else {
os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
* srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
/* Windows has a limit on pending IOs per thread.
Linux does not have any such restriction.
Choosing the segment size is a trade-off:
a larger size means longer linear searches
through the array, while a smaller size can
lead to the array being full, causing
unnecessary delays. The following value
for Linux is fairly arbitrary and needs to be
tested and tuned. */
success = os_aio_init(
#if defined(LINUX_NATIVE_AIO)
8 *
#endif /* LINUX_NATIVE_AIO */
SRV_N_PENDING_IOS_PER_THREAD *
srv_n_file_io_threads,
srv_n_file_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS);
if (!success) {
return(DB_ERROR);
}
}
fil_init(srv_max_n_open_files);