mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 13:02:28 +01:00
2a1f87ff72
Remove potential starvation of a full log buffer flush: only flush up to the lsn which was the largest at the time when we requested the full log buffer flush. os0sync.h, os0sync.c: Fix a bug in os_event on Unix: even though we signaled the event, some threads could continue waiting if the event became nonsignaled quickly again; this made group commit less efficient than it should be.
648 lines
14 KiB
C
648 lines
14 KiB
C
/******************************************************
|
|
The interface to the operating system
|
|
synchronization primitives.
|
|
|
|
(c) 1995 Innobase Oy
|
|
|
|
Created 9/6/1995 Heikki Tuuri
|
|
*******************************************************/
|
|
|
|
#include "os0sync.h"
|
|
#ifdef UNIV_NONINL
|
|
#include "os0sync.ic"
|
|
#endif
|
|
|
|
#ifdef __WIN__
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#include "ut0mem.h"
|
|
#include "srv0start.h"
|
|
|
|
/* Type definition for an operating system mutex struct */
|
|
struct os_mutex_struct{
|
|
void* handle; /* OS handle to mutex */
|
|
ulint count; /* we use this counter to check
|
|
that the same thread does not
|
|
recursively lock the mutex: we
|
|
do not assume that the OS mutex
|
|
supports recursive locking, though
|
|
NT seems to do that */
|
|
UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
|
|
/* list of all 'slow' OS mutexes created */
|
|
};
|
|
|
|
/* Mutex protecting counts and the lists of OS mutexes and events */
|
|
os_mutex_t os_sync_mutex;
|
|
ibool os_sync_mutex_inited = FALSE;
|
|
|
|
/* This is incremented by 1 in os_thread_create and decremented by 1 in
|
|
os_thread_exit */
|
|
ulint os_thread_count = 0;
|
|
|
|
/* The list of all events created */
|
|
UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
|
|
|
|
/* The list of all OS 'slow' mutexes */
|
|
UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
|
|
|
|
ulint os_event_count = 0;
|
|
ulint os_mutex_count = 0;
|
|
ulint os_fast_mutex_count = 0;
|
|
|
|
|
|
/*************************************************************
|
|
Initializes global event and OS 'slow' mutex lists. */
|
|
|
|
void
|
|
os_sync_init(void)
|
|
/*==============*/
|
|
{
|
|
UT_LIST_INIT(os_event_list);
|
|
UT_LIST_INIT(os_mutex_list);
|
|
|
|
os_sync_mutex = os_mutex_create(NULL);
|
|
|
|
os_sync_mutex_inited = TRUE;
|
|
}
|
|
|
|
/*************************************************************
|
|
Frees created events and OS 'slow' mutexes. */
|
|
|
|
void
|
|
os_sync_free(void)
|
|
/*==============*/
|
|
{
|
|
os_event_t event;
|
|
os_mutex_t mutex;
|
|
|
|
event = UT_LIST_GET_FIRST(os_event_list);
|
|
|
|
while (event) {
|
|
|
|
os_event_free(event);
|
|
|
|
event = UT_LIST_GET_FIRST(os_event_list);
|
|
}
|
|
|
|
mutex = UT_LIST_GET_FIRST(os_mutex_list);
|
|
|
|
while (mutex) {
|
|
if (mutex == os_sync_mutex) {
|
|
/* Set the flag to FALSE so that we do not try to
|
|
reserve os_sync_mutex any more in remaining freeing
|
|
operations in shutdown */
|
|
os_sync_mutex_inited = FALSE;
|
|
}
|
|
|
|
os_mutex_free(mutex);
|
|
|
|
mutex = UT_LIST_GET_FIRST(os_mutex_list);
|
|
}
|
|
}
|
|
|
|
/*************************************************************
|
|
Creates an event semaphore, i.e., a semaphore which may just have two
|
|
states: signaled and nonsignaled. The created event is manual reset: it
|
|
must be reset explicitly by calling sync_os_reset_event. */
|
|
|
|
os_event_t
|
|
os_event_create(
|
|
/*============*/
|
|
/* out: the event handle */
|
|
char* name) /* in: the name of the event, if NULL
|
|
the event is created without a name */
|
|
{
|
|
#ifdef __WIN__
|
|
os_event_t event;
|
|
|
|
event = ut_malloc(sizeof(struct os_event_struct));
|
|
|
|
event->handle = CreateEvent(NULL,/* No security attributes */
|
|
TRUE, /* Manual reset */
|
|
FALSE, /* Initial state nonsignaled */
|
|
name);
|
|
if (!event->handle) {
|
|
fprintf(stderr,
|
|
"InnoDB: Could not create a Windows event semaphore; Windows error %lu\n",
|
|
(ulint)GetLastError());
|
|
}
|
|
#else /* Unix */
|
|
os_event_t event;
|
|
|
|
UT_NOT_USED(name);
|
|
|
|
event = ut_malloc(sizeof(struct os_event_struct));
|
|
|
|
os_fast_mutex_init(&(event->os_mutex));
|
|
|
|
#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
|
|
ut_a(0 == pthread_cond_init(&(event->cond_var),
|
|
pthread_condattr_default));
|
|
#else
|
|
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
|
|
#endif
|
|
event->is_set = FALSE;
|
|
event->signal_count = 0;
|
|
#endif /* __WIN__ */
|
|
|
|
/* Put to the list of events */
|
|
os_mutex_enter(os_sync_mutex);
|
|
|
|
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
|
|
|
|
os_event_count++;
|
|
|
|
os_mutex_exit(os_sync_mutex);
|
|
|
|
return(event);
|
|
}
|
|
|
|
#ifdef __WIN__
|
|
/*************************************************************
|
|
Creates an auto-reset event semaphore, i.e., an event which is automatically
|
|
reset when a single thread is released. Works only in Windows. */
|
|
|
|
os_event_t
|
|
os_event_create_auto(
|
|
/*=================*/
|
|
/* out: the event handle */
|
|
char* name) /* in: the name of the event, if NULL
|
|
the event is created without a name */
|
|
{
|
|
os_event_t event;
|
|
|
|
event = ut_malloc(sizeof(struct os_event_struct));
|
|
|
|
event->handle = CreateEvent(NULL,/* No security attributes */
|
|
FALSE, /* Auto-reset */
|
|
FALSE, /* Initial state nonsignaled */
|
|
name);
|
|
|
|
if (!event->handle) {
|
|
fprintf(stderr,
|
|
"InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n",
|
|
(ulint)GetLastError());
|
|
}
|
|
|
|
/* Put to the list of events */
|
|
os_mutex_enter(os_sync_mutex);
|
|
|
|
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
|
|
|
|
os_event_count++;
|
|
|
|
os_mutex_exit(os_sync_mutex);
|
|
|
|
return(event);
|
|
}
|
|
#endif
|
|
|
|
/**************************************************************
|
|
Sets an event semaphore to the signaled state: lets waiting threads
|
|
proceed. */
|
|
|
|
void
|
|
os_event_set(
|
|
/*=========*/
|
|
os_event_t event) /* in: event to set */
|
|
{
|
|
#ifdef __WIN__
|
|
ut_a(event);
|
|
ut_a(SetEvent(event->handle));
|
|
#else
|
|
ut_a(event);
|
|
|
|
os_fast_mutex_lock(&(event->os_mutex));
|
|
|
|
if (event->is_set) {
|
|
/* Do nothing */
|
|
} else {
|
|
event->is_set = TRUE;
|
|
event->signal_count += 1;
|
|
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
|
|
}
|
|
|
|
os_fast_mutex_unlock(&(event->os_mutex));
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Resets an event semaphore to the nonsignaled state. Waiting threads will
|
|
stop to wait for the event. */
|
|
|
|
void
|
|
os_event_reset(
|
|
/*===========*/
|
|
os_event_t event) /* in: event to reset */
|
|
{
|
|
#ifdef __WIN__
|
|
ut_a(event);
|
|
|
|
ut_a(ResetEvent(event->handle));
|
|
#else
|
|
ut_a(event);
|
|
|
|
os_fast_mutex_lock(&(event->os_mutex));
|
|
|
|
if (!event->is_set) {
|
|
/* Do nothing */
|
|
} else {
|
|
event->is_set = FALSE;
|
|
}
|
|
|
|
os_fast_mutex_unlock(&(event->os_mutex));
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Frees an event object. */
|
|
|
|
void
|
|
os_event_free(
|
|
/*==========*/
|
|
os_event_t event) /* in: event to free */
|
|
|
|
{
|
|
#ifdef __WIN__
|
|
ut_a(event);
|
|
|
|
ut_a(CloseHandle(event->handle));
|
|
#else
|
|
ut_a(event);
|
|
|
|
os_fast_mutex_free(&(event->os_mutex));
|
|
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
|
|
#endif
|
|
/* Remove from the list of events */
|
|
|
|
os_mutex_enter(os_sync_mutex);
|
|
|
|
UT_LIST_REMOVE(os_event_list, os_event_list, event);
|
|
|
|
os_event_count--;
|
|
|
|
os_mutex_exit(os_sync_mutex);
|
|
|
|
ut_free(event);
|
|
}
|
|
|
|
/**************************************************************
|
|
Waits for an event object until it is in the signaled state. If
|
|
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
|
|
waiting thread when the event becomes signaled (or immediately if the
|
|
event is already in the signaled state). */
|
|
|
|
void
|
|
os_event_wait(
|
|
/*==========*/
|
|
os_event_t event) /* in: event to wait */
|
|
{
|
|
#ifdef __WIN__
|
|
DWORD err;
|
|
|
|
ut_a(event);
|
|
|
|
/* Specify an infinite time limit for waiting */
|
|
err = WaitForSingleObject(event->handle, INFINITE);
|
|
|
|
ut_a(err == WAIT_OBJECT_0);
|
|
|
|
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
|
|
os_thread_exit(NULL);
|
|
}
|
|
#else
|
|
ib_longlong old_signal_count;
|
|
|
|
os_fast_mutex_lock(&(event->os_mutex));
|
|
|
|
old_signal_count = event->signal_count;
|
|
loop:
|
|
if (event->is_set == TRUE
|
|
|| event->signal_count != old_signal_count) {
|
|
|
|
os_fast_mutex_unlock(&(event->os_mutex));
|
|
|
|
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
|
|
|
|
os_thread_exit(NULL);
|
|
}
|
|
/* Ok, we may return */
|
|
|
|
return;
|
|
}
|
|
|
|
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
|
|
|
|
/* Solaris manual said that spurious wakeups may occur: we have to
|
|
check if the event really has been signaled after we came here to
|
|
wait */
|
|
|
|
goto loop;
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Waits for an event object until it is in the signaled state or
|
|
a timeout is exceeded. In Unix the timeout is always infinite. */
|
|
|
|
ulint
|
|
os_event_wait_time(
|
|
/*===============*/
|
|
/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
|
|
timeout was exceeded */
|
|
os_event_t event, /* in: event to wait */
|
|
ulint time) /* in: timeout in microseconds, or
|
|
OS_SYNC_INFINITE_TIME */
|
|
{
|
|
#ifdef __WIN__
|
|
DWORD err;
|
|
|
|
ut_a(event);
|
|
|
|
if (time != OS_SYNC_INFINITE_TIME) {
|
|
err = WaitForSingleObject(event->handle, time / 1000);
|
|
} else {
|
|
err = WaitForSingleObject(event->handle, INFINITE);
|
|
}
|
|
|
|
if (err == WAIT_OBJECT_0) {
|
|
|
|
return(0);
|
|
} else if (err == WAIT_TIMEOUT) {
|
|
|
|
return(OS_SYNC_TIME_EXCEEDED);
|
|
} else {
|
|
ut_error;
|
|
return(1000000); /* dummy value to eliminate compiler warn. */
|
|
}
|
|
#else
|
|
UT_NOT_USED(time);
|
|
|
|
/* In Posix this is just an ordinary, infinite wait */
|
|
|
|
os_event_wait(event);
|
|
|
|
return(0);
|
|
#endif
|
|
}
|
|
|
|
#ifdef __WIN__
|
|
/**************************************************************
|
|
Waits for any event in an OS native event array. Returns if even a single
|
|
one is signaled or becomes signaled. */
|
|
|
|
ulint
|
|
os_event_wait_multiple(
|
|
/*===================*/
|
|
/* out: index of the event
|
|
which was signaled */
|
|
ulint n, /* in: number of events in the
|
|
array */
|
|
os_native_event_t* native_event_array)
|
|
/* in: pointer to an array of event
|
|
handles */
|
|
{
|
|
DWORD index;
|
|
|
|
ut_a(native_event_array);
|
|
ut_a(n > 0);
|
|
|
|
index = WaitForMultipleObjects(n, native_event_array,
|
|
FALSE, /* Wait for any 1 event */
|
|
INFINITE); /* Infinite wait time
|
|
limit */
|
|
ut_a(index >= WAIT_OBJECT_0);
|
|
ut_a(index < WAIT_OBJECT_0 + n);
|
|
|
|
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
|
|
os_thread_exit(NULL);
|
|
}
|
|
|
|
return(index - WAIT_OBJECT_0);
|
|
}
|
|
#endif
|
|
|
|
/*************************************************************
|
|
Creates an operating system mutex semaphore. Because these are slow, the
|
|
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
|
|
|
|
os_mutex_t
|
|
os_mutex_create(
|
|
/*============*/
|
|
/* out: the mutex handle */
|
|
char* name) /* in: the name of the mutex, if NULL
|
|
the mutex is created without a name */
|
|
{
|
|
#ifdef __WIN__
|
|
HANDLE mutex;
|
|
os_mutex_t mutex_str;
|
|
|
|
mutex = CreateMutex(NULL, /* No security attributes */
|
|
FALSE, /* Initial state: no owner */
|
|
name);
|
|
ut_a(mutex);
|
|
#else
|
|
os_fast_mutex_t* mutex;
|
|
os_mutex_t mutex_str;
|
|
|
|
UT_NOT_USED(name);
|
|
|
|
mutex = ut_malloc(sizeof(os_fast_mutex_t));
|
|
|
|
os_fast_mutex_init(mutex);
|
|
#endif
|
|
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
|
|
|
|
mutex_str->handle = mutex;
|
|
mutex_str->count = 0;
|
|
|
|
if (os_sync_mutex_inited) {
|
|
/* When creating os_sync_mutex itself we cannot reserve it */
|
|
os_mutex_enter(os_sync_mutex);
|
|
}
|
|
|
|
UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
|
|
|
|
os_mutex_count++;
|
|
|
|
if (os_sync_mutex_inited) {
|
|
os_mutex_exit(os_sync_mutex);
|
|
}
|
|
|
|
return(mutex_str);
|
|
}
|
|
|
|
/**************************************************************
|
|
Acquires ownership of a mutex semaphore. */
|
|
|
|
void
|
|
os_mutex_enter(
|
|
/*===========*/
|
|
os_mutex_t mutex) /* in: mutex to acquire */
|
|
{
|
|
#ifdef __WIN__
|
|
DWORD err;
|
|
|
|
ut_a(mutex);
|
|
|
|
/* Specify infinite time limit for waiting */
|
|
err = WaitForSingleObject(mutex->handle, INFINITE);
|
|
|
|
ut_a(err == WAIT_OBJECT_0);
|
|
|
|
(mutex->count)++;
|
|
ut_a(mutex->count == 1);
|
|
#else
|
|
os_fast_mutex_lock(mutex->handle);
|
|
|
|
(mutex->count)++;
|
|
|
|
ut_a(mutex->count == 1);
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Releases ownership of a mutex. */
|
|
|
|
void
|
|
os_mutex_exit(
|
|
/*==========*/
|
|
os_mutex_t mutex) /* in: mutex to release */
|
|
{
|
|
ut_a(mutex);
|
|
|
|
ut_a(mutex->count == 1);
|
|
|
|
(mutex->count)--;
|
|
#ifdef __WIN__
|
|
ut_a(ReleaseMutex(mutex->handle));
|
|
#else
|
|
os_fast_mutex_unlock(mutex->handle);
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Frees a mutex object. */
|
|
|
|
void
|
|
os_mutex_free(
|
|
/*==========*/
|
|
os_mutex_t mutex) /* in: mutex to free */
|
|
{
|
|
ut_a(mutex);
|
|
|
|
if (os_sync_mutex_inited) {
|
|
os_mutex_enter(os_sync_mutex);
|
|
}
|
|
|
|
UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
|
|
|
|
os_mutex_count--;
|
|
|
|
if (os_sync_mutex_inited) {
|
|
os_mutex_exit(os_sync_mutex);
|
|
}
|
|
|
|
#ifdef __WIN__
|
|
ut_a(CloseHandle(mutex->handle));
|
|
|
|
ut_free(mutex);
|
|
#else
|
|
os_fast_mutex_free(mutex->handle);
|
|
ut_free(mutex->handle);
|
|
ut_free(mutex);
|
|
#endif
|
|
}
|
|
|
|
/*************************************************************
|
|
Initializes an operating system fast mutex semaphore. */
|
|
|
|
void
|
|
os_fast_mutex_init(
|
|
/*===============*/
|
|
os_fast_mutex_t* fast_mutex) /* in: fast mutex */
|
|
{
|
|
#ifdef __WIN__
|
|
ut_a(fast_mutex);
|
|
|
|
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
|
|
#else
|
|
#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
|
|
ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
|
|
#else
|
|
ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
|
|
#endif
|
|
#endif
|
|
if (os_sync_mutex_inited) {
|
|
/* When creating os_sync_mutex itself (in Unix) we cannot
|
|
reserve it */
|
|
|
|
os_mutex_enter(os_sync_mutex);
|
|
}
|
|
|
|
os_fast_mutex_count++;
|
|
|
|
if (os_sync_mutex_inited) {
|
|
os_mutex_exit(os_sync_mutex);
|
|
}
|
|
}
|
|
|
|
/**************************************************************
|
|
Acquires ownership of a fast mutex. */
|
|
|
|
void
|
|
os_fast_mutex_lock(
|
|
/*===============*/
|
|
os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
|
|
{
|
|
#ifdef __WIN__
|
|
EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
|
|
#else
|
|
pthread_mutex_lock(fast_mutex);
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Releases ownership of a fast mutex. */
|
|
|
|
void
|
|
os_fast_mutex_unlock(
|
|
/*=================*/
|
|
os_fast_mutex_t* fast_mutex) /* in: mutex to release */
|
|
{
|
|
#ifdef __WIN__
|
|
LeaveCriticalSection(fast_mutex);
|
|
#else
|
|
pthread_mutex_unlock(fast_mutex);
|
|
#endif
|
|
}
|
|
|
|
/**************************************************************
|
|
Frees a mutex object. */
|
|
|
|
void
|
|
os_fast_mutex_free(
|
|
/*===============*/
|
|
os_fast_mutex_t* fast_mutex) /* in: mutex to free */
|
|
{
|
|
#ifdef __WIN__
|
|
ut_a(fast_mutex);
|
|
|
|
DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
|
|
#else
|
|
ut_a(0 == pthread_mutex_destroy(fast_mutex));
|
|
#endif
|
|
if (os_sync_mutex_inited) {
|
|
/* When freeing the last mutexes, we have
|
|
already freed os_sync_mutex */
|
|
|
|
os_mutex_enter(os_sync_mutex);
|
|
}
|
|
|
|
os_fast_mutex_count--;
|
|
|
|
if (os_sync_mutex_inited) {
|
|
os_mutex_exit(os_sync_mutex);
|
|
}
|
|
}
|