mariadb/innobase/os/os0sync.c
heikki@hundin.mysql.fi 2a1f87ff72 Many files:
Remove potential starvation of a full log buffer flush: only flush up to the lsn which was the largest at the time when we requested the full log buffer flush
os0sync.h, os0sync.c:
  Fix a bug in os_event on Unix: even though we signaled the event, some threads could continue waiting if the event became nonsignaled quickly again; this made group commit less efficient than it should be
2003-07-03 21:25:55 +03:00

648 lines
14 KiB
C

/******************************************************
The interface to the operating system
synchronization primitives.
(c) 1995 Innobase Oy
Created 9/6/1995 Heikki Tuuri
*******************************************************/
#include "os0sync.h"
#ifdef UNIV_NONINL
#include "os0sync.ic"
#endif
#ifdef __WIN__
#include <windows.h>
#endif
#include "ut0mem.h"
#include "srv0start.h"
/* Type definition for an operating system mutex struct */
struct os_mutex_struct{
void* handle; /* OS handle to mutex */
ulint count; /* we use this counter to check
that the same thread does not
recursively lock the mutex: we
do not assume that the OS mutex
supports recursive locking, though
NT seems to do that */
UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
/* list of all 'slow' OS mutexes created */
};
/* Mutex protecting counts and the lists of OS mutexes and events */
os_mutex_t os_sync_mutex;
ibool os_sync_mutex_inited = FALSE;
/* This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
ulint os_thread_count = 0;
/* The list of all events created */
UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
/* The list of all OS 'slow' mutexes */
UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
ulint os_event_count = 0;
ulint os_mutex_count = 0;
ulint os_fast_mutex_count = 0;
/*************************************************************
Initializes global event and OS 'slow' mutex lists. */
void
os_sync_init(void)
/*==============*/
{
UT_LIST_INIT(os_event_list);
UT_LIST_INIT(os_mutex_list);
os_sync_mutex = os_mutex_create(NULL);
os_sync_mutex_inited = TRUE;
}
/*************************************************************
Frees created events and OS 'slow' mutexes. */
void
os_sync_free(void)
/*==============*/
{
os_event_t event;
os_mutex_t mutex;
event = UT_LIST_GET_FIRST(os_event_list);
while (event) {
os_event_free(event);
event = UT_LIST_GET_FIRST(os_event_list);
}
mutex = UT_LIST_GET_FIRST(os_mutex_list);
while (mutex) {
if (mutex == os_sync_mutex) {
/* Set the flag to FALSE so that we do not try to
reserve os_sync_mutex any more in remaining freeing
operations in shutdown */
os_sync_mutex_inited = FALSE;
}
os_mutex_free(mutex);
mutex = UT_LIST_GET_FIRST(os_mutex_list);
}
}
/*************************************************************
Creates an event semaphore, i.e., a semaphore which may just have two
states: signaled and nonsignaled. The created event is manual reset: it
must be reset explicitly by calling sync_os_reset_event. */
os_event_t
os_event_create(
/*============*/
/* out: the event handle */
char* name) /* in: the name of the event, if NULL
the event is created without a name */
{
#ifdef __WIN__
os_event_t event;
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL,/* No security attributes */
TRUE, /* Manual reset */
FALSE, /* Initial state nonsignaled */
name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore; Windows error %lu\n",
(ulint)GetLastError());
}
#else /* Unix */
os_event_t event;
UT_NOT_USED(name);
event = ut_malloc(sizeof(struct os_event_struct));
os_fast_mutex_init(&(event->os_mutex));
#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
ut_a(0 == pthread_cond_init(&(event->cond_var),
pthread_condattr_default));
#else
ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
#endif
event->is_set = FALSE;
event->signal_count = 0;
#endif /* __WIN__ */
/* Put to the list of events */
os_mutex_enter(os_sync_mutex);
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
os_event_count++;
os_mutex_exit(os_sync_mutex);
return(event);
}
#ifdef __WIN__
/*************************************************************
Creates an auto-reset event semaphore, i.e., an event which is automatically
reset when a single thread is released. Works only in Windows. */
os_event_t
os_event_create_auto(
/*=================*/
/* out: the event handle */
char* name) /* in: the name of the event, if NULL
the event is created without a name */
{
os_event_t event;
event = ut_malloc(sizeof(struct os_event_struct));
event->handle = CreateEvent(NULL,/* No security attributes */
FALSE, /* Auto-reset */
FALSE, /* Initial state nonsignaled */
name);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n",
(ulint)GetLastError());
}
/* Put to the list of events */
os_mutex_enter(os_sync_mutex);
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
os_event_count++;
os_mutex_exit(os_sync_mutex);
return(event);
}
#endif
/**************************************************************
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
void
os_event_set(
/*=========*/
os_event_t event) /* in: event to set */
{
#ifdef __WIN__
ut_a(event);
ut_a(SetEvent(event->handle));
#else
ut_a(event);
os_fast_mutex_lock(&(event->os_mutex));
if (event->is_set) {
/* Do nothing */
} else {
event->is_set = TRUE;
event->signal_count += 1;
ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
}
os_fast_mutex_unlock(&(event->os_mutex));
#endif
}
/**************************************************************
Resets an event semaphore to the nonsignaled state. Waiting threads will
stop to wait for the event. */
void
os_event_reset(
/*===========*/
os_event_t event) /* in: event to reset */
{
#ifdef __WIN__
ut_a(event);
ut_a(ResetEvent(event->handle));
#else
ut_a(event);
os_fast_mutex_lock(&(event->os_mutex));
if (!event->is_set) {
/* Do nothing */
} else {
event->is_set = FALSE;
}
os_fast_mutex_unlock(&(event->os_mutex));
#endif
}
/**************************************************************
Frees an event object. */
void
os_event_free(
/*==========*/
os_event_t event) /* in: event to free */
{
#ifdef __WIN__
ut_a(event);
ut_a(CloseHandle(event->handle));
#else
ut_a(event);
os_fast_mutex_free(&(event->os_mutex));
ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
#endif
/* Remove from the list of events */
os_mutex_enter(os_sync_mutex);
UT_LIST_REMOVE(os_event_list, os_event_list, event);
os_event_count--;
os_mutex_exit(os_sync_mutex);
ut_free(event);
}
/**************************************************************
Waits for an event object until it is in the signaled state. If
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
waiting thread when the event becomes signaled (or immediately if the
event is already in the signaled state). */
void
os_event_wait(
/*==========*/
os_event_t event) /* in: event to wait */
{
#ifdef __WIN__
DWORD err;
ut_a(event);
/* Specify an infinite time limit for waiting */
err = WaitForSingleObject(event->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
#else
ib_longlong old_signal_count;
os_fast_mutex_lock(&(event->os_mutex));
old_signal_count = event->signal_count;
loop:
if (event->is_set == TRUE
|| event->signal_count != old_signal_count) {
os_fast_mutex_unlock(&(event->os_mutex));
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
/* Ok, we may return */
return;
}
pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
/* Solaris manual said that spurious wakeups may occur: we have to
check if the event really has been signaled after we came here to
wait */
goto loop;
#endif
}
/**************************************************************
Waits for an event object until it is in the signaled state or
a timeout is exceeded. In Unix the timeout is always infinite. */
ulint
os_event_wait_time(
/*===============*/
/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
timeout was exceeded */
os_event_t event, /* in: event to wait */
ulint time) /* in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
{
#ifdef __WIN__
DWORD err;
ut_a(event);
if (time != OS_SYNC_INFINITE_TIME) {
err = WaitForSingleObject(event->handle, time / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
if (err == WAIT_OBJECT_0) {
return(0);
} else if (err == WAIT_TIMEOUT) {
return(OS_SYNC_TIME_EXCEEDED);
} else {
ut_error;
return(1000000); /* dummy value to eliminate compiler warn. */
}
#else
UT_NOT_USED(time);
/* In Posix this is just an ordinary, infinite wait */
os_event_wait(event);
return(0);
#endif
}
#ifdef __WIN__
/**************************************************************
Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled. */
ulint
os_event_wait_multiple(
/*===================*/
/* out: index of the event
which was signaled */
ulint n, /* in: number of events in the
array */
os_native_event_t* native_event_array)
/* in: pointer to an array of event
handles */
{
DWORD index;
ut_a(native_event_array);
ut_a(n > 0);
index = WaitForMultipleObjects(n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
ut_a(index >= WAIT_OBJECT_0);
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_thread_exit(NULL);
}
return(index - WAIT_OBJECT_0);
}
#endif
/*************************************************************
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
os_mutex_t
os_mutex_create(
/*============*/
/* out: the mutex handle */
char* name) /* in: the name of the mutex, if NULL
the mutex is created without a name */
{
#ifdef __WIN__
HANDLE mutex;
os_mutex_t mutex_str;
mutex = CreateMutex(NULL, /* No security attributes */
FALSE, /* Initial state: no owner */
name);
ut_a(mutex);
#else
os_fast_mutex_t* mutex;
os_mutex_t mutex_str;
UT_NOT_USED(name);
mutex = ut_malloc(sizeof(os_fast_mutex_t));
os_fast_mutex_init(mutex);
#endif
mutex_str = ut_malloc(sizeof(os_mutex_str_t));
mutex_str->handle = mutex;
mutex_str->count = 0;
if (os_sync_mutex_inited) {
/* When creating os_sync_mutex itself we cannot reserve it */
os_mutex_enter(os_sync_mutex);
}
UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
os_mutex_count++;
if (os_sync_mutex_inited) {
os_mutex_exit(os_sync_mutex);
}
return(mutex_str);
}
/**************************************************************
Acquires ownership of a mutex semaphore. */
void
os_mutex_enter(
/*===========*/
os_mutex_t mutex) /* in: mutex to acquire */
{
#ifdef __WIN__
DWORD err;
ut_a(mutex);
/* Specify infinite time limit for waiting */
err = WaitForSingleObject(mutex->handle, INFINITE);
ut_a(err == WAIT_OBJECT_0);
(mutex->count)++;
ut_a(mutex->count == 1);
#else
os_fast_mutex_lock(mutex->handle);
(mutex->count)++;
ut_a(mutex->count == 1);
#endif
}
/**************************************************************
Releases ownership of a mutex. */
void
os_mutex_exit(
/*==========*/
os_mutex_t mutex) /* in: mutex to release */
{
ut_a(mutex);
ut_a(mutex->count == 1);
(mutex->count)--;
#ifdef __WIN__
ut_a(ReleaseMutex(mutex->handle));
#else
os_fast_mutex_unlock(mutex->handle);
#endif
}
/**************************************************************
Frees a mutex object. */
void
os_mutex_free(
/*==========*/
os_mutex_t mutex) /* in: mutex to free */
{
ut_a(mutex);
if (os_sync_mutex_inited) {
os_mutex_enter(os_sync_mutex);
}
UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
os_mutex_count--;
if (os_sync_mutex_inited) {
os_mutex_exit(os_sync_mutex);
}
#ifdef __WIN__
ut_a(CloseHandle(mutex->handle));
ut_free(mutex);
#else
os_fast_mutex_free(mutex->handle);
ut_free(mutex->handle);
ut_free(mutex);
#endif
}
/*************************************************************
Initializes an operating system fast mutex semaphore. */
void
os_fast_mutex_init(
/*===============*/
os_fast_mutex_t* fast_mutex) /* in: fast mutex */
{
#ifdef __WIN__
ut_a(fast_mutex);
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
#else
ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
#endif
#endif
if (os_sync_mutex_inited) {
/* When creating os_sync_mutex itself (in Unix) we cannot
reserve it */
os_mutex_enter(os_sync_mutex);
}
os_fast_mutex_count++;
if (os_sync_mutex_inited) {
os_mutex_exit(os_sync_mutex);
}
}
/**************************************************************
Acquires ownership of a fast mutex. */
void
os_fast_mutex_lock(
/*===============*/
os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
{
#ifdef __WIN__
EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
pthread_mutex_lock(fast_mutex);
#endif
}
/**************************************************************
Releases ownership of a fast mutex. */
void
os_fast_mutex_unlock(
/*=================*/
os_fast_mutex_t* fast_mutex) /* in: mutex to release */
{
#ifdef __WIN__
LeaveCriticalSection(fast_mutex);
#else
pthread_mutex_unlock(fast_mutex);
#endif
}
/**************************************************************
Frees a mutex object. */
void
os_fast_mutex_free(
/*===============*/
os_fast_mutex_t* fast_mutex) /* in: mutex to free */
{
#ifdef __WIN__
ut_a(fast_mutex);
DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
ut_a(0 == pthread_mutex_destroy(fast_mutex));
#endif
if (os_sync_mutex_inited) {
/* When freeing the last mutexes, we have
already freed os_sync_mutex */
os_mutex_enter(os_sync_mutex);
}
os_fast_mutex_count--;
if (os_sync_mutex_inited) {
os_mutex_exit(os_sync_mutex);
}
}