diff --git a/include/thr_alarm.h b/include/thr_alarm.h index 439f046252f..8ff4472f700 100644 --- a/include/thr_alarm.h +++ b/include/thr_alarm.h @@ -100,7 +100,7 @@ typedef struct st_alarm { #define thr_alarm_init(A) (*(A))=0 #define thr_alarm_in_use(A) (*(A)!= 0) void init_thr_alarm(uint max_alarm); -bool thr_alarm(thr_alarm_t *alarmed, uint sec, ALARM *buff); +my_bool thr_alarm(thr_alarm_t *alarmed, uint sec, ALARM *buff); void thr_alarm_kill(pthread_t thread_id); void thr_end_alarm(thr_alarm_t *alarmed); void end_thr_alarm(my_bool free_structures); diff --git a/innobase/com/com0shm.c b/innobase/com/com0shm.c index 72ab23b9be8..ed185ccdf47 100644 --- a/innobase/com/com0shm.c +++ b/innobase/com/com0shm.c @@ -103,7 +103,8 @@ struct com_shm_endpoint_struct{ the area currently may contain a datagram; NOTE: automatic event */ os_event_t empty; /* this is in the signaled state if the area - currently may be empty; NOTE: automatic event */ + currently may be empty; NOTE: automatic + event */ ip_mutex_hdl_t* ip_mutex; /* handle to the interprocess mutex protecting the shared memory */ UT_LIST_NODE_T(com_shm_endpoint_t) list; /* If the endpoint struct @@ -793,16 +794,18 @@ com_shm_create_or_open( ut_strcpy(buf + len, (char*)"_IBSHM_EV_NE"), - event_ne = os_event_create_auto(buf); + event_ne = os_event_create(buf); ut_ad(event_ne); ut_strcpy(buf + len, (char*)"_IBSHM_EV_EM"), - event_em = os_event_create_auto(buf); + event_em = os_event_create(buf); ut_ad(event_em); + ut_a(0); /* event_ne and event_em should be auto events! 
*/ + com_shm_endpoint_set_shm(ep, shm); com_shm_endpoint_set_map(ep, map); diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h index bad8e6e120a..634507467f9 100644 --- a/innobase/include/os0sync.h +++ b/innobase/include/os0sync.h @@ -13,13 +13,26 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #ifdef __WIN__ + #define os_fast_mutex_t CRITICAL_SECTION -typedef HANDLE os_event_t; + +typedef HANDLE os_native_event_t; + +typedef struct os_event_struct os_event_struct_t; +typedef os_event_struct_t* os_event_t; + +struct os_event_struct { + os_native_event_t handle; + /* Windows event */ + UT_LIST_NODE_T(os_event_struct_t) os_event_list; + /* list of all created events */ +}; #else typedef pthread_mutex_t os_fast_mutex_t; typedef struct os_event_struct os_event_struct_t; typedef os_event_struct_t* os_event_t; + struct os_event_struct { os_fast_mutex_t os_mutex; /* this mutex protects the next fields */ @@ -39,16 +52,16 @@ typedef os_mutex_str_t* os_mutex_t; #define OS_SYNC_TIME_EXCEEDED 1 -/* Mutex protecting the thread count and event and OS 'slow' mutex lists */ +/* Mutex protecting counts and the event and OS 'slow' mutex lists */ extern os_mutex_t os_sync_mutex; /* This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */ extern ulint os_thread_count; -/* The following are approximate counters for debugging in Unix */ extern ulint os_event_count; extern ulint os_mutex_count; +extern ulint os_fast_mutex_count; /************************************************************* Initializes global event and OS 'slow' mutex lists. */ @@ -57,15 +70,14 @@ void os_sync_init(void); /*==============*/ /************************************************************* -Frees created events (not in Windows) and OS 'slow' mutexes. */ +Frees created events and OS 'slow' mutexes. 
*/ void os_sync_free(void); /*==============*/ -/************************************************************* -Creates an event semaphore, i.e., a semaphore which may -just have two states: signaled and nonsignaled. -The created event is manual reset: it must be reset +/************************************************************* +Creates an event semaphore, i.e., a semaphore which may just have two states: +signaled and nonsignaled. The created event is manual reset: it must be reset explicitly by calling sync_os_reset_event. */ os_event_t @@ -74,10 +86,10 @@ os_event_create( /* out: the event handle */ char* name); /* in: the name of the event, if NULL the event is created without a name */ +#ifdef __WIN__ /************************************************************* -Creates an auto-reset event semaphore, i.e., an event -which is automatically reset when a single thread is -released. */ +Creates an auto-reset event semaphore, i.e., an event which is automatically +reset when a single thread is released. Works only in Windows. */ os_event_t os_event_create_auto( @@ -85,6 +97,7 @@ os_event_create_auto( /* out: the event handle */ char* name); /* in: the name of the event, if NULL the event is created without a name */ +#endif /************************************************************** Sets an event semaphore to the signaled state: lets waiting threads proceed. */ @@ -120,7 +133,7 @@ os_event_wait( os_event_t event); /* in: event to wait */ /************************************************************** Waits for an event object until it is in the signaled state or -a timeout is exceeded. */ +a timeout is exceeded. In Unix the timeout is always infinite. */ ulint os_event_wait_time( @@ -131,8 +144,9 @@ os_event_wait_time( os_event_t event, /* in: event to wait */ ulint time); /* in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ +#ifdef __WIN__ /************************************************************** -Waits for any event in an event array. 
Returns if even a single +Waits for any event in an OS native event array. Returns if even a single one is signaled or becomes signaled. */ ulint @@ -140,14 +154,15 @@ os_event_wait_multiple( /*===================*/ /* out: index of the event which was signaled */ - ulint n, /* in: number of events in the + ulint n, /* in: number of events in the array */ - os_event_t* event_array); /* in: pointer to an array of event + os_native_event_t* native_event_array); + /* in: pointer to an array of event handles */ +#endif /************************************************************* -Creates an operating system mutex semaphore. -Because these are slow, the mutex semaphore of the database -itself (sync_mutex_t) should be used where possible. */ +Creates an operating system mutex semaphore. Because these are slow, the +mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */ os_mutex_t os_mutex_create( diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic index 10b85c435e3..1337e97152a 100644 --- a/innobase/include/os0sync.ic +++ b/innobase/include/os0sync.ic @@ -44,4 +44,3 @@ os_fast_mutex_trylock( #endif #endif } - diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h index 92187f315c2..491d8866af4 100644 --- a/innobase/include/os0thread.h +++ b/innobase/include/os0thread.h @@ -65,7 +65,9 @@ os_thread_pf( /******************************************************************** Creates a new thread of execution. The execution starts from the function given. The start function takes a void* parameter -and returns a ulint. */ +and returns a ulint. +NOTE: We count the number of threads in os_thread_exit(). A created +thread should always use that to exit and not use return() to exit. 
*/ os_thread_t os_thread_create( diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index 9da5c6a47a5..e401503c4e3 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -80,6 +80,8 @@ struct os_aio_slot_struct{ which pending aio operation was completed */ #ifdef WIN_ASYNC_IO + os_event_t event; /* event object we need in the + OVERLAPPED struct */ OVERLAPPED control; /* Windows control block for the aio request */ #elif defined(POSIX_ASYNC_IO) @@ -107,11 +109,14 @@ struct os_aio_array_struct{ ulint n_reserved;/* Number of reserved slots in the aio array outside the ibuf segment */ os_aio_slot_t* slots; /* Pointer to the slots in the array */ - os_event_t* events; /* Pointer to an array of event handles - where we copied the handles from slots, - in the same order. This can be used in - WaitForMultipleObjects; used only in +#ifdef __WIN__ + os_native_event_t* native_events; + /* Pointer to an array of OS native event + handles where we copied the handles from + slots, in the same order. This can be used + in WaitForMultipleObjects; used only in Windows */ +#endif }; /* Array of events used in simulated aio */ @@ -296,7 +301,7 @@ os_file_handle_error( operation */ os_file_t file, /* in: file pointer */ char* name, /* in: name of a file or NULL */ - const char* operation) /* in: type of operation */ + const char* operation)/* in: operation */ { ulint err; @@ -338,8 +343,8 @@ os_file_handle_error( if (name) { fprintf(stderr, "InnoDB: File name %s\n", name); } - fprintf(stderr, "InnoDB: system call %s\n", operation); - + + fprintf(stderr, "InnoDB: System call %s.\n", operation); fprintf(stderr, "InnoDB: Cannot continue operation.\n"); fflush(stderr); @@ -422,9 +427,8 @@ try_again: *success = FALSE; retry = os_file_handle_error(file, name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); - + create_mode == OS_FILE_OPEN ? 
+ "open" : "create"); if (retry) { goto try_again; } @@ -465,10 +469,8 @@ try_again: *success = FALSE; retry = os_file_handle_error(file, name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); - - + create_mode == OS_FILE_OPEN ? + "open" : "create"); if (retry) { goto try_again; } @@ -576,9 +578,8 @@ try_again: *success = FALSE; retry = os_file_handle_error(file, name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); - + create_mode == OS_FILE_OPEN ? + "open" : "create"); if (retry) { goto try_again; } @@ -625,9 +626,8 @@ try_again: *success = FALSE; retry = os_file_handle_error(file, name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); - + create_mode == OS_FILE_OPEN ? + "open" : "create"); if (retry) { goto try_again; } @@ -1319,19 +1319,22 @@ os_aio_array_create( array->n_segments = n_segments; array->n_reserved = 0; array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); - array->events = ut_malloc(n * sizeof(os_event_t)); - +#ifdef __WIN__ + array->native_events = ut_malloc(n * sizeof(os_native_event_t)); +#endif for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i); slot->pos = i; slot->reserved = FALSE; #ifdef WIN_ASYNC_IO + slot->event = os_event_create(NULL); + over = &(slot->control); - over->hEvent = os_event_create(NULL); + over->hEvent = slot->event->handle; - *((array->events) + i) = over->hEvent; + *((array->native_events) + i) = over->hEvent; #endif } @@ -1429,7 +1432,7 @@ os_aio_array_wake_win_aio_at_shutdown( for (i = 0; i < array->n_slots; i++) { - os_event_set(*(array->events + i)); + os_event_set((array->slots + i)->event); } } #endif @@ -1689,7 +1692,7 @@ loop: control = &(slot->control); control->Offset = (DWORD)offset; control->OffsetHigh = (DWORD)offset_high; - os_event_reset(control->hEvent); + os_event_reset(slot->event); #elif defined(POSIX_ASYNC_IO) @@ -1747,7 +1750,7 @@ os_aio_array_free_slot( } #ifdef WIN_ASYNC_IO - os_event_reset(slot->control.hEvent); + os_event_reset(slot->event); #endif 
os_mutex_exit(array->mutex); } @@ -1872,7 +1875,7 @@ os_aio( offset where to read or write */ ulint offset_high, /* in: most significant 32 bits of offset */ - ulint n, /* in: number of bytes to read or write */ + ulint n, /* in: number of bytes to read or write */ void* message1,/* in: messages for the aio handler (these can be used to identify a completed aio operation); if mode is OS_AIO_SYNC, these @@ -1916,7 +1919,8 @@ os_aio( wait in the Windows case. */ if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, offset_high, n)); + return(os_file_read(file, buf, offset, + offset_high, n)); } ut_a(type == OS_FILE_WRITE); @@ -1994,8 +1998,7 @@ try_again: #ifdef WIN_ASYNC_IO if (os_aio_use_native_aio) { if ((ret && len == n) - || (!ret && GetLastError() == ERROR_IO_PENDING)) { - + || (!ret && GetLastError() == ERROR_IO_PENDING)) { /* aio was queued successfully! */ if (mode == OS_AIO_SYNC) { @@ -2025,8 +2028,8 @@ try_again: os_aio_array_free_slot(array, slot); - retry = os_file_handle_error(file, name, "aio"); - + retry = os_file_handle_error(file, name, + type == OS_FILE_READ ? 
"aio read" : "aio write"); if (retry) { goto try_again; @@ -2091,15 +2094,15 @@ os_aio_windows_handle( n = array->n_slots / array->n_segments; if (array == os_aio_sync_array) { - srv_io_thread_op_info[orig_seg] = "wait Windows aio for 1 page"; - - ut_ad(pos < array->n_slots); - os_event_wait(array->events[pos]); + srv_io_thread_op_info[orig_seg] = + "wait Windows aio for 1 page"; + os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); i = pos; } else { srv_io_thread_op_info[orig_seg] = "wait Windows aio"; - i = os_event_wait_multiple(n, (array->events) + segment * n); + i = os_event_wait_multiple(n, + (array->native_events) + segment * n); } os_mutex_enter(array->mutex); @@ -2124,7 +2127,7 @@ os_aio_windows_handle( ut_a(TRUE == os_file_flush(slot->file)); } } else { - os_file_handle_error(slot->file, slot->name, "aio"); + os_file_handle_error(slot->file, slot->name, "Windows aio"); ret_val = FALSE; } diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c index 4f322ee82b2..bf5fc57bf57 100644 --- a/innobase/os/os0sync.c +++ b/innobase/os/os0sync.c @@ -32,24 +32,23 @@ struct os_mutex_struct{ /* list of all 'slow' OS mutexes created */ }; -/* Mutex protecting the thread count and the lists of OS mutexes -and events */ +/* Mutex protecting counts and the lists of OS mutexes and events */ os_mutex_t os_sync_mutex; ibool os_sync_mutex_inited = FALSE; /* This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */ -ulint os_thread_count = 0; +ulint os_thread_count = 0; -/* The list of all events created (not in Windows) */ +/* The list of all events created */ UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list; /* The list of all OS 'slow' mutexes */ UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list; -/* The following are approximate counters for debugging in Unix */ -ulint os_event_count = 0; -ulint os_mutex_count = 0; +ulint os_event_count = 0; +ulint os_mutex_count = 0; +ulint os_fast_mutex_count = 0; 
/************************************************************* @@ -68,7 +67,7 @@ os_sync_init(void) } /************************************************************* -Frees created events (not in Windows) and OS 'slow' mutexes. */ +Frees created events and OS 'slow' mutexes. */ void os_sync_free(void) @@ -89,6 +88,12 @@ os_sync_free(void) mutex = UT_LIST_GET_FIRST(os_mutex_list); while (mutex) { + if (mutex == os_sync_mutex) { + /* Set the flag to FALSE so that we do not try to + reserve os_sync_mutex any more in remaining freeing + operations in shutdown */ + os_sync_mutex_inited = FALSE; + } os_mutex_free(mutex); @@ -97,10 +102,9 @@ os_sync_free(void) } /************************************************************* -Creates an event semaphore, i.e., a semaphore which may -just have two states: signaled and nonsignaled. -The created event is manual reset: it must be reset -explicitly by calling sync_os_reset_event. */ +Creates an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it +must be reset explicitly by calling sync_os_reset_event. 
*/ os_event_t os_event_create( @@ -112,20 +116,18 @@ os_event_create( #ifdef __WIN__ os_event_t event; - event = CreateEvent(NULL, /* No security attributes */ + event = ut_malloc(sizeof(struct os_event_struct)); + + event->handle = CreateEvent(NULL,/* No security attributes */ TRUE, /* Manual reset */ FALSE, /* Initial state nonsignaled */ name); - if (!event) { + if (!event->handle) { fprintf(stderr, "InnoDB: Could not create a Windows event semaphore; Windows error %lu\n", (ulint)GetLastError()); } - - ut_a(event); - - return(event); -#else +#else /* Unix */ os_event_t event; UT_NOT_USED(name); @@ -141,7 +143,9 @@ os_event_create( ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); #endif event->is_set = FALSE; +#endif /* __WIN__ */ + /* Put to the list of events */ os_mutex_enter(os_sync_mutex); UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); @@ -151,13 +155,12 @@ os_event_create( os_mutex_exit(os_sync_mutex); return(event); -#endif } +#ifdef __WIN__ /************************************************************* -Creates an auto-reset event semaphore, i.e., an event -which is automatically reset when a single thread is -released. */ +Creates an auto-reset event semaphore, i.e., an event which is automatically +reset when a single thread is released. Works only in Windows. 
*/ os_event_t os_event_create_auto( @@ -166,26 +169,33 @@ os_event_create_auto( char* name) /* in: the name of the event, if NULL the event is created without a name */ { -#ifdef __WIN__ os_event_t event; - event = CreateEvent(NULL, /* No security attributes */ + event = ut_malloc(sizeof(struct os_event_struct)); + + event->handle = CreateEvent(NULL,/* No security attributes */ FALSE, /* Auto-reset */ FALSE, /* Initial state nonsignaled */ name); - ut_a(event); + + if (!event->handle) { + fprintf(stderr, +"InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n", + (ulint)GetLastError()); + } + + /* Put to the list of events */ + os_mutex_enter(os_sync_mutex); + + UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); + + os_event_count++; + + os_mutex_exit(os_sync_mutex); return(event); -#else - /* Does nothing in Posix because we do not need this with MySQL */ - - UT_NOT_USED(name); - - ut_a(0); - - return(NULL); -#endif } +#endif /************************************************************** Sets an event semaphore to the signaled state: lets waiting threads @@ -198,7 +208,7 @@ os_event_set( { #ifdef __WIN__ ut_a(event); - ut_a(SetEvent(event)); + ut_a(SetEvent(event->handle)); #else ut_a(event); @@ -227,7 +237,7 @@ os_event_reset( #ifdef __WIN__ ut_a(event); - ut_a(ResetEvent(event)); + ut_a(ResetEvent(event->handle)); #else ut_a(event); @@ -255,12 +265,14 @@ os_event_free( #ifdef __WIN__ ut_a(event); - ut_a(CloseHandle(event)); + ut_a(CloseHandle(event->handle)); #else ut_a(event); os_fast_mutex_free(&(event->os_mutex)); ut_a(0 == pthread_cond_destroy(&(event->cond_var))); +#endif + /* Remove from the list of events */ os_mutex_enter(os_sync_mutex); @@ -271,7 +283,6 @@ os_event_free( os_mutex_exit(os_sync_mutex); ut_free(event); -#endif } /************************************************************** @@ -291,7 +302,7 @@ os_event_wait( ut_a(event); /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event, 
INFINITE); + err = WaitForSingleObject(event->handle, INFINITE); ut_a(err == WAIT_OBJECT_0); @@ -324,7 +335,7 @@ loop: /************************************************************** Waits for an event object until it is in the signaled state or -a timeout is exceeded. */ +a timeout is exceeded. In Unix the timeout is always infinite. */ ulint os_event_wait_time( @@ -341,9 +352,9 @@ os_event_wait_time( ut_a(event); if (time != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event, time / 1000); + err = WaitForSingleObject(event->handle, time / 1000); } else { - err = WaitForSingleObject(event, INFINITE); + err = WaitForSingleObject(event->handle, INFINITE); } if (err == WAIT_OBJECT_0) { @@ -367,8 +378,9 @@ os_event_wait_time( #endif } +#ifdef __WIN__ /************************************************************** -Waits for any event in an event array. Returns if even a single +Waits for any event in an OS native event array. Returns if even a single one is signaled or becomes signaled. */ ulint @@ -376,18 +388,18 @@ os_event_wait_multiple( /*===================*/ /* out: index of the event which was signaled */ - ulint n, /* in: number of events in the + ulint n, /* in: number of events in the array */ - os_event_t* event_array) /* in: pointer to an array of event + os_native_event_t* native_event_array) + /* in: pointer to an array of event handles */ { -#ifdef __WIN__ DWORD index; - ut_a(event_array); + ut_a(native_event_array); ut_a(n > 0); - index = WaitForMultipleObjects(n, event_array, + index = WaitForMultipleObjects(n, native_event_array, FALSE, /* Wait for any 1 event */ INFINITE); /* Infinite wait time limit */ @@ -399,21 +411,12 @@ os_event_wait_multiple( } return(index - WAIT_OBJECT_0); -#else - ut_a(n == 0); - - /* In Posix we can only wait for a single event */ - - os_event_wait(*event_array); - - return(0); -#endif } +#endif /************************************************************* -Creates an operating system mutex semaphore. 
-Because these are slow, the mutex semaphore of the database -itself (sync_mutex_t) should be used where possible. */ +Creates an operating system mutex semaphore. Because these are slow, the +mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */ os_mutex_t os_mutex_create( @@ -430,50 +433,35 @@ os_mutex_create( FALSE, /* Initial state: no owner */ name); ut_a(mutex); +#else + os_fast_mutex_t* mutex; + os_mutex_t mutex_str; + UT_NOT_USED(name); + + mutex = ut_malloc(sizeof(os_fast_mutex_t)); + + os_fast_mutex_init(mutex); +#endif mutex_str = ut_malloc(sizeof(os_mutex_str_t)); mutex_str->handle = mutex; mutex_str->count = 0; if (os_sync_mutex_inited) { + /* When creating os_sync_mutex itself we cannot reserve it */ os_mutex_enter(os_sync_mutex); } UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str); + os_mutex_count++; + if (os_sync_mutex_inited) { os_mutex_exit(os_sync_mutex); } return(mutex_str); -#else - os_fast_mutex_t* os_mutex; - os_mutex_t mutex_str; - - UT_NOT_USED(name); - - os_mutex = ut_malloc(sizeof(os_fast_mutex_t)); - - os_fast_mutex_init(os_mutex); - - mutex_str = ut_malloc(sizeof(os_mutex_str_t)); - - mutex_str->handle = os_mutex; - mutex_str->count = 0; - - if (os_sync_mutex_inited) { - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str); - - if (os_sync_mutex_inited) { - os_mutex_exit(os_sync_mutex); - } - - return(mutex_str); -#endif } /************************************************************** @@ -513,21 +501,14 @@ os_mutex_exit( /*==========*/ os_mutex_t mutex) /* in: mutex to release */ { -#ifdef __WIN__ ut_a(mutex); ut_a(mutex->count == 1); (mutex->count)--; - +#ifdef __WIN__ ut_a(ReleaseMutex(mutex->handle)); #else - ut_a(mutex); - - ut_a(mutex->count == 1); - - (mutex->count)--; - os_fast_mutex_unlock(mutex->handle); #endif } @@ -540,25 +521,25 @@ os_mutex_free( /*==========*/ os_mutex_t mutex) /* in: mutex to free */ { -#ifdef __WIN__ ut_a(mutex); - 
os_mutex_enter(os_sync_mutex); + if (os_sync_mutex_inited) { + os_mutex_enter(os_sync_mutex); + } UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex); + + os_mutex_count--; - os_mutex_exit(os_sync_mutex); + if (os_sync_mutex_inited) { + os_mutex_exit(os_sync_mutex); + } +#ifdef __WIN__ ut_a(CloseHandle(mutex->handle)); ut_free(mutex); #else - os_mutex_enter(os_sync_mutex); - - UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex); - - os_mutex_exit(os_sync_mutex); - os_fast_mutex_free(mutex->handle); ut_free(mutex->handle); ut_free(mutex); @@ -583,8 +564,19 @@ os_fast_mutex_init( #else ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST)); #endif - os_mutex_count++; #endif + if (os_sync_mutex_inited) { + /* When creating os_sync_mutex itself (in Unix) we cannot + reserve it */ + + os_mutex_enter(os_sync_mutex); + } + + os_fast_mutex_count++; + + if (os_sync_mutex_inited) { + os_mutex_exit(os_sync_mutex); + } } /************************************************************** @@ -631,6 +623,17 @@ os_fast_mutex_free( DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex); #else ut_a(0 == pthread_mutex_destroy(fast_mutex)); - os_mutex_count--; #endif + if (os_sync_mutex_inited) { + /* When freeing the last mutexes, we have + already freed os_sync_mutex */ + + os_mutex_enter(os_sync_mutex); + } + + os_fast_mutex_count--; + + if (os_sync_mutex_inited) { + os_mutex_exit(os_sync_mutex); + } } diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c index 02ea2c227a7..9af98760ad1 100644 --- a/innobase/os/os0thread.c +++ b/innobase/os/os0thread.c @@ -186,6 +186,10 @@ os_thread_exit( void* exit_value) /* in: exit value; in Windows this void* is cast as a DWORD */ { +#ifdef UNIV_DEBUG_THREAD_CREATION + printf("A thread exits.\n"); + printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); +#endif os_mutex_enter(os_sync_mutex); os_thread_count--; os_mutex_exit(os_sync_mutex); diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index 
9353766248d..90331157289 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -868,6 +868,7 @@ srv_release_max_if_no_queries(void) mutex_exit(&kernel_mutex); } +#ifdef notdefined /*********************************************************************** Releases one utility thread if no queries are active and the high-water mark 2 for the utility is exceeded. */ @@ -902,7 +903,6 @@ srv_release_one_if_no_queries(void) mutex_exit(&kernel_mutex); } -#ifdef notdefined /*********************************************************************** Decrements the utility meter by the value given and suspends the calling thread, which must be an utility thread of the type given, if necessary. */ @@ -1012,6 +1012,8 @@ srv_communication_init( ut_a(ret == 0); } + +#ifdef notdefined /************************************************************************* Implements the recovery utility. */ @@ -1072,6 +1074,7 @@ srv_purge_thread( return(0); } +#endif /* notdefined */ /************************************************************************* Creates the utility threads. */ @@ -1102,6 +1105,7 @@ srv_create_utility_threads(void) ut_a(thread); */ } +#ifdef notdefined /************************************************************************* Implements the communication threads. */ static @@ -1151,6 +1155,7 @@ srv_com_thread( return(0); } +#endif /************************************************************************* Creates the communication threads. */ @@ -1171,6 +1176,7 @@ srv_create_com_threads(void) } } +#ifdef notdefined /************************************************************************* Implements the worker threads. */ static @@ -1215,6 +1221,7 @@ srv_worker_thread( return(0); } +#endif /************************************************************************* Creates the worker threads. 
*/ @@ -2490,6 +2497,10 @@ srv_lock_timeout_and_monitor_thread( char* buf; ulint i; +#ifdef UNIV_DEBUG_THREAD_CREATION + printf("Lock timeout thread starts\n"); + printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); +#endif UT_NOT_USED(arg); srv_last_monitor_time = time(NULL); last_table_monitor_time = time(NULL); @@ -2630,6 +2641,10 @@ loop: exit_func: srv_lock_timeout_and_monitor_active = FALSE; + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); #ifndef __WIN__ return(NULL); #else @@ -2655,6 +2670,10 @@ srv_error_monitor_thread( ulint cnt = 0; UT_NOT_USED(arg); +#ifdef UNIV_DEBUG_THREAD_CREATION + printf("Error monitor thread starts\n"); + printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); +#endif loop: srv_error_monitor_active = TRUE; @@ -2691,6 +2710,9 @@ loop: srv_error_monitor_active = FALSE; + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + os_thread_exit(NULL); #ifndef __WIN__ @@ -2771,6 +2793,10 @@ srv_master_thread( UT_NOT_USED(arg); +#ifdef UNIV_DEBUG_THREAD_CREATION + printf("Master thread starts\n"); + printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); +#endif srv_main_thread_process_no = os_proc_get_number(); srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); @@ -3006,6 +3032,15 @@ background_loop: n_tables_to_drop = row_drop_tables_for_mysql_in_background(); + if (n_tables_to_drop > 0) { + /* Do not monopolize the CPU even if there are tables waiting + in the background drop queue. (It is essentially a bug if + MySQL tries to drop a table while there are still open handles + to it and we had to put it to the background drop queue.) 
*/ + + os_thread_sleep(100000); + } + srv_main_thread_op_info = (char*)"purging"; if (srv_fast_shutdown && srv_shutdown_state > 0) { @@ -3144,6 +3179,13 @@ suspend_thread: goto loop; + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. + The thread actually never comes here because it is exited in an + os_event_wait(). */ + + os_thread_exit(NULL); + #ifndef __WIN__ return(NULL); #else diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index 9a2bf72130b..ad985d8282d 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -414,8 +414,10 @@ io_handler_thread( segment = *((ulint*)arg); -/* printf("Io handler thread %lu starts\n", segment); */ - +#ifdef UNIV_DEBUG_THREAD_CREATION + printf("Io handler thread %lu starts\n", segment); + printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); +#endif for (i = 0;; i++) { fil_aio_wait(segment); @@ -424,6 +426,13 @@ io_handler_thread( mutex_exit(&ios_mutex); } + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. + The thread actually never comes here because it is exited in an + os_event_wait(). */ + + os_thread_exit(NULL); + #ifndef __WIN__ return(NULL); #else @@ -1585,21 +1594,32 @@ innobase_shutdown_for_mysql(void) os_thread_count); } - /* 3. Free all InnoDB's own mutexes */ + /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside + them */ sync_close(); - /* 4. Free all OS synchronization primitives (in Windows currently - events are not freed) */ + /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ srv_free(); os_sync_free(); - /* 5. Free all allocated memory (and the os_fast_mutex created in + /* 5. 
Free all allocated memory and the os_fast_mutex created in ut0mem.c */ ut_free_all_mem(); + if (os_thread_count != 0 + || os_event_count != 0 + || os_mutex_count != 0 + || os_fast_mutex_count != 0) { + fprintf(stderr, +"InnoDB: Warning: some resources were not cleaned up in shutdown:\n" +"InnoDB: threads %lu, events %lu, os_mutexes %lu, os_fast_mutexes %lu\n", + os_thread_count, os_event_count, os_mutex_count, + os_fast_mutex_count); + } + if (srv_print_verbose_log) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Shutdown completed\n"); diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result index 1a1de0ee3d5..c61563e6010 100644 --- a/mysql-test/r/func_str.result +++ b/mysql-test/r/func_str.result @@ -64,7 +64,7 @@ concat_ws(NULL,'a') concat_ws(',',NULL,'') NULL select concat_ws(',','',NULL,'a'); concat_ws(',','',NULL,'a') -a +,a SELECT CONCAT('"',CONCAT_WS('";"',repeat('a',60),repeat('b',60),repeat('c',60),repeat('d',100)), '"'); CONCAT('"',CONCAT_WS('";"',repeat('a',60),repeat('b',60),repeat('c',60),repeat('d',100)), '"') "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";"dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" diff --git a/mysql-test/r/rpl_loaddata.result b/mysql-test/r/rpl_loaddata.result index 5b7aab3df74..f4b003d6cc3 100644 --- a/mysql-test/r/rpl_loaddata.result +++ b/mysql-test/r/rpl_loaddata.result @@ -22,3 +22,9 @@ day id category name drop table t1; drop table t2; drop table t3; +create table t1(a int, b int, unique(b)); +insert into t1 values(1,10); +load data infile '../../std_data/rpl_loaddata.dat' into table t1; +show status like 'slave_running'; +Variable_name Value +Slave_running OFF diff --git a/mysql-test/t/rpl_loaddata.test b/mysql-test/t/rpl_loaddata.test index 1f34aa9d3f9..dc4eadda192 100644 --- 
a/mysql-test/t/rpl_loaddata.test +++ b/mysql-test/t/rpl_loaddata.test @@ -4,6 +4,9 @@ # # check replication of load data for temporary tables with additional parameters # +# check if duplicate entries trigger an error (they should unless IGNORE or +# REPLACE was used on the master) (bug 571). + source include/master-slave.inc; create table t1(a int not null auto_increment, b int, primary key(a) ); @@ -27,7 +30,21 @@ connection master; drop table t1; drop table t2; drop table t3; +create table t1(a int, b int, unique(b)); save_master_pos; connection slave; sync_with_master; +insert into t1 values(1,10); + +connection master; +load data infile '../../std_data/rpl_loaddata.dat' into table t1; + +save_master_pos; +connection slave; +# don't sync_with_master because the slave SQL thread should be stopped because +# of the error so MASTER_POS_WAIT() will not return; just sleep and hope the +# slave SQL thread will have had time to stop. + +sleep 1; +show status like 'slave_running'; diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c index a2647ec7399..7a845e3eb00 100644 --- a/mysys/thr_alarm.c +++ b/mysys/thr_alarm.c @@ -122,12 +122,24 @@ void init_thr_alarm(uint max_alarms) /* Request alarm after sec seconds. - A pointer is returned with points to a non-zero int when the alarm has been - given. This can't be called from the alarm-handling thread. - Returns 0 if no more alarms are allowed (aborted by process) + + SYNOPSIS + thr_alarm() + alrm Pointer to alarm detection + alarm_data Structure to store in alarm queue + + NOTES + This function can't be called from the alarm-handling thread. 
+ + RETURN VALUES + 0 ok + 1 If no more alarms are allowed (aborted by process) + + Stores in first argument a pointer to a non-zero int which is set to 0 + when the alarm has been given */ -bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) +my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) { ulong now; sigset_t old_mask; @@ -209,7 +221,7 @@ void thr_end_alarm(thr_alarm_t *alarmed) ALARM *alarm_data; sigset_t old_mask; uint i; - bool found=0; + my_bool found=0; DBUG_ENTER("thr_end_alarm"); pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); @@ -230,10 +242,9 @@ void thr_end_alarm(thr_alarm_t *alarmed) DBUG_ASSERT(!*alarmed || found); if (!found) { -#ifdef MAIN - printf("Warning: Didn't find alarm %lx in queue of %d alarms\n", - (long) *alarmed, alarm_queue.elements); -#endif + if (*alarmed) + fprintf(stderr,"Warning: Didn't find alarm %lx in queue of %d alarms\n", + (long) *alarmed, alarm_queue.elements); DBUG_PRINT("warning",("Didn't find alarm %lx in queue\n", (long) *alarmed)); } diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index bf4dc7e5066..1cf123dbec6 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -131,9 +131,11 @@ static void innobase_print_error(const char* db_errpfx, char* buffer); /********************************************************************** Releases possible search latch and InnoDB thread FIFO ticket. These should -be released at each SQL statement end. It does no harm to release these -also in the middle of an SQL statement. */ +be released at each SQL statement end, and also when mysqld passes the +control to the client. It does no harm to release these also in the middle +of an SQL statement. 
*/ static +inline void innobase_release_stat_resources( /*============================*/ @@ -914,6 +916,11 @@ innobase_commit_low( trx_t* trx) /* in: transaction handle */ { #ifdef HAVE_REPLICATION + if (trx->conc_state == TRX_NOT_STARTED) { + + return; + } + /* TODO: Guilhem should check if master_log_name, pending etc. are right if the master log gets rotated! Possible bug here. Comment by Heikki March 4, 2003. */ @@ -929,11 +936,13 @@ innobase_commit_low( )); } #endif /* HAVE_REPLICATION */ + trx_commit_for_mysql(trx); } /********************************************************************* -Commits a transaction in an InnoDB database. */ +Commits a transaction in an InnoDB database or marks an SQL statement +ended. */ int innobase_commit( @@ -951,29 +960,45 @@ innobase_commit( DBUG_ENTER("innobase_commit"); DBUG_PRINT("trans", ("ending transaction")); + /* The flag thd->transaction.all.innodb_active_trans is set to 1 + in ::external_lock and ::start_stmt, and it is only set to 0 in + a commit or a rollback. If it is 0 we know there cannot be resources + to be freed and we can return immediately. 
*/ + + if (thd->transaction.all.innodb_active_trans == 0) { + + DBUG_RETURN(0); + } + trx = check_trx_exists(thd); - if (trx->auto_inc_lock) { - - /* If we had reserved the auto-inc lock for - some table in this SQL statement, we release it now */ - - srv_conc_enter_innodb(trx); - row_unlock_table_autoinc_for_mysql(trx); - srv_conc_exit_innodb(trx); - } - - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle + || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { + innobase_commit_low(trx); - thd->transaction.all.innodb_active_trans=0; + + thd->transaction.all.innodb_active_trans = 0; + } else { + if (trx->auto_inc_lock) { + /* If we had reserved the auto-inc lock for some + table in this SQL statement we release it now */ + + srv_conc_enter_innodb(trx); + row_unlock_table_autoinc_for_mysql(trx); + srv_conc_exit_innodb(trx); + } + /* Store the current undo_no of the transaction so that we + know where to roll back if we have to roll back the next + SQL statement */ + + trx_mark_sql_stat_end(trx); } - /* Release possible statement level resources */ + /* Release a possible FIFO ticket and search latch */ innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); - /* Tell InnoDB server that there might be work for - utility threads: */ + /* Tell the InnoDB server that there might be work for utility + threads: */ srv_active_wake_master_thread(); @@ -1044,7 +1069,7 @@ innobase_commit_complete( } /********************************************************************* -Rolls back a transaction in an InnoDB database. */ +Rolls back a transaction or the latest SQL statement in an InnoDB database. 
*/ int innobase_rollback( @@ -1085,11 +1110,9 @@ innobase_rollback( srv_conc_exit_innodb(trx); - /* Release possible statement level resources */ + /* Release a possible FIFO ticket and search latch */ innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } @@ -3032,6 +3055,8 @@ create_index( KEY* key; KEY_PART_INFO* key_part; ulint ind_type; + ulint col_type; + ulint prefix_len; ulint i; DBUG_ENTER("create_index"); @@ -3059,6 +3084,27 @@ create_index( for (i = 0; i < n_fields; i++) { key_part = key->key_part + i; + if (key_part->length != key_part->field->pack_length()) { + prefix_len = key_part->length; + + col_type = get_innobase_type_from_mysql_type( + key_part->field); + if (col_type == DATA_INT + || col_type == DATA_FLOAT + || col_type == DATA_DOUBLE + || col_type == DATA_DECIMAL) { + fprintf(stderr, +"InnoDB: error: MySQL is trying to create a column prefix index field\n" +"InnoDB: on an inappropriate data type %lu. Table name %s, column name %s.\n", + col_type, table_name, + key_part->field->field_name); + + prefix_len = 0; + } + } else { + prefix_len = 0; + } + /* We assume all fields should be sorted in ascending order, hence the '0': */ dict_mem_index_add_field(index, @@ -3600,8 +3646,7 @@ ha_innobase::records_in_range( /************************************************************************* Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc and its better if the upper bound hold. -*/ +filesort.cc. */ ha_rows ha_innobase::estimate_number_of_rows(void) @@ -3636,11 +3681,11 @@ ha_innobase::estimate_number_of_rows(void) /* Calculate a minimum length for a clustered index record and from that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.c when a tablehas grown - by a threshold factor, we must add a safety factor 2 in front - of the formula below. 
*/ + new statistics in row0mysql.c when a table has grown by a threshold + factor, we must add a safety factor 2 in front of the formula below. */ - estimate = 2 * local_data_file_length / dict_index_calc_min_rec_len(index); + estimate = 2 * local_data_file_length / + dict_index_calc_min_rec_len(index); prebuilt->trx->op_info = (char*)""; @@ -3667,27 +3712,36 @@ ha_innobase::scan_time() return((double) (prebuilt->table->stat_clustered_index_size)); } -/* - Calculate the time it takes to read a set of ranges through and index - This enables us to optimise reads for clustered indexes. -*/ +/********************************************************************** +Calculate the time it takes to read a set of ranges through an index +This enables us to optimise reads for clustered indexes. */ -double ha_innobase::read_time(uint index, uint ranges, ha_rows rows) +double +ha_innobase::read_time( +/*===================*/ + /* out: estimated time measured in disk seeks */ + uint index, /* in: key number */ + uint ranges, /* in: how many ranges */ + ha_rows rows) /* in: estimated number of rows in the ranges */ { - ha_rows total_rows; - double time_for_scan; - if (index != table->primary_key) - return handler::read_time(index, ranges, rows); // Not clustered - if (rows <= 2) - return (double) rows; - /* - Assume that the read is proportional to scan time for all rows + one - seek per range. - */ - time_for_scan= scan_time(); - if ((total_rows= estimate_number_of_rows()) < rows) - return time_for_scan; - return (ranges + (double) rows / (double) total_rows * time_for_scan); + ha_rows total_rows; + double time_for_scan; + + if (index != table->primary_key) + return handler::read_time(index, ranges, rows); // Not clustered + + if (rows <= 2) + return (double) rows; + + /* Assume that the read time is proportional to the scan time for all + rows + at most one seek per range. 
*/ + + time_for_scan= scan_time(); + + if ((total_rows= estimate_number_of_rows()) < rows) + return time_for_scan; + + return (ranges + (double) rows / (double) total_rows * time_for_scan); } /************************************************************************* @@ -4040,10 +4094,10 @@ ha_innobase::reset(void) } /********************************************************************** -Inside LOCK TABLES MySQL will not call external_lock() between SQL -statements. It will call this function at the start of each SQL statement. -Note also a spacial case: if a temporary table is created inside LOCK -TABLES, MySQL has not called external_lock() at all on that table. */ +MySQL calls this function at the start of each SQL statement. Inside LOCK +TABLES the ::external_lock method does not work to mark SQL statement +borders. Note also a special case: if a temporary table is created inside +LOCK TABLES, MySQL has not called external_lock() at all on that table. */ int ha_innobase::start_stmt( @@ -4058,8 +4112,14 @@ ha_innobase::start_stmt( trx = prebuilt->trx; + /* Here we release the search latch and the InnoDB thread FIFO ticket + if they were reserved. They should have been released already at the + end of the previous statement, but because inside LOCK TABLES the + lock count method does not work to mark the end of a SELECT statement, + that may not be the case. We MUST release the search latch before an + INSERT, for example. 
*/ + innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && trx->read_view) { @@ -4082,7 +4142,8 @@ ha_innobase::start_stmt( prebuilt->select_lock_type = LOCK_X; } - + + /* Set the MySQL flag to mark that there is an active transaction */ thd->transaction.all.innodb_active_trans = 1; return(0); @@ -4146,17 +4207,20 @@ ha_innobase::external_lock( } if (lock_type != F_UNLCK) { - if (trx->n_mysql_tables_in_use == 0) { - trx_mark_sql_stat_end(trx); - } + /* MySQL is setting a new table lock */ + /* Set the MySQL flag to mark that there is an active + transaction */ thd->transaction.all.innodb_active_trans = 1; + trx->n_mysql_tables_in_use++; prebuilt->mysql_has_locked = TRUE; - trx->isolation_level = innobase_map_isolation_level( + if (trx->n_mysql_tables_in_use == 1) { + trx->isolation_level = innobase_map_isolation_level( (enum_tx_isolation) thd->variables.tx_isolation); + } if (trx->isolation_level == TRX_ISO_SERIALIZABLE && prebuilt->select_lock_type == LOCK_NONE) { @@ -4172,37 +4236,44 @@ ha_innobase::external_lock( trx->mysql_n_tables_locked++; } - } else { - trx->n_mysql_tables_in_use--; - prebuilt->mysql_has_locked = FALSE; - auto_inc_counter_for_this_stat = 0; - if (trx->n_mysql_tables_in_use == 0) { + DBUG_RETURN(error); + } - trx->mysql_n_tables_locked = 0; + /* MySQL is releasing a table lock */ - prebuilt->used_in_HANDLER = FALSE; + trx->n_mysql_tables_in_use--; + prebuilt->mysql_has_locked = FALSE; + auto_inc_counter_for_this_stat = 0; - /* Here we release the search latch and InnoDB - thread FIFO ticket if they were reserved. 
*/ + /* If the MySQL lock count drops to zero we know that the current SQL + statement has ended */ - innobase_release_stat_resources(trx); + if (trx->n_mysql_tables_in_use == 0) { + trx->mysql_n_tables_locked = 0; + prebuilt->used_in_HANDLER = FALSE; + + if (!(thd->options + & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + if (thd->transaction.all.innodb_active_trans != 0) { + innobase_commit(thd, trx); + } + } else { if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && trx->read_view) { - /* At low transaction isolation levels we let + /* At low transaction isolation levels we let each consistent read set its own snapshot */ - read_view_close_for_mysql(trx); + read_view_close_for_mysql(trx); } - - if (!(thd->options - & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - innobase_commit(thd, trx); - } } + + /* Here we release the search latch and the InnoDB thread FIFO + ticket if they were reserved. */ + + innobase_release_stat_resources(trx); } DBUG_RETURN(error); @@ -4513,4 +4584,3 @@ ha_innobase::get_auto_increment() } #endif /* HAVE_INNOBASE_DB */ - diff --git a/sql/handler.cc b/sql/handler.cc index db1857e938c..493959f0473 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -218,8 +218,12 @@ void ha_close_connection(THD* thd) } /* - This is used to commit or rollback a single statement depending - on the value of error + This is used to commit or rollback a single statement depending on the value + of error. Note that if the autocommit is on, then the following call inside + InnoDB will commit or rollback the whole transaction (= the statement). The + autocommit mechanism built into InnoDB is based on counting locks, but if + the user has used LOCK TABLES then that mechanism does not know to do the + commit. 
*/ int ha_autocommit_or_rollback(THD *thd, int error) diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 2a4f4e0c410..e5b73e7670f 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -515,18 +515,18 @@ String *Item_func_concat_ws::val_str(String *str) str->length(0); // QQ; Should be removed res=str; - // Skip until non-null and non-empty argument is found. + // Skip until non-null argument is found. // If not, return the empty string for (i=0; i < arg_count; i++) - if ((res= args[i]->val_str(str)) && res->length()) + if ((res= args[i]->val_str(str))) break; if (i == arg_count) return &empty_string; for (i++; i < arg_count ; i++) { - if (!(res2= args[i]->val_str(use_as_buff)) || !res2->length()) - continue; // Skip NULL and empty string + if (!(res2= args[i]->val_str(use_as_buff))) + continue; // Skip NULL if (res->length() + sep_str->length() + res2->length() > current_thd->variables.max_allowed_packet) diff --git a/sql/log_event.cc b/sql/log_event.cc index fb690efc364..3d500ede462 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1573,9 +1573,27 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, else { char llbuff[22]; - enum enum_duplicates handle_dup = DUP_IGNORE; + enum enum_duplicates handle_dup; if (sql_ex.opt_flags & REPLACE_FLAG) handle_dup= DUP_REPLACE; + else if (sql_ex.opt_flags & IGNORE_FLAG) + handle_dup= DUP_IGNORE; + else + /* + Note that when replication is running fine, if it was DUP_ERROR on the + master then we could choose DUP_IGNORE here, because if DUP_ERROR + succeeded on master, and data is identical on the master and slave, + then there should be no uniqueness errors on slave, so DUP_IGNORE is + the same as DUP_ERROR. But in the unlikely case of uniqueness errors + (because the data on the master and slave happen to be different (user + error or bug)), we want LOAD DATA to print an error message on the + slave to discover the problem.
+ + If reading from net (a 3.23 master), mysql_load() will change this + to DUP_IGNORE. + */ + handle_dup= DUP_ERROR; + sql_exchange ex((char*)fname, sql_ex.opt_flags & DUMPFILE_FLAG); String field_term(sql_ex.field_term,sql_ex.field_term_len,log_cs); String enclosed(sql_ex.enclosed,sql_ex.enclosed_len,log_cs); @@ -1637,12 +1655,19 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, close_thread_tables(thd); if (thd->query_error) { - int sql_error= thd->net.last_errno; - if (!sql_error) - sql_error= ER_UNKNOWN_ERROR; - slave_print_error(rli,sql_error, - "Error '%s' running LOAD DATA INFILE", - ER_SAFE(sql_error)); + /* this err/sql_errno code is copy-paste from send_error() */ + const char *err; + int sql_errno; + if ((err=thd->net.last_error)[0]) + sql_errno=thd->net.last_errno; + else + { + sql_errno=ER_UNKNOWN_ERROR; + err=ER(sql_errno); + } + slave_print_error(rli,sql_errno, + "Error '%s' running load data infile", + err); free_root(&thd->mem_root,0); return 1; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2d5ce52d557..3f3b2ee7c9f 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -267,7 +267,7 @@ ulong slave_net_timeout; ulong thread_cache_size=0, binlog_cache_size=0, max_binlog_cache_size=0; ulong query_cache_size=0; ulong com_stat[(uint) SQLCOM_END], com_other; -ulong bytes_sent, bytes_received; +ulong bytes_sent, bytes_received, net_big_packet_count; ulong refresh_version=1L,flush_version=1L; /* Increments on each reload */ ulong query_id, long_query_count, aborted_threads, aborted_connects; ulong delayed_insert_timeout, delayed_insert_limit, delayed_queue_size; @@ -987,14 +987,21 @@ static void set_ports() static void set_user(const char *user) { #if !defined(__WIN__) && !defined(OS2) && !defined(__NETWARE__) - struct passwd *ent; + struct passwd *ent; + uid_t user_id= geteuid(); // don't bother if we aren't superuser - if (geteuid()) + if (user_id) { if (user) - fprintf(stderr, - "Warning: One can only use the --user switch if 
running as root\n"); + { + /* Don't give a warning, if real user is same as given with --user */ + struct passwd *user_info= getpwnam(user); + + if (!user_info || user_id != user_info->pw_uid) + fprintf(stderr, + "Warning: One can only use the --user switch if running as root\n"); + } return; } else if (!user) diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 621fa5f6334..eb4d76bbf6e 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -73,11 +73,13 @@ void sql_print_error(const char *format,...); #define USE_QUERY_CACHE extern uint test_flags; extern void query_cache_insert(NET *net, const char *packet, ulong length); -extern ulong bytes_sent, bytes_received; +extern ulong bytes_sent, bytes_received, net_big_packet_count; extern pthread_mutex_t LOCK_bytes_sent , LOCK_bytes_received; #else #undef statistic_add +#undef statistic_increment #define statistic_add(A,B,C) +#define statistic_increment(A,B) #endif #define TEST_BLOCKING 8 @@ -562,7 +564,7 @@ static my_bool net_safe_read(NET *net, char *buff, uint32 length, if ((tmp=vio_read(net->vio,(char*) net->buff, length)) <= 0) { my_bool interrupted = vio_should_retry(net->vio); - if (!thr_got_alarm(&alarmed) && interrupted) + if (!thr_got_alarm(alarmed) && interrupted) { /* Probably in MIT threads */ if (retry_count++ < net->retry_count) continue; @@ -596,10 +598,13 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed, DBUG_ENTER("my_net_skip_rest"); DBUG_PRINT("enter",("bytes_to_skip: %u", (uint) remain)); - if (!thr_alarm_in_use(&alarmed)) + /* The following is good for debugging */ + statistic_increment(net_big_packet_count,&LOCK_bytes_received); + + if (!thr_alarm_in_use(alarmed)) { my_bool old_mode; - if (!thr_alarm(alarmed,net->read_timeout, alarm_buff) || + if (thr_alarm(alarmed,net->read_timeout, alarm_buff) || vio_blocking(net->vio, TRUE, &old_mode) < 0) DBUG_RETURN(1); /* Can't setup, abort */ } diff --git a/sql/slave.cc b/sql/slave.cc index 4767245834a..cc27bb96ab7 100644 --- 
a/sql/slave.cc +++ b/sql/slave.cc @@ -287,7 +287,7 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, goto err; rli->cur_log = &rli->cache_buf; } - if (pos > BIN_LOG_HEADER_SIZE) + if (pos >= BIN_LOG_HEADER_SIZE) my_b_seek(rli->cur_log,(off_t)pos); err: @@ -2154,8 +2154,13 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) else { sql_print_error("\ -Could not parse log event entry, check the master for binlog corruption\n\ -This may also be a network problem, or just a bug in the master or slave code.\ +Could not parse relay log event entry. The possible reasons are: the master's \ +binary log is corrupted (you can check this by running 'mysqlbinlog' on the \ +binary log), the slave's relay log is corrupted (you can check this by running \ +'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \ +or slave's MySQL code. If you want to check the master's binary log or slave's \ +relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \ +on this slave.\ "); return 1; } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index f098ce0d0b6..a092df8193b 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -891,7 +891,11 @@ pthread_handler_decl(handle_one_connection,arg) send_error(thd,net->last_errno,NullS); statistic_increment(aborted_threads,&LOCK_status); } - + else if (thd->killed) + { + statistic_increment(aborted_threads,&LOCK_status); + } + end_thread: close_connection(thd, 0, 1); end_thread(thd,1); @@ -1068,7 +1072,10 @@ bool do_command(THD *thd) vio_description(net->vio))); /* Check if we can continue without closing the connection */ if (net->error != 3) + { + statistic_increment(aborted_threads,&LOCK_status); DBUG_RETURN(TRUE); // We have to close it. 
+ } send_error(thd,net->last_errno,NullS); net->error= 0; DBUG_RETURN(FALSE); diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index fe47e553cf3..121411379f8 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -997,7 +997,7 @@ int show_binlog_events(THD* thd) { LEX_MASTER_INFO *lex_mi = &thd->lex.mi; ha_rows event_count, limit_start, limit_end; - my_off_t pos = lex_mi->pos; + my_off_t pos = max(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly char search_file_name[FN_REFLEN], *name; const char *log_file_name = lex_mi->log_file_name; pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); @@ -1025,12 +1025,6 @@ int show_binlog_events(THD* thd) if ((file=open_binlog(&log, linfo.log_file_name, &errmsg)) < 0) goto err; - if (pos < 4) - { - errmsg = "Invalid log position"; - goto err; - } - pthread_mutex_lock(log_lock); my_b_seek(&log, pos); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 5219a8df04a..97a7cc6bcda 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -804,6 +804,18 @@ master_def: MASTER_LOG_POS_SYM EQ ulonglong_num { Lex->mi.pos = $3; + /* + If the user specified a value < BIN_LOG_HEADER_SIZE, adjust it + instead of causing subsequent errors. + We need to do it in this file, because only there we know that + MASTER_LOG_POS has been explicitly specified. On the contrary + in change_master() (sql_repl.cc) we cannot distinguish between 0 + (MASTER_LOG_POS explicitly specified as 0) and 0 (unspecified), + whereas we want to distinguish (specified 0 means "read the binlog + from 0" (4 in fact), unspecified means "don't change the position + (keep the preceding value)").
+ */ + Lex->mi.pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.pos); } | MASTER_CONNECT_RETRY_SYM EQ ULONG_NUM @@ -819,6 +831,8 @@ master_def: RELAY_LOG_POS_SYM EQ ULONG_NUM { Lex->mi.relay_log_pos = $3; + /* Adjust if < BIN_LOG_HEADER_SIZE (same comment as Lex->mi.pos) */ + Lex->mi.relay_log_pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos); } ; diff --git a/sql/unireg.h b/sql/unireg.h index cd459dfc783..4bbfa8b0fae 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -141,10 +141,13 @@ */ #define MIN_TURBOBM_PATTERN_LEN 3 -/* Defines for binary logging */ - -#define BIN_LOG_HEADER_SIZE 4 +/* + Defines for binary logging. + Do not decrease the value of BIN_LOG_HEADER_SIZE. + Do not even increase it before checking code. +*/ +#define BIN_LOG_HEADER_SIZE 4 #define FLOATING_POINT_BUFFER 331 /* Include prototypes for unireg */ diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index de01142beac..694e6fa8ebb 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -19,7 +19,7 @@ # Required-Start: $local_fs $network $remote_fs # Required-Stop: $local_fs $network $remote_fs # Default-Start: 2 3 4 5 -# Default-Stop: 2 3 4 5 +# Default-Stop: 0 1 6 # Short-Description: start and stop MySQL # Description: MySQL is a very fast and reliable SQL database engine. 
### END INIT INFO diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index 11e850206c3..81c05f9cc89 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -254,6 +254,13 @@ export PATH # Build the 4.0 Max binary (includes BDB and UDFs and therefore # cannot be linked statically against the patched glibc) +# If we want to compile with RAID using gcc 3, we need to use +# gcc instead of g++ to avoid linking problems (RAID code is written in C++) +if gcc -v 2>&1 | grep 'version 3' > /dev/null 2>&1 +then + export CXX="gcc" +fi + BuildMySQL "--enable-shared \ --with-openssl \ --with-berkeley-db \ @@ -319,6 +326,10 @@ install -m644 $MBD/sql/mysqld.sym $RBR/usr/lib/mysql/mysqld.sym install -m644 $MBD/support-files/mysql-log-rotate $RBR/etc/logrotate.d/mysql install -m755 $MBD/support-files/mysql.server $RBR/etc/init.d/mysql +# Create a symlink "rcmysql", pointing to the init.script. SuSE users +# will appreciate that, as all services usually offer this. +ln -s ../../sbin/init.d/mysql $RPM_BUILD_ROOT/usr/sbin/rcmysql + # Create symbolic compatibility link safe_mysqld -> mysqld_safe # (safe_mysqld will be gone in MySQL 4.1) ln -sf ./mysqld_safe $RBR/usr/bin/safe_mysqld @@ -463,6 +474,7 @@ fi %attr(755, root, root) /usr/bin/safe_mysqld %attr(755, root, root) /usr/sbin/mysqld +%attr(755, root, root) /usr/sbin/rcmysql %attr(644, root, root) /usr/lib/mysql/mysqld.sym %attr(644, root, root) /etc/logrotate.d/mysql