mariadb/server-tools/instance-manager/manager.cc
unknown 4bb9efa610 Fix for BUG#24415: Instance manager test im_daemon_life_cycle fails randomly.
The cause of im_daemon_life_cycle.imtest random failures was the following
behaviour of some implementations of LINUX threads: let's suppose that a
process has several threads (in LINUX threads, there is a separate process for
each thread). When the main process gets killed, the parent receives SIGCHLD
before all threads (child processes) die. In other words, the parent receives
SIGCHLD, when its child is not completely dead.

In terms of IM, that means that IM-angel receives SIGCHLD when IM-main is not dead
and still holds some resources. After receiving SIGCHLD, IM-angel restarts
IM-main, but IM-main failed to initialize, because previous instance (copy) of
IM-main still holds server socket (TCP-port).

Another problem here was that IM-angel restarted IM-main only if it was killed
by signal. If it exited with error, IM-angel thought it's intended / graceful
shutdown and exited itself.

So, when the second instance of IM-main failed to initialize, IM-angel thought
it's intended shutdown and quit.

The fix is
  1. to change IM-angel so that it restarts IM-main if it exited with error code;
  2. to change IM-main so that it returns proper exit code in case of failure.


mysql-test/t/disabled.def:
  Enable im_daemon_life_cycle.
server-tools/instance-manager/listener.cc:
  Set error status if Listener failed to initialize.
server-tools/instance-manager/manager.cc:
  Return exit code from the manager.
server-tools/instance-manager/manager.h:
  Return exit code from the manager.
server-tools/instance-manager/mysqlmanager.cc:
  1. Restart IM-main if exit code is not EXIT_SUCCESS (0).
  2. Log IM-main exit code in case of failure.
server-tools/instance-manager/thread_registry.cc:
  Add support for exit code.
server-tools/instance-manager/thread_registry.h:
  Add support for exit code.
2007-02-20 22:31:50 +03:00

309 lines
8.1 KiB
C++

/* Copyright (C) 2003-2006 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include "manager.h"
#include "priv.h"
#include "thread_registry.h"
#include "listener.h"
#include "instance_map.h"
#include "options.h"
#include "user_map.h"
#include "log.h"
#include "guardian.h"
#include <my_sys.h>
#include <m_string.h>
#include <signal.h>
#include <thr_alarm.h>
#ifndef __WIN__
#include <sys/wait.h>
#endif
int create_pid_file(const char *pid_file_name, int pid)
{
if (FILE *pid_file= my_fopen(pid_file_name,
O_WRONLY | O_CREAT | O_BINARY, MYF(0)))
{
fprintf(pid_file, "%d\n", (int) pid);
my_fclose(pid_file, MYF(0));
return 0;
}
log_error("can't create pid file %s: errno=%d, %s",
pid_file_name, errno, strerror(errno));
return 1;
}
#ifndef __WIN__
void set_signals(sigset_t *mask)
{
/* block signals */
sigemptyset(mask);
sigaddset(mask, SIGINT);
sigaddset(mask, SIGTERM);
sigaddset(mask, SIGPIPE);
sigaddset(mask, SIGHUP);
signal(SIGPIPE, SIG_IGN);
/*
We want this signal to be blocked in all theads but the signal
one. It is needed for the thr_alarm subsystem to work.
*/
sigaddset(mask,THR_SERVER_ALARM);
/* all new threads will inherite this signal mask */
pthread_sigmask(SIG_BLOCK, mask, NULL);
/*
In our case the signal thread also implements functions of alarm thread.
Here we init alarm thread functionality. We suppose that we won't have
more then 10 alarms at the same time.
*/
init_thr_alarm(10);
}
#else
bool have_signal;
void onsignal(int signo)
{
have_signal= true;
}
void set_signals(sigset_t *set)
{
signal(SIGINT, onsignal);
signal(SIGTERM, onsignal);
have_signal= false;
}
int my_sigwait(const sigset_t *set, int *sig)
{
while (!have_signal)
{
Sleep(100);
}
return 0;
}
#endif
void stop_all(Guardian_thread *guardian, Thread_registry *registry)
{
/*
Let guardian thread know that it should break it's processing cycle,
once it wakes up.
*/
guardian->request_shutdown();
/* wake guardian */
pthread_cond_signal(&guardian->COND_guardian);
/* stop all threads */
registry->deliver_shutdown();
/* Set error status in the thread registry. */
registry->set_error_status();
}
/*
manager - entry point to the main instance manager process: start
listener thread, write pid file and enter into signal handling.
See also comments in mysqlmanager.cc to picture general Instance Manager
architecture.
*/
int manager(const Options &options)
{
Thread_registry thread_registry;
/*
All objects created in the manager() function live as long as
thread_registry lives, and thread_registry is alive until there are
working threads.
*/
User_map user_map;
Instance_map instance_map(options.default_mysqld_path);
Guardian_thread guardian_thread(thread_registry,
&instance_map,
options.monitoring_interval);
Listener_thread_args listener_args(thread_registry, options, user_map,
instance_map);
manager_pid= getpid();
instance_map.guardian= &guardian_thread;
if (instance_map.init() || user_map.init())
return 1;
if (user_map.load(options.password_file_name))
return 1;
/* write Instance Manager pid file */
log_info("IM pid file: '%s'; PID: %d.",
(const char *) options.pid_file_name,
(int) manager_pid);
if (create_pid_file(options.pid_file_name, manager_pid))
return 1;
/*
Initialize signals and alarm-infrastructure.
NOTE: To work nicely with LinuxThreads, the signal thread is the first
thread in the process.
NOTE:
After init_thr_alarm() call it's possible to call thr_alarm() (from
different threads), that results in sending ALARM signal to the alarm
thread (which can be the main thread). That signal can interrupt
blocking calls.
In other words, a blocking call can be interrupted in the main thread
after init_thr_alarm().
*/
sigset_t mask;
set_signals(&mask);
/* create guardian thread */
{
pthread_t guardian_thd_id;
pthread_attr_t guardian_thd_attr;
int rc;
/*
NOTE: Guardian should be shutdown first. Only then all other threads
need to be stopped. This should be done, as guardian is responsible
for shutting down the instances, and this is a long operation.
NOTE: Guardian uses thr_alarm() when detects current state of
instances (is_running()), but it is not interfere with
flush_instances() later in the code, because until flush_instances()
complete in the main thread, Guardian thread is not permitted to
process instances. And before flush_instances() there is no instances
to proceed.
*/
pthread_attr_init(&guardian_thd_attr);
pthread_attr_setdetachstate(&guardian_thd_attr, PTHREAD_CREATE_DETACHED);
rc= set_stacksize_n_create_thread(&guardian_thd_id, &guardian_thd_attr,
guardian, &guardian_thread);
pthread_attr_destroy(&guardian_thd_attr);
if (rc)
{
log_error("manager(): set_stacksize_n_create_thread(guardian) failed");
goto err;
}
}
/* Load instances. */
int signo;
bool shutdown_complete;
shutdown_complete= FALSE;
if (instance_map.flush_instances())
{
log_error("Cannot init instances repository. This might be caused by "
"the wrong config file options. For instance, missing mysqld "
"binary. Aborting.");
stop_all(&guardian_thread, &thread_registry);
goto err;
}
/* create the listener */
{
pthread_t listener_thd_id;
pthread_attr_t listener_thd_attr;
int rc;
pthread_attr_init(&listener_thd_attr);
pthread_attr_setdetachstate(&listener_thd_attr, PTHREAD_CREATE_DETACHED);
rc= set_stacksize_n_create_thread(&listener_thd_id, &listener_thd_attr,
listener, &listener_args);
pthread_attr_destroy(&listener_thd_attr);
if (rc)
{
log_error("manager(): set_stacksize_n_create_thread(listener) failed");
stop_all(&guardian_thread, &thread_registry);
goto err;
}
}
/*
After the list of guarded instances have been initialized,
Guardian should start them.
*/
pthread_cond_signal(&guardian_thread.COND_guardian);
while (!shutdown_complete)
{
int status= 0;
if ((status= my_sigwait(&mask, &signo)) != 0)
{
log_error("sigwait() failed");
stop_all(&guardian_thread, &thread_registry);
goto err;
}
#ifndef __WIN__
/*
On some Darwin kernels SIGHUP is delivered along with most
signals. This is why we skip it's processing on these
platforms. For more details and test program see
Bug #14164 IM tests fail on MacOS X (powermacg5)
*/
#ifdef IGNORE_SIGHUP_SIGQUIT
if ( SIGHUP == signo )
continue;
#endif
if (THR_SERVER_ALARM == signo)
process_alarm(signo);
else
#endif
{
if (!guardian_thread.is_stopped())
{
guardian_thread.request_shutdown();
pthread_cond_signal(&guardian_thread.COND_guardian);
}
else
{
thread_registry.deliver_shutdown();
shutdown_complete= TRUE;
}
}
}
err:
/* delete the pid file */
my_delete(options.pid_file_name, MYF(0));
#ifndef __WIN__
/* free alarm structures */
end_thr_alarm(1);
/* don't pthread_exit to kill all threads who did not shut down in time */
#endif
return thread_registry.get_error_status() ? 1 : 0;
}