2004-10-23 11:32:52 +04:00
|
|
|
/* Copyright (C) 2004 MySQL AB
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __GNUC__
|
|
|
|
#pragma implementation
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "guardian.h"
|
|
|
|
#include "instance_map.h"
|
2004-10-26 23:22:12 +04:00
|
|
|
#include "mysql_manager_error.h"
|
|
|
|
#include "log.h"
|
2004-10-23 11:32:52 +04:00
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
C_MODE_START
|
|
|
|
|
|
|
|
/*
  Thread entry point of the guardian: 'arg' is the Guardian_thread object
  whose run() method is executed in the new thread.
*/
pthread_handler_decl(guardian, arg)
{
  ((Guardian_thread *) arg)->run();
  return 0;
}
|
|
|
|
|
|
|
|
C_MODE_END
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Construct the guardian object

  SYNOPSIS
    Guardian_thread()
    thread_registry_arg      forwarded to Guardian_thread_args; thread_info
                             is registered in thread_registry
    instance_map_arg         forwarded to Guardian_thread_args
    monitoring_interval_arg  forwarded to Guardian_thread_args; run() sleeps
                             monitoring_interval seconds between two checks

  DESCRIPTION
    Initializes the guardian mutex and condition, resets every state flag,
    registers thread_info (built from pthread_self() of the constructing
    thread) in the thread registry and prepares the MEM_ROOT used for the
    list nodes. Both instance lists start empty.
*/

Guardian_thread::Guardian_thread(Thread_registry &thread_registry_arg,
                                 Instance_map *instance_map_arg,
                                 uint monitoring_interval_arg) :
  Guardian_thread_args(thread_registry_arg, instance_map_arg,
                       monitoring_interval_arg),
  thread_info(pthread_self())
{
  pthread_mutex_init(&LOCK_guardian, 0);
  pthread_cond_init(&COND_guardian, 0);
  shutdown_guardian= FALSE;
  is_stopped= FALSE;
  /*
    run() reads request_stop once its loop is over, so the flag must have a
    defined value even if request_stop_instances() is never called.
  */
  request_stop= FALSE;
  thread_registry.register_thread(&thread_info);
  init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
  guarded_instances= NULL;
  starting_instances= NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Destroy the guardian object

  DESCRIPTION
    The guardian mutex is acquired first, so the destruction waits for
    whoever holds the lock at that moment. Under the lock the MEM_ROOT with
    all list nodes is freed and thread_info is removed from the thread
    registry; afterwards the mutex and the condition are destroyed.
*/

Guardian_thread::~Guardian_thread()
{
  /* do not tear the guardian down while somebody is still using it */
  pthread_mutex_lock(&LOCK_guardian);

  free_root(&alloc, MYF(0));
  thread_registry.unregister_thread(&thread_info);

  pthread_mutex_unlock(&LOCK_guardian);

  /* synchronization primitives go last */
  pthread_mutex_destroy(&LOCK_guardian);
  pthread_cond_destroy(&COND_guardian);
}
|
|
|
|
|
|
|
|
|
2005-01-31 23:54:08 +03:00
|
|
|
void Guardian_thread::shutdown()
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&LOCK_guardian);
|
|
|
|
shutdown_guardian= TRUE;
|
|
|
|
pthread_mutex_unlock(&LOCK_guardian);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Guardian_thread::request_stop_instances()
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&LOCK_guardian);
|
|
|
|
request_stop= TRUE;
|
|
|
|
pthread_mutex_unlock(&LOCK_guardian);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-10-23 11:32:52 +04:00
|
|
|
/*
|
|
|
|
Run guardian thread
|
|
|
|
|
|
|
|
SYNOPSYS
|
|
|
|
run()
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
|
|
|
|
Check for all guarded instances and restart them if needed. If everything
|
|
|
|
is fine go and sleep for some time.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void Guardian_thread::run()
|
|
|
|
{
|
|
|
|
Instance *instance;
|
2005-01-31 23:54:08 +03:00
|
|
|
int restart_retry= 100;
|
2004-10-23 11:32:52 +04:00
|
|
|
LIST *loop;
|
2005-01-25 13:54:56 +03:00
|
|
|
struct timespec timeout;
|
2004-10-23 11:32:52 +04:00
|
|
|
|
|
|
|
my_thread_init();
|
2005-01-25 13:54:56 +03:00
|
|
|
pthread_mutex_lock(&LOCK_guardian);
|
|
|
|
|
2004-10-23 11:32:52 +04:00
|
|
|
|
2005-01-31 23:54:08 +03:00
|
|
|
while (!shutdown_guardian)
|
2004-10-23 11:32:52 +04:00
|
|
|
{
|
2005-01-31 23:54:08 +03:00
|
|
|
int status= 0;
|
2004-10-23 11:32:52 +04:00
|
|
|
loop= guarded_instances;
|
|
|
|
while (loop != NULL)
|
|
|
|
{
|
2005-01-31 23:54:08 +03:00
|
|
|
instance= ((GUARD_NODE *) loop->data)->instance;
|
|
|
|
if (!instance->is_running())
|
|
|
|
{
|
|
|
|
int state= 0; /* state of guardian */
|
|
|
|
|
|
|
|
if ((((GUARD_NODE *) loop->data)->crash_moment == 0))
|
|
|
|
state= 1; /* an instance just crashed */
|
|
|
|
else
|
|
|
|
if (time(NULL) - ((GUARD_NODE *) loop->data)->crash_moment <= 2)
|
|
|
|
/* try to restart an instance immediately */
|
|
|
|
state= 2;
|
|
|
|
else
|
|
|
|
state= 3; /* try to restart it */
|
|
|
|
|
|
|
|
if (state == 1)
|
|
|
|
((GUARD_NODE *) loop->data)->crash_moment= time(NULL);
|
|
|
|
|
|
|
|
if ((state == 1) || (state == 2))
|
|
|
|
{
|
|
|
|
instance->start();
|
|
|
|
((GUARD_NODE *) loop->data)->restart_counter++;
|
|
|
|
log_info("guardian: starting instance %s",
|
|
|
|
instance->options.instance_name);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ((status == ETIMEDOUT) &&
|
|
|
|
(((GUARD_NODE *) loop->data)->restart_counter < restart_retry))
|
|
|
|
{
|
|
|
|
instance->start();
|
|
|
|
((GUARD_NODE *) loop->data)->restart_counter++;
|
|
|
|
log_info("guardian: starting instance %s",
|
|
|
|
instance->options.instance_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* clear status fields */
|
|
|
|
{
|
|
|
|
((GUARD_NODE *) loop->data)->restart_counter= 0;
|
|
|
|
((GUARD_NODE *) loop->data)->crash_moment= 0;
|
|
|
|
}
|
2004-10-23 11:32:52 +04:00
|
|
|
loop= loop->next;
|
|
|
|
}
|
2004-10-26 23:22:12 +04:00
|
|
|
move_to_list(&starting_instances, &guarded_instances);
|
2005-01-25 13:54:56 +03:00
|
|
|
timeout.tv_sec= time(NULL) + monitoring_interval;
|
|
|
|
timeout.tv_nsec= 0;
|
|
|
|
|
2005-01-31 23:54:08 +03:00
|
|
|
status= pthread_cond_timedwait(&COND_guardian, &LOCK_guardian, &timeout);
|
2004-10-23 11:32:52 +04:00
|
|
|
}
|
|
|
|
|
2005-01-25 13:54:56 +03:00
|
|
|
pthread_mutex_unlock(&LOCK_guardian);
|
2005-01-31 23:54:08 +03:00
|
|
|
if (request_stop)
|
|
|
|
stop_instances();
|
|
|
|
is_stopped= TRUE;
|
|
|
|
/* now, when the Guardian is stopped we can stop the IM */
|
|
|
|
thread_registry.request_shutdown();
|
2004-10-23 11:32:52 +04:00
|
|
|
my_thread_end();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-10-26 23:22:12 +04:00
|
|
|
int Guardian_thread::start()
|
|
|
|
{
|
|
|
|
Instance *instance;
|
2004-10-27 10:21:48 +04:00
|
|
|
Instance_map::Iterator iterator(instance_map);
|
2004-10-26 23:22:12 +04:00
|
|
|
|
|
|
|
instance_map->lock();
|
2004-12-12 20:59:15 +03:00
|
|
|
while ((instance= iterator.next()))
|
2004-10-26 23:22:12 +04:00
|
|
|
{
|
2005-01-31 23:54:08 +03:00
|
|
|
if ((instance->options.nonguarded == NULL))
|
2004-10-26 23:22:12 +04:00
|
|
|
if (guard(instance))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
instance_map->unlock();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-10-23 11:32:52 +04:00
|
|
|
/*
|
|
|
|
Start instance guarding
|
|
|
|
|
|
|
|
SYNOPSYS
|
|
|
|
guard()
|
2004-10-27 11:01:38 +04:00
|
|
|
instance the instance to be guarded
|
2004-10-23 11:32:52 +04:00
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
|
2004-10-27 11:01:38 +04:00
|
|
|
The instance is added to the list of starting instances. Then after one guardian
|
|
|
|
loop it is moved to the guarded instances list. Usually guard() is called after we
|
|
|
|
start an instance, so we need to give some time to the instance to start.
|
2004-10-23 11:32:52 +04:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 - ok
|
|
|
|
1 - error occured
|
|
|
|
*/
|
|
|
|
|
2004-10-26 23:22:12 +04:00
|
|
|
|
|
|
|
int Guardian_thread::guard(Instance *instance)
|
|
|
|
{
|
|
|
|
return add_instance_to_list(instance, &starting_instances);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Move all elements of one list to another

  SYNOPSIS
    move_to_list()
    from               list to take the elements from; empty (NULL) on return
    to                 list the elements are added to with list_add()

  DESCRIPTION
    The elements are detached from the head of 'from' one by one and each
    of them is handed to list_add() together with the current 'to' list.
    The function does no locking of its own.
*/

void Guardian_thread::move_to_list(LIST **from, LIST **to)
{
  LIST *remaining;

  for (; *from != NULL; *from= remaining)
  {
    /* remember the tail before list_add() relinks the head element */
    remaining= rest(*from);
    *to= list_add(*to, *from);
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Add an instance to one of the guardian lists

  SYNOPSIS
    add_instance_to_list()
    instance           the instance to be added
    list               the list to add the instance to

  DESCRIPTION
    Allocates a LIST element and a GUARD_NODE in the guardian MEM_ROOT,
    fills the GUARD_NODE (zero restart counter, no crash moment) and adds
    the element to the list under the guardian lock.

  RETURN
    0 - ok
    1 - one of the allocations failed
*/

int Guardian_thread::add_instance_to_list(Instance *instance, LIST **list)
{
  LIST *list_item= (LIST *) alloc_root(&alloc, sizeof(LIST));
  GUARD_NODE *guard_info= (GUARD_NODE *) alloc_root(&alloc,
                                                    sizeof(GUARD_NODE));

  if (!list_item || !guard_info)
    return 1;

  /* we store the pointers to instances from the instance_map's MEM_ROOT */
  guard_info->instance= instance;
  guard_info->restart_counter= 0;
  guard_info->crash_moment= 0;
  list_item->data= (void *) guard_info;

  /* only the list manipulation itself has to be protected */
  pthread_mutex_lock(&LOCK_guardian);
  *list= list_add(*list, list_item);
  pthread_mutex_unlock(&LOCK_guardian);

  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
TODO: perhaps it would make sense to create a pool of the LIST elements
|
|
|
|
elements and give them upon request. Now we are loosing a bit of memory when
|
|
|
|
guarded instance was stopped and then restarted (since we cannot free just
|
|
|
|
a piece of the MEM_ROOT).
|
|
|
|
*/
|
|
|
|
|
2004-10-26 23:22:12 +04:00
|
|
|
int Guardian_thread::stop_guard(Instance *instance)
|
2004-10-23 11:32:52 +04:00
|
|
|
{
|
2004-10-25 14:23:31 +04:00
|
|
|
LIST *node;
|
2004-10-23 11:32:52 +04:00
|
|
|
|
|
|
|
pthread_mutex_lock(&LOCK_guardian);
|
2004-10-25 14:23:31 +04:00
|
|
|
node= guarded_instances;
|
|
|
|
|
|
|
|
while (node != NULL)
|
2004-10-23 11:32:52 +04:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
We compare only pointers, as we always use pointers from the
|
|
|
|
instance_map's MEM_ROOT.
|
|
|
|
*/
|
2005-01-31 23:54:08 +03:00
|
|
|
if (((GUARD_NODE *) node->data)->instance == instance)
|
2004-10-23 11:32:52 +04:00
|
|
|
{
|
2004-10-25 14:23:31 +04:00
|
|
|
guarded_instances= list_delete(guarded_instances, node);
|
2004-10-23 11:32:52 +04:00
|
|
|
pthread_mutex_unlock(&LOCK_guardian);
|
|
|
|
return 0;
|
|
|
|
}
|
2004-10-25 14:23:31 +04:00
|
|
|
else
|
|
|
|
node= node->next;
|
2004-10-23 11:32:52 +04:00
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&LOCK_guardian);
|
|
|
|
/* if there is nothing to delete it is also fine */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-01-31 23:54:08 +03:00
|
|
|
int Guardian_thread::stop_instances()
|
|
|
|
{
|
|
|
|
Instance *instance;
|
|
|
|
Instance_map::Iterator iterator(instance_map);
|
|
|
|
|
|
|
|
while ((instance= iterator.next()))
|
|
|
|
{
|
|
|
|
if ((instance->options.nonguarded == NULL))
|
|
|
|
{
|
|
|
|
if (stop_guard(instance))
|
|
|
|
return 1;
|
|
|
|
/* let us try to stop the server */
|
|
|
|
instance->stop();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|