#!/usr/bin/env python
"""
A script for running our stress tests repeatedly to see if any fail.
Runs a list of stress tests in parallel, reporting passes and collecting
failure scenarios until killed.  Runs with different table sizes,
cachetable sizes, and numbers of threads.
Suitable for running on a dev branch, or a release branch, or main.
Just run the script from within a branch you want to test.
By default, we stop everything, update from svn, rebuild, and restart the
tests once a day.
"""
import logging
import os
import re
import sys
import time

from glob import glob
from logging import debug, info, warning, error, exception
from optparse import OptionGroup, OptionParser
try:
    from Queue import Empty, Queue  # Python 2
except ImportError:
    from queue import Empty, Queue  # Python 3
from random import randrange, shuffle
from resource import setrlimit, RLIMIT_CORE
from shutil import copy, copytree, move, rmtree
from signal import signal, SIGHUP, SIGINT, SIGPIPE, SIGALRM, SIGTERM
from subprocess import call, Popen, PIPE, STDOUT
from tempfile import mkdtemp, mkstemp
from threading import Event, Thread, Timer
__version__ = ' $Id$ '
__copyright__ = """ Copyright (c) 2007-2012 Tokutek Inc. All rights reserved.
The technology is licensed by the Massachusetts Institute
of Technology , Rutgers State University of New Jersey , and
the Research Foundation of State University of New York at
Stony Brook under United States of America Serial
No . 11 / 760379 and to the patents and / or patent
applications resulting from it . """
def setlimits():
    """Adjust resource limits and priority of the calling process.

    Sets both the soft and hard core-file size limits to -1 and raises
    the niceness of the process by 7.  It is passed as `preexec_fn` when
    spawning test executables, so it runs in the child before exec.
    """
    setrlimit(RLIMIT_CORE, (-1, -1))
    os.nice(7)
class TestFailure(Exception):
    """Raised by a runner when a test phase exits with an unexpected status."""
    pass
class Killed(Exception):
    """Raised when a running test is terminated because the scheduler is stopping."""
    pass
class TestRunnerBase(object):
    """Common machinery for running one stress-test configuration.

    A runner owns one (executable, table size, cachetable size)
    combination.  Each call to `run()` creates a scratch directory,
    prepares an environment, runs the test phase(s), reports the result
    to the scheduler and cleans up.  Subclasses provide `run_prepare()`
    and `run_test()`, which must set `self.phase` and raise `TestFailure`
    on an unexpected exit status.

    The object also acts as a read-only mapping over its own attributes
    (see `__getitem__`) so that it can be used directly as the right-hand
    side of `'%(name)s' % runner` format expressions.
    """

    def __init__(self, scheduler, builddir, installdir, rev, jemalloc, execf, tsize, csize, test_time, savedir):
        """Store the configuration and build the child-process environment.

        scheduler  -- object providing `stopping`, `report_success` and
                      `report_failure`
        builddir   -- build tree; tests live in <builddir>/src/tests
        installdir -- install tree; <installdir>/lib goes on LD_LIBRARY_PATH
        rev        -- revision string used in reports and save names
        jemalloc   -- optional path to a library to put in LD_PRELOAD
        execf      -- name of the test executable
        tsize      -- value passed as --num_elements
        csize      -- value passed as --cachetable_size
        test_time  -- value passed as --num_seconds
        savedir    -- directory under which failure data is saved
        """
        self.scheduler = scheduler
        self.builddir = builddir
        self.installdir = installdir
        self.rev = rev
        self.execf = execf
        self.tsize = tsize
        self.csize = csize
        self.test_time = test_time
        self.savedir = savedir

        # Work on a private copy: mutating os.environ itself would make
        # every runner prepend to the same variables again and would also
        # leak LD_PRELOAD into unrelated child processes of this script.
        self.env = os.environ.copy()
        libpath = os.path.join(self.installdir, 'lib')
        if 'LD_LIBRARY_PATH' in self.env:
            self.env['LD_LIBRARY_PATH'] = '%s:%s' % (libpath, self.env['LD_LIBRARY_PATH'])
        else:
            self.env['LD_LIBRARY_PATH'] = libpath

        if jemalloc is not None and len(jemalloc) > 0:
            preload = os.path.normpath(jemalloc)
            if 'LD_PRELOAD' in self.env:
                self.env['LD_PRELOAD'] = '%s:%s' % (preload, self.env['LD_PRELOAD'])
            else:
                self.env['LD_PRELOAD'] = preload

        self.nruns = 0
        self.rundir = None
        self.outf = None
        self.times = [0, 0]
        self.is_large = (tsize >= 10000000)
        self.oldversionstr = 'noupgrade'

    def __str__(self):
        return (self.__class__.__name__ +
                '<%(execf)s, %(tsize)d, %(csize)d, %(oldversionstr)s>') % self

    def __getitem__(self, k):
        """Look up attribute `k`, so `'%(attr)s' % self` works."""
        return getattr(self, k)

    def infostr(self):
        """Return a tab-separated summary line for the pass/fail log."""
        return '\t'.join(['%(execf)s',
                          '%(rev)s',
                          '%(tsize)d',
                          '%(csize)d',
                          '%(oldversionstr)s',
                          '%(num_ptquery)d',
                          '%(num_update)d',
                          '%(time)d']) % self

    @property
    def time(self):
        """Elapsed time of the current run, or 0 if either endpoint is unset."""
        if self.times[0] != 0 and self.times[1] != 0:
            return self.times[1] - self.times[0]
        else:
            return 0

    def _random_threads(self, key):
        """Return a random thread count for `key`, fixed for the current run.

        The value is drawn once per value of `nruns` and then remembered,
        so the count passed to the test, the count written to the log and
        the count embedded in a saved-failure directory name all agree.
        """
        cache = self.__dict__.setdefault('_thread_cache', {})
        entry = cache.get(key)
        if entry is None or entry[0] != self.nruns:
            entry = (self.nruns, randrange(16))
            cache[key] = entry
        return entry[1]

    @property
    def num_ptquery(self):
        """Number of point-query threads: 1 on even runs, random otherwise."""
        if self.nruns % 2 < 1:
            return 1
        else:
            return self._random_threads('ptquery')

    @property
    def num_update(self):
        """Number of update threads: 1 on runs 0-1 of every 4, random otherwise."""
        if self.nruns % 4 < 2:
            return 1
        else:
            return self._random_threads('update')

    @property
    def envdir(self):
        """Path of the environment directory inside the current run directory."""
        return os.path.join(self.rundir, 'envdir')

    @property
    def prepareloc(self):
        """Path where a prepared environment for this configuration is cached."""
        preparename = 'dir.%(execf)s-%(tsize)d-%(csize)d' % self
        return os.path.join(self.builddir, 'src', 'tests', preparename)

    def prepare(self):
        """Populate `envdir`, from the cached copy if one exists."""
        if os.path.isdir(self.prepareloc):
            debug('%s found existing environment.', self)
            copytree(self.prepareloc, self.envdir)
        else:
            debug('%s preparing an environment.', self)
            self.run_prepare()
            self.save_prepared_envdir()

    def save_prepared_envdir(self):
        """Cache the freshly prepared environment at `prepareloc`."""
        debug('%s copying environment to %s.', self, self.prepareloc)
        copytree(self.envdir, self.prepareloc)

    def run(self):
        """Run the test once and report the outcome to the scheduler.

        `Killed` is swallowed (the scheduler is stopping).  `TestFailure`
        causes the run directory to be saved and a failure to be reported.
        Any other exception propagates to the caller after cleanup.
        """
        srctests = os.path.join(self.builddir, 'src', 'tests')
        self.rundir = mkdtemp(dir=srctests)

        try:
            outname = os.path.join(self.rundir, 'output.txt')
            self.outf = open(outname, 'w')

            try:
                self.prepare()
                debug('%s testing.', self)
                self.times[0] = time.time()
                self.run_test()
                self.times[1] = time.time()
                debug('%s done.', self)
            except Killed:
                pass
            except TestFailure:
                # Record the end time so the failure report shows how long
                # the test ran instead of always showing 0.
                self.times[1] = time.time()
                savedir = self.save()
                self.scheduler.report_failure(self)
                warning('Saved environment to %s', savedir)
            else:
                self.scheduler.report_success(self)
        finally:
            # outf is still None if open() itself failed.
            if self.outf is not None:
                self.outf.close()
                self.outf = None
            rmtree(self.rundir)
            self.rundir = None
            self.times = [0, 0]
            self.nruns += 1

    def save(self):
        """Copy the run directory, the executable and the libraries aside.

        Returns the path of the newly created directory under `savedir`.
        """
        savepfx = '%(execf)s-%(rev)s-%(tsize)d-%(csize)d-%(num_ptquery)d-%(num_update)d-%(phase)s-' % self
        savedir = mkdtemp(dir=self.savedir, prefix=savepfx)

        def targetfor(path):
            return os.path.join(savedir, os.path.basename(path))

        for f in glob(os.path.join(self.rundir, '*')):
            if os.path.isdir(f):
                copytree(f, targetfor(f))
            else:
                copy(f, targetfor(f))
        fullexecf = os.path.join(self.builddir, 'src', 'tests', self.execf)
        copy(fullexecf, targetfor(fullexecf))
        for lib in glob(os.path.join(self.installdir, 'lib', '*.so')):
            copy(lib, targetfor(lib))

        return savedir

    def waitfor(self, proc):
        """Wait for `proc` to exit, killing it if the scheduler stops.

        Raises `Killed` after terminating the child when the scheduler's
        `stopping` event becomes set.
        """
        while proc.poll() is None:
            self.scheduler.stopping.wait(1)
            if self.scheduler.stopping.isSet():
                try:
                    os.kill(proc.pid, SIGTERM)
                except OSError:
                    # The child exited between poll() and kill().
                    pass
                # Reap the child so it does not linger as a zombie and is
                # no longer using the run directory when it gets removed.
                proc.wait()
                raise Killed()

    def spawn_child(self, args):
        """Run the test executable with `args` and return its exit status.

        The command line is appended to commands.txt in the run directory
        and the child's stdout and stderr go to the run's output file.
        """
        cmdline = ' '.join([self.execf] + args)
        logging.debug('%s spawning %s', self, cmdline)
        commandsf = open(os.path.join(self.rundir, 'commands.txt'), 'a')
        try:
            commandsf.write(cmdline + '\n')
        finally:
            commandsf.close()
        proc = Popen([self.execf] + args,
                     # resolved relative to cwd (the run directory)
                     executable=os.path.join('..', self.execf),
                     env=self.env,
                     cwd=self.rundir,
                     preexec_fn=setlimits,
                     stdout=self.outf,
                     stderr=STDOUT)
        self.waitfor(proc)
        return proc.returncode

    @property
    def extraargs(self):
        # for overriding
        return []

    @property
    def prepareargs(self):
        """Arguments shared by every invocation of the test executable."""
        return ['-v',
                '--envdir', 'envdir',
                '--num_elements', str(self.tsize),
                '--cachetable_size', str(self.csize)] + self.extraargs

    @property
    def testargs(self):
        """Arguments for the stress phase, including `prepareargs`."""
        return ['--num_seconds', str(self.test_time),
                '--no-crash_on_operation_failure',
                '--num_ptquery_threads', str(self.num_ptquery),
                '--num_update_threads', str(self.num_update)] + self.prepareargs

class TestRunner(TestRunnerBase):
    """Runner for ordinary stress tests: create, then stress.

    Either phase exiting with a non-zero status is a failure.
    """

    def run_prepare(self):
        """Create the environment with --only_create."""
        self.phase = "create"
        if self.spawn_child(['--only_create'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --only_create.' % self.execf)

    def run_test(self):
        """Run the stress phase with --only_stress."""
        self.phase = "stress"
        if self.spawn_child(['--only_stress'] + self.testargs) != 0:
            raise TestFailure('%s crashed during --only_stress.' % self.execf)

class RecoverTestRunner(TestRunnerBase):
    """Runner for recovery stress tests.

    The stress phase is run with --test and is expected to exit with a
    non-zero status; a subsequent --recover invocation must then succeed.
    """

    def run_prepare(self):
        """Create the environment with --only_create --test."""
        self.phase = "create"
        if self.spawn_child(['--only_create', '--test'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --only_create --test.' % self.execf)

    def run_test(self):
        """Run the crashing stress phase, then the recovery phase."""
        self.phase = "test"
        # A zero exit status here means the expected crash did not happen.
        if self.spawn_child(['--only_stress', '--test'] + self.testargs) == 0:
            raise TestFailure('%s did not crash during --only_stress --test' % self.execf)
        self.phase = "recover"
        if self.spawn_child(['--recover'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --recover' % self.execf)

class UpgradeTestRunnerMixin(TestRunnerBase):
    """Mixin that starts each run from a saved old-version environment.

    Instead of creating a fresh environment, `run_prepare()` copies
    <old_environments_dir>/<version>/saved<pristine|stressed>-<tsize>-dir
    into the run directory.
    """

    def __init__(self, old_environments_dir, version, pristine_or_stressed, **kwargs):
        """Remaining keyword arguments are passed on to the next `__init__`."""
        super(UpgradeTestRunnerMixin, self).__init__(**kwargs)
        self.version = version
        self.pristine_or_stressed = pristine_or_stressed
        self.old_env_dirs = os.path.join(old_environments_dir, version)
        self.oldversionstr = '%(version)s-%(pristine_or_stressed)s' % self

    @property
    def extraargs(self):
        return ['--num_DBs', '1']

    @property
    def old_envdir(self):
        """Path of the saved old-version environment for this configuration."""
        oldname = 'saved%(pristine_or_stressed)s-%(tsize)d-dir' % self
        logging.debug('%s using old version environment %s from %s.', self, oldname, self.old_env_dirs)
        return os.path.join(self.old_env_dirs, oldname)

    def prepare(self):
        """Always start from the old-version environment.

        The inherited `prepare()` reuses whatever is cached at
        `prepareloc`, a path that does not depend on the old version and
        is also written by non-upgrade runners of the same configuration.
        Using it here would run the "upgrade" test on an environment
        created by the current version, so the cache is bypassed.
        """
        debug('%s preparing an environment.', self)
        self.run_prepare()

    def save_prepared_envdir(self):
        # no need to do this
        pass

    def run_prepare(self):
        """Copy the saved old-version environment into the run directory."""
        self.phase = "create"
        copytree(self.old_envdir, self.envdir)

class DoubleTestRunnerMixin(TestRunnerBase):
    """Runs the test phase twice in a row.

    Good for upgrade tests, to run the test once to upgrade it and then
    again to make sure the upgrade left it in a good state.
    """

    def run_test(self):
        """Invoke the next `run_test` in the MRO two times."""
        super(DoubleTestRunnerMixin, self).run_test()
        super(DoubleTestRunnerMixin, self).run_test()

class UpgradeTestRunner(UpgradeTestRunnerMixin, TestRunner):
    """Ordinary stress test started from an old-version environment."""
    pass

class UpgradeRecoverTestRunner(UpgradeTestRunnerMixin, RecoverTestRunner):
    """Recovery stress test started from an old-version environment."""
    pass

class DoubleUpgradeTestRunner(DoubleTestRunnerMixin, UpgradeTestRunner):
    """Upgrade stress test whose test phase is run twice."""
    pass

class DoubleUpgradeRecoverTestRunner(DoubleTestRunnerMixin, UpgradeRecoverTestRunner):
    """Upgrade recovery test whose test phase is run twice."""
    pass

class Worker(Thread):
    """Thread that repeatedly pulls a runner off the scheduler and runs it."""

    def __init__(self, scheduler):
        super(Worker, self).__init__()
        self.scheduler = scheduler

    def run(self):
        """Run tests until the scheduler's `stopping` event is set.

        Every runner taken from the queue is put back once it has been
        run (or skipped), including when the scheduler is stopping, so
        the queue still holds all runners when the scheduler is started
        again.  Any exception escaping a runner is recorded on the
        scheduler, which is then told to stop.
        """
        debug('%s starting.' % self)
        while not self.scheduler.stopping.isSet():
            try:
                # Poll with a timeout so an empty queue cannot keep this
                # thread from noticing that the scheduler is stopping.
                test_runner = self.scheduler.get(timeout=1)
            except Empty:
                continue
            if test_runner.is_large:
                if self.scheduler.nlarge + 1 > self.scheduler.maxlarge:
                    debug('%s pulled a large test, but there are already %d running.  Putting it back.',
                          self, self.scheduler.nlarge)
                    self.scheduler.put(test_runner)
                    continue
                self.scheduler.nlarge += 1
            try:
                test_runner.run()
            except Exception:
                e = sys.exc_info()[1]
                exception('Fatal error in worker thread.')
                info('Killing all workers.')
                self.scheduler.error = e
                self.scheduler.stop()
            if test_runner.is_large:
                self.scheduler.nlarge -= 1
            # Re-queue unconditionally: dropping the runner while stopping
            # would lose it for every later scheduler run.
            self.scheduler.put(test_runner)
        debug('%s exiting.' % self)

class Scheduler(Queue):
    """Queue of test runners plus the worker threads that consume it.

    Also keeps the pass/fail counters and the `stopping` event that
    workers and runners poll to know when to shut down.
    """

    def __init__(self, nworkers, maxlarge, logger):
        """nworkers -- number of worker threads to start per run
        maxlarge -- maximum number of large tests allowed at once
        logger   -- logger that receives one line per finished test
        """
        Queue.__init__(self)
        info('Initializing scheduler with %d jobs.', nworkers)
        self.nworkers = nworkers
        self.logger = logger
        self.maxlarge = maxlarge
        self.nlarge = 0  # not thread safe, don't really care right now
        self.passed = 0
        self.failed = 0
        self.workers = []
        self.stopping = Event()
        self.timer = None
        self.error = None

    def run(self, timeout):
        """Start the workers and block until the scheduler is stopped.

        If `timeout` is non-zero, a timer calls `stop()` after that many
        seconds.  On KeyboardInterrupt or SystemExit the workers are
        stopped and joined and the process exits with status 0.
        """
        info('Starting workers.')
        self.stopping.clear()
        for _ in range(self.nworkers):
            w = Worker(self)
            self.workers.append(w)
            w.start()
        if timeout != 0:
            self.timer = Timer(timeout, self.stop)
            self.timer.start()
        while not self.stopping.isSet():
            try:
                # Short joins keep this thread responsive to interrupts
                # and to the stopping event.
                for w in self.workers:
                    if self.stopping.isSet():
                        break
                    w.join(timeout=1.0)
            except (KeyboardInterrupt, SystemExit):
                debug('Scheduler interrupted. Stopping and joining threads.')
                self.stop()
                self.join()
                sys.exit(0)
        else:
            # Reached when the loop condition becomes false, i.e. the
            # timer or a worker called stop().
            debug('Scheduler stopped by someone else. Joining threads.')
            self.join()

    def join(self):
        """Cancel the timer, if any, and join every worker thread."""
        if self.timer is not None:
            self.timer.cancel()
        while len(self.workers) > 0:
            self.workers.pop().join()

    def stop(self):
        """Ask all workers and running tests to stop."""
        info('Stopping workers.')
        self.stopping.set()

    def __getitem__(self, k):
        """Look up instance attribute `k`, so `'%(attr)d' % self` works."""
        return self.__dict__[k]

    def reportstr(self):
        """Return the running pass/fail tally as a short string."""
        return '[PASS=%(passed)d FAIL=%(failed)d]' % self

    def report_success(self, runner):
        """Count a pass and log it to both the result log and the console."""
        self.passed += 1
        self.logger.info('PASSED %s', runner.infostr())
        info('%s PASSED %s', self.reportstr(), runner.infostr())

    def report_failure(self, runner):
        """Count a failure and log it to both the result log and the console."""
        self.failed += 1
        self.logger.warning('FAILED %s', runner.infostr())
        warning('%s FAILED %s', self.reportstr(), runner.infostr())

def compiler_works(cc):
    """Return True if running `cc -v` succeeds with exit status 0.

    Returns False, after logging the exception, when the command cannot
    be executed at all.
    """
    try:
        devnull = open(os.devnull, 'w')
        try:
            r = call([cc, '-v'], stdout=devnull, stderr=STDOUT)
        finally:
            # Close the handle even when call() raises.
            devnull.close()
        return r == 0
    except OSError:
        exception('Error running %s.', cc)
        return False

def rebuild(tokudb, builddir, installdir, cc, tests):
    """Update the source tree from svn and rebuild the given tests.

    tokudb     -- top of the source tree (where `svn up` and cmake point)
    builddir   -- directory in which cmake and make are run; created if
                  it does not exist
    installdir -- value passed to cmake as CMAKE_INSTALL_DIR
    cc         -- compiler name; 'icc' turns on -DINTEL_CC
    tests      -- list of make targets to build

    Exits the process with status 2 if `cc` does not work, or with the
    failing command's status if cmake or make fails.
    """
    info('Updating from svn.')
    devnull = open(os.devnull, 'w')
    try:
        call(['svn', 'up'], stdout=devnull, stderr=STDOUT, cwd=tokudb)
    finally:
        devnull.close()

    if not compiler_works(cc):
        error('Cannot find working compiler named "%s". Try sourcing the icc env script or providing another compiler with --cc.', cc)
        sys.exit(2)

    if cc == 'icc':
        iccstr = 'ON'
    else:
        iccstr = 'OFF'

    info('Building tokudb.')
    if not os.path.exists(builddir):
        os.mkdir(builddir)
    r = call(['cmake',
              '-DCMAKE_BUILD_TYPE=Debug',
              '-DINTEL_CC=%s' % iccstr,
              '-DCMAKE_INSTALL_DIR=%s' % installdir,
              tokudb],
             cwd=builddir)
    # Do not go on to build with a tree that failed to configure.
    if r != 0:
        error('Configuring the build with cmake failed.')
        sys.exit(r)
    r = call(['make', '-s'] + tests, cwd=builddir)
    if r != 0:
        error('Building the tests failed.')
        sys.exit(r)

def revfor(tokudb):
    """Return the svn revision of the tree at `tokudb` as a string.

    The value is whatever `svn info | awk '/Revision/ {print $2}'`
    prints, stripped of surrounding whitespace; it is empty when that
    pipeline prints nothing.
    """
    proc = Popen("svn info | awk '/Revision/ {print $2}'",
                 shell=True, cwd=tokudb, stdout=PIPE)
    (out, err) = proc.communicate()
    rev = out.strip()
    # The pipe yields bytes on Python 3; keep the result a native string
    # so it formats cleanly into log lines and directory names.
    if not isinstance(rev, str):
        rev = rev.decode('utf-8', 'replace')
    info('Using tokudb at r%s.', rev)
    return rev

def main(opts):
    """Build the tree, create all test runners and run them until stopped.

    `opts` is the parsed option object.  After every scheduler run that
    ends without an error the tree is rebuilt and the runners are given
    the new revision.  A KeyboardInterrupt ends the process with status
    0; an exit requested elsewhere (for example a failed rebuild) keeps
    its own status; any other exception is logged and re-raised.
    """
    builddir = os.path.join(opts.tokudb, 'build')
    installdir = os.path.join(opts.tokudb, 'install')
    if opts.build:
        rebuild(opts.tokudb, builddir, installdir, opts.cc, opts.testnames + opts.recover_testnames)
    rev = revfor(opts.tokudb)

    if not os.path.exists(opts.savedir):
        os.mkdir(opts.savedir)

    logger = logging.getLogger('stress')
    logger.propagate = False
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler(opts.log))

    info('Saving pass/fail logs to %s.', opts.log)
    info('Saving failure environments to %s.', opts.savedir)

    scheduler = Scheduler(opts.jobs, opts.maxlarge, logger)

    def upgrade_variants(kwargs):
        """Yield (pristine_or_stressed, kwargs) for every old-version combo."""
        for version in (opts.old_versions or []):
            for pristine_or_stressed in ['pristine', 'stressed']:
                upgrade_kwargs = {
                    'old_environments_dir': opts.old_environments_dir,
                    'version': version,
                    'pristine_or_stressed': pristine_or_stressed
                }
                upgrade_kwargs.update(kwargs)
                yield pristine_or_stressed, upgrade_kwargs

    runners = []
    for tsize in [2000, 200000, 50000000]:
        for csize in [50 * tsize, 1000 ** 3]:
            kwargs = {
                'scheduler': scheduler,
                'builddir': builddir,
                'installdir': installdir,
                'rev': rev,
                'jemalloc': opts.jemalloc,
                'tsize': tsize,
                'csize': csize,
                'test_time': opts.test_time,
                'savedir': opts.savedir
            }
            for test in opts.testnames:
                if opts.run_non_upgrade:
                    runners.append(TestRunner(execf=test, **kwargs))

                # never run test_stress_openclose.tdb on existing
                # environments, it doesn't want them
                if opts.run_upgrade and test != 'test_stress_openclose.tdb':
                    for pristine_or_stressed, upgrade_kwargs in upgrade_variants(kwargs):
                        # skip running test_stress4.tdb on any env
                        # that has already been stressed, as that
                        # breaks its assumptions
                        if opts.double_upgrade and test != 'test_stress4.tdb':
                            runners.append(DoubleUpgradeTestRunner(
                                execf=test,
                                **upgrade_kwargs))
                        elif not (test == 'test_stress4.tdb' and pristine_or_stressed == 'stressed'):
                            runners.append(UpgradeTestRunner(
                                execf=test,
                                **upgrade_kwargs))

            for test in opts.recover_testnames:
                if opts.run_non_upgrade:
                    runners.append(RecoverTestRunner(execf=test, **kwargs))

                if opts.run_upgrade:
                    for pristine_or_stressed, upgrade_kwargs in upgrade_variants(kwargs):
                        if opts.double_upgrade:
                            runners.append(DoubleUpgradeRecoverTestRunner(
                                execf=test,
                                **upgrade_kwargs))
                        else:
                            runners.append(UpgradeRecoverTestRunner(
                                execf=test,
                                **upgrade_kwargs))

    shuffle(runners)

    for runner in runners:
        scheduler.put(runner)

    try:
        while scheduler.error is None:
            scheduler.run(opts.rebuild_period)
            if scheduler.error is not None:
                error('Scheduler reported an error.')
                raise scheduler.error
            rebuild(opts.tokudb, builddir, installdir, opts.cc, opts.testnames + opts.recover_testnames)
            rev = revfor(opts.tokudb)
            for runner in runners:
                runner.rev = rev
    except KeyboardInterrupt:
        sys.exit(0)
    except SystemExit:
        # Keep the requested status: a failed rebuild exits non-zero and
        # must not be reported as success.
        raise
    except Exception:
        exception('Unhandled exception caught in main.')
        # Bare raise keeps the original traceback.
        raise

# relpath implementation for python <2.6
# from http://unittest-ext.googlecode.com/hg-history/1df911640f7be239e58fb185b06ac2a8489dcdc4/unittest2/unittest2/compatibility.py
# Installed as os.path.relpath only when the running Python lacks one.
if not hasattr(os.path, 'relpath'):
    if os.path is sys.modules.get('ntpath'):
        def relpath(path, start=os.path.curdir):
            """Return a relative version of a path"""

            if not path:
                raise ValueError("no path specified")
            start_list = os.path.abspath(start).split(os.path.sep)
            path_list = os.path.abspath(path).split(os.path.sep)
            if start_list[0].lower() != path_list[0].lower():
                unc_path, rest = os.path.splitunc(path)
                unc_start, rest = os.path.splitunc(start)
                if bool(unc_path) ^ bool(unc_start):
                    raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
                                     % (path, start))
                else:
                    raise ValueError("path is on drive %s, start on drive %s"
                                     % (path_list[0], start_list[0]))
            # Work out how much of the filepath is shared by start and path.
            for i in range(min(len(start_list), len(path_list))):
                if start_list[i].lower() != path_list[i].lower():
                    break
            else:
                # No mismatch found: every compared component is shared.
                i += 1

            rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
            if not rel_list:
                return os.path.curdir
            return os.path.join(*rel_list)

    else:
        # default to posixpath definition
        def relpath(path, start=os.path.curdir):
            """Return a relative version of a path"""

            if not path:
                raise ValueError("no path specified")

            start_list = os.path.abspath(start).split(os.path.sep)
            path_list = os.path.abspath(path).split(os.path.sep)

            # Work out how much of the filepath is shared by start and path.
            i = len(os.path.commonprefix([start_list, path_list]))

            rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
            if not rel_list:
                return os.path.curdir
            return os.path.join(*rel_list)

    os.path.relpath = relpath

# Command-line entry point: parse and validate options, configure the
# root logger, then hand over to main().
if __name__ == '__main__':
    a0 = os.path.abspath(sys.argv[0])

    usage = '%prog [options]\n' + __doc__
    parser = OptionParser(usage=usage)
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='show build status, passing tests, and other info')
    parser.add_option('-d', '--debug', action='store_true', dest='debug', default=False, help='show debugging info')
    parser.add_option('-l', '--log', type='string', dest='log',
                      default='/tmp/run.stress-tests.log',
                      help='where to save logfiles')
    parser.add_option('-s', '--savedir', type='string', dest='savedir',
                      default='/tmp/run.stress-tests.failures',
                      help='where to save environments and extra data for failed tests')
    # The default tree is the parent of the directory holding this script.
    default_toplevel = os.path.dirname(os.path.dirname(a0))
    parser.add_option('--tokudb', type='string', dest='tokudb',
                      default=default_toplevel,
                      help=('top of the tokudb tree (contains ft/ and src/) [default=%s]' % os.path.relpath(default_toplevel)))

    test_group = OptionGroup(parser, 'Scheduler Options', 'Control how the scheduler runs jobs.')
    test_group.add_option('-t', '--test_time', type='int', dest='test_time',
                          default=600,
                          help='time to run each test, in seconds [default=600]')
    test_group.add_option('-j', '--jobs', type='int', dest='jobs', default=8,
                          help='how many concurrent tests to run [default=8]')
    test_group.add_option('--maxlarge', type='int', dest='maxlarge', default=2,
                          help='maximum number of large tests to run concurrently (helps prevent swapping) [default=2]')
    parser.add_option_group(test_group)

    default_testnames = ['test_stress1.tdb',
                         'test_stress5.tdb',
                         'test_stress6.tdb']
    default_recover_testnames = ['recover-test_stress1.tdb',
                                 'recover-test_stress2.tdb',
                                 'recover-test_stress3.tdb']
    build_group = OptionGroup(parser, 'Build Options', 'Control how the fractal tree and tests get built.')
    build_group.add_option('--skip_build', action='store_false', dest='build', default=True,
                           help='skip the svn up and build phase before testing [default=False]')
    build_group.add_option('--rebuild_period', type='int', dest='rebuild_period', default=60 * 60 * 24,
                           help='how many seconds between doing an svn up and rebuild, 0 means never rebuild [default=24 hours]')
    build_group.add_option('--cc', type='string', dest='cc', default='icc',
                           help='which compiler to use [default=icc]')
    build_group.add_option('--jemalloc', type='string', dest='jemalloc',
                           help='a libjemalloc.so to put in LD_PRELOAD when running tests')
    # With action='append' the given names are added to the default lists.
    build_group.add_option('--add_test', action='append', type='string', dest='testnames', default=default_testnames,
                           help=('add a stress test to run [default=%r]' % default_testnames))
    build_group.add_option('--add_recover_test', action='append', type='string', dest='recover_testnames', default=default_recover_testnames,
                           help=('add a recover stress test to run [default=%r]' % default_recover_testnames))
    parser.add_option_group(build_group)

    default_old_environments_dir = '../../tokudb.data/old-stress-test-envs'
    upgrade_group = OptionGroup(parser, 'Upgrade Options', 'Also run on environments from old versions of tokudb.')
    upgrade_group.add_option('--run_upgrade', action='store_true', dest='run_upgrade', default=False,
                             help='run the tests on old dictionaries as well, to test upgrade [default=False]')
    upgrade_group.add_option('--skip_non_upgrade', action='store_false', dest='run_non_upgrade', default=True,
                             help="skip the tests that don't involve upgrade [default=False]")
    upgrade_group.add_option('--double_upgrade', action='store_true', dest='double_upgrade', default=False,
                             help='run the upgrade tests twice in a row [default=False]')
    upgrade_group.add_option('--add_old_version', action='append', type='choice', dest='old_versions', choices=['4.2.0', '5.0.8', '5.2.7'],
                             help='which old versions to use for running the stress tests in upgrade mode. can be specified multiple times [options=4.2.0, 5.0.8, 5.2.7]')
    upgrade_group.add_option('--old_environments_dir', type='string', dest='old_environments_dir',
                             default=default_old_environments_dir,
                             help=('directory containing old version environments (should contain 5.0.8/, 5.2.7/, etc, and the environments should be in those) [default=%s]' % default_old_environments_dir))
    parser.add_option_group(upgrade_group)

    (opts, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Invalid arguments: %r' % args)

    if opts.run_upgrade:
        if not os.path.isdir(opts.old_environments_dir):
            parser.error('You specified --run_upgrade but did not specify an --old_environments_dir that exists.')
        # old_versions stays None when --add_old_version was never given.
        if not opts.old_versions:
            parser.error('You specified --run_upgrade but gave no --add_old_version to run against.')
        for version in opts.old_versions:
            version_dir = os.path.join(opts.old_environments_dir, version)
            if not os.path.isdir(version_dir):
                parser.error('You specified --run_upgrade but %s is not a directory.' % version_dir)

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif opts.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    main(opts)