mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 11:01:52 +01:00
#45 add the tpch database loader tests
This commit is contained in:
parent
aa7dbbdd01
commit
04d87d9ba8
3 changed files with 367 additions and 3 deletions
|
@ -1,8 +1,9 @@
|
||||||
script to run the queries is called run.atc.ontime.bash.
|
The script to load the air traffic ontime database and run queries against it
|
||||||
|
is called run.atc.ontime.bash.
|
||||||
|
|
||||||
queries are in the ft-engine/scripts/atc.ontime directory.
|
The queries are in the ft-engine/scripts/atc.ontime directory.
|
||||||
|
|
||||||
data for the ontime database is in the tokutek-mysql-data amazon s3 bucket.
|
The data for the ontime database is in the amazon s3 bucket called tokutek-mysql-data.
|
||||||
|
|
||||||
$ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance
|
$ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance
|
||||||
2010-06-15T13:07:09.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.aa
|
2010-06-15T13:07:09.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.aa
|
||||||
|
@ -10,3 +11,9 @@ $ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance
|
||||||
2010-06-15T13:09:38.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ac
|
2010-06-15T13:09:38.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ac
|
||||||
2010-06-15T13:10:54.000Z 446709742 atc_On_Time_Performance.mysql.csv.gz.ad
|
2010-06-15T13:10:54.000Z 446709742 atc_On_Time_Performance.mysql.csv.gz.ad
|
||||||
2010-06-15T13:11:26.000Z 503 atc_On_Time_Performance.mysql.csv.gz.xml
|
2010-06-15T13:11:26.000Z 503 atc_On_Time_Performance.mysql.csv.gz.xml
|
||||||
|
|
||||||
|
The raw data is also stored in the amazon s3 bucket called tokutek-mysql-data.
|
||||||
|
|
||||||
|
$ s3ls -l tokutek-mysql-data --prefix=atc
|
||||||
|
|
||||||
|
|
||||||
|
|
323
scripts/run.tpch.bash
Executable file
323
scripts/run.tpch.bash
Executable file
|
@ -0,0 +1,323 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Print a summary of the command line options on stdout.
# Every option has the form --name=value; the value shown for each option is
# whatever the shell variable of the same name holds when usage is called.
function usage() {
    printf '%s\n' \
        "run the TPCH load and compare test" \
        "[--SCALE=$SCALE] [--ENGINE=$ENGINE]" \
        "[--dbgen=$dbgen] [--load=$load] [--check=$check] [--compare=$compare] [--query=$query]" \
        "[--mysqlbuild=$mysqlbuild] [--commit=$commit]" \
        "[--testinstance=$testinstance] [--tokudb_load_save_space=$tokudb_load_save_space]"
}
|
||||||
|
|
||||||
|
# Run a command, retrying it until it succeeds or 11 attempts have been made.
#
# Arguments: the command and its arguments.  They are joined with spaces into
#            one string and executed with "bash -c", so the string is parsed
#            by a shell again; callers that need an argument containing spaces
#            pass it with escaped quotes (\"...\").
# Outputs:   a timestamped line before each attempt, and a timestamped line
#            with the exit status and attempt number after each attempt.
# Returns:   0 when an attempt succeeded, 1 otherwise.
function retry() {
    local cmd="$*"
    local -i attempt=0
    local exitcode=1
    while (( attempt <= 10 )); do
        # the command string is quoted so that it is logged verbatim instead
        # of being word split and glob expanded
        echo "$(date) $cmd"
        bash -c "$cmd"
        exitcode=$?
        echo "$(date) $cmd $exitcode $attempt"
        attempt=$(( attempt + 1 ))
        if (( exitcode == 0 )); then break; fi
        sleep 1
    done
    [[ $exitcode -eq 0 ]]
}
|
||||||
|
|
||||||
|
# Default settings.  Any of these can be replaced from the command line with
# --name=value.

# TPCH scale factor, storage engine and the list of TPCH tables
SCALE=1
ENGINE=tokudb
TABLES="part partsupp customer lineitem nation orders region supplier"

# test phases, non-zero enables the phase
dbgen=1
load=1
compare=1
query=0     # only printed by usage in this script
check=1

# mysql server settings
datadir=/usr/local/mysql/data
mysqlbuild=
commit=0
mysqlserver=$(hostname)
mysqluser=$(whoami)
mysqlsocket=/tmp/mysql.sock

# working directories
basedir=$HOME/svn.build
builddir=$basedir/mysql.build

# the character classes are quoted so that the shell can not glob them
# against single character file names in the current directory
system=$(uname -s | tr '[:upper:]' '[:lower:]')
arch=$(uname -m | tr '[:upper:]' '[:lower:]')

testinstance=
tokudb_load_save_space=0

# where the tpch scripts and the test results live in svn
svn_server=https://svn.tokutek.com/tokudb
svn_branch=.
svn_revision=HEAD
|
||||||
|
|
||||||
|
# parse the command line
#
# Every argument must have the form --name=value and assigns value to the
# shell variable called name.  Anything else prints the usage and exits 1.
# The value is stored literally with printf -v: it is never evaluated by the
# shell, so it may contain spaces, '=' or shell metacharacters.  The only
# expansion kept from the former eval based parser is a leading "~/", which
# is replaced by $HOME/.
function parse_args() {
    local -r _re='^--([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
    local _arg _val
    while (( $# > 0 )); do
        _arg=$1; shift
        if [[ $_arg =~ $_re ]]; then
            _val=${BASH_REMATCH[2]}
            if [[ $_val == "~/"* ]]; then _val=$HOME/${_val:2}; fi
            printf -v "${BASH_REMATCH[1]}" '%s' "$_val"
        else
            usage; exit 1
        fi
    done
}

parse_args "$@"
|
||||||
|
|
||||||
|
# Split the mysql build name given with --mysqlbuild into its parts:
#   <mysql>-<tokudb-...>-linux-x86_64
# On a match, system and arch are replaced with the values taken from the
# build name.  A missing or unrecognized build name is fatal; the reason is
# reported on stderr before exiting.
mysqlbuild_re='(.*)-(tokudb-.*)-(linux)-(x86_64)'
if [[ $mysqlbuild =~ $mysqlbuild_re ]]; then
    mysql=${BASH_REMATCH[1]}
    tokudb=${BASH_REMATCH[2]}
    system=${BASH_REMATCH[3]}
    arch=${BASH_REMATCH[4]}
else
    echo "invalid or missing --mysqlbuild='$mysqlbuild', expected <mysql>-<tokudb-version>-linux-x86_64" >&2
    exit 1
fi
|
||||||
|
|
||||||
|
# The database name is derived from the scale factor and the engine, with
# the optional test instance name appended.  tpchdir is the directory under
# basedir that holds the TPCH files for this scale factor.
dbname=tpch${SCALE}G_${ENGINE}
if [[ -n $testinstance ]]; then dbname=${dbname}_${testinstance}; fi
tpchdir=$basedir/tpch${SCALE}G
|
||||||
|
|
||||||
|
# Put the mysql installation on the search paths.
#
# Arguments: $1 - mysql installation prefix, defaults to /usr/local/mysql
# Globals:   PATH is prefixed with <prefix>/bin when <prefix> exists.
#            LD_LIBRARY_PATH is prefixed with <prefix>/lib/mysql when that
#            directory exists.  The previous LD_LIBRARY_PATH is kept behind
#            the new entry (the earlier code appended $PATH here instead);
#            nothing is appended when it was empty, so that no empty path
#            element is created.
function setup_mysql_env() {
    local prefix=${1:-/usr/local/mysql}
    if [[ -d $prefix ]]; then
        export PATH=$prefix/bin:$PATH
    fi
    if [[ -d $prefix/lib/mysql ]]; then
        export LD_LIBRARY_PATH=$prefix/lib/mysql${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
    fi
}

setup_mysql_env
|
||||||
|
|
||||||
|
# goto the base directory
# Everything below uses paths relative to basedir (including rm -rf of the
# scripts directory), so the script stops when basedir can not be entered.
if [[ ! -d $basedir ]]; then mkdir -p "$basedir"; fi

pushd "$basedir" || exit 1
|
||||||
|
|
||||||
|
# update the build directory
# With --commit the test results go into a per day directory (YYYYMMDD) of
# the mysql.build tree in svn, checked out under builddir.  Without --commit
# the results are written to the current directory.
if [[ ${commit:-0} != 0 ]]; then
    if [[ ! -d $builddir ]]; then mkdir -p "$builddir"; fi

    date=$(date +%Y%m%d)
    testresultsdir=$builddir/$date
    pushd "$builddir" || exit 1
    # loop until the directory for today has been checked out
    while [[ ! -d $date ]]; do
        # the exit status of svn mkdir is ignored, only the checkout counts
        svn mkdir "$svn_server/mysql.build/$date" -m ""
        if ! svn checkout -q "$svn_server/mysql.build/$date"; then
            rm -rf -- "$date"
            # pause before the next attempt, like retry does
            sleep 1
        fi
    done
    popd
else
    testresultsdir=$PWD
fi
|
||||||
|
|
||||||
|
# The run log is named after the database, the compression setting, the
# mysql build and the server name.  A log left over from an earlier run is
# removed, and the test result starts out as PASS.
runfile=$testresultsdir/$dbname
if [[ ${tokudb_load_save_space:-0} != 0 ]]; then runfile=$runfile-compress; fi
runfile=$runfile-$mysqlbuild-$mysqlserver
rm -rf -- "$runfile"

testresult="PASS"
|
||||||
|
|
||||||
|
# maybe get the tpch data from AWS S3
# Only needed when the compare phase is enabled and the tpch directory for
# this scale factor does not exist yet.  The tarball is fetched into the
# current directory unless it is already there, then unpacked, and the
# compressed data files in it are gunzipped.
if [[ ${compare:-0} != 0 && ! -d $tpchdir ]]; then
    tpchtarball=tpch${SCALE}G_data_dump.tar
    if [[ ! -f $tpchtarball ]]; then
        echo "$(date) s3get --bundle tokutek-mysql-data $tpchtarball" >>"$runfile" 2>&1
        s3get --verbose --bundle tokutek-mysql-data "$tpchtarball" >>"$runfile" 2>&1
        exitcode=$?
        echo "$(date) s3get --bundle tokutek-mysql-data $tpchtarball $exitcode" >>"$runfile" 2>&1
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    fi
    if [[ $testresult == "PASS" ]]; then
        tar xf "$tpchtarball"
        exitcode=$?
        echo "$(date) tar xf $tpchtarball $exitcode" >>"$runfile" 2>&1
        if (( exitcode != 0 )); then
            testresult="FAIL"
        elif pushd "tpch${SCALE}G/data/tpch${SCALE}G"; then
            # gunzip the data files
            for f in *.gz; do
                # the pattern stays literal when nothing matches it
                [[ -e $f ]] || continue
                echo "$(date) gunzip $f" >>"$runfile" 2>&1
                gunzip "$f"
            done
            ls -l >>"$runfile" 2>&1
            # only pop when the pushd worked, so the directory stack stays balanced
            popd
        fi
    fi
fi
|
||||||
|
|
||||||
|
# checkout the tpch scripts
# The scripts are exported from svn into a directory named after the mysql
# build (and the test instance, when one is given).  The export is tried from
# the selected branch first and then from the top of the repository.
tpchtestdir=tpch-$mysqlbuild
if [[ -n $testinstance ]]; then tpchtestdir=${tpchtestdir}_${testinstance}; fi
if [[ $testresult == "PASS" ]]; then
    rm -rf -- "$tpchtestdir"
    retry svn export -q -r "$svn_revision" "$svn_server/$svn_branch/tpch" "$tpchtestdir"
    exitcode=$?
    echo "$(date) export $svn_server/$svn_branch/tpch $exitcode" >>"$runfile" 2>&1
    if (( exitcode != 0 )); then
        retry svn export -q -r "$svn_revision" "$svn_server/tpch" "$tpchtestdir"
        exitcode=$?
        echo "$(date) export $svn_server/tpch $exitcode" >>"$runfile" 2>&1
    fi
    if (( exitcode != 0 )); then testresult="FAIL"; fi
fi
|
||||||
|
|
||||||
|
# generate the tpch data
# Builds dbgen from the exported scripts.  The data is only generated when
# lineitem.tbl is not already in the data directory; the generated *.tbl
# files are made world readable and moved into the data directory.
if [[ ${dbgen:-0} != 0 && $testresult == "PASS" ]]; then
    # make -C fails by itself when the directory is missing, so no directory
    # change has to be undone here
    make -C "$tpchtestdir/dbgen"
    exitcode=$?
    echo "$(date) make dbgen $exitcode" >>"$runfile" 2>&1
    if (( exitcode != 0 )); then testresult="FAIL"; fi

    if [[ $testresult == "PASS" ]]; then
        tbldir=tpch${SCALE}G/data/tpch${SCALE}G
        mkdir -p "$tbldir"
        # from here on dbgen says whether the data has to be generated
        dbgen=0
        if [[ ! -f $tbldir/lineitem.tbl ]]; then dbgen=1; fi
        if [[ $dbgen != 0 ]]; then
            if pushd "$tpchtestdir/dbgen"; then
                ./dbgen -fF -s "$SCALE"
                exitcode=$?
                echo "$(date) dbgen -fF -s $SCALE $exitcode" >>"$runfile" 2>&1
                if (( exitcode != 0 )); then
                    testresult="FAIL"
                else
                    ls -l -- *.tbl >>"$runfile"
                    chmod 0644 -- *.tbl
                    ls -l -- *.tbl >>"$runfile"
                    mv -- *.tbl "$basedir/tpch${SCALE}G/data/tpch${SCALE}G"
                fi
                popd
            else
                # dbgen can not be run without its directory
                testresult="FAIL"
            fi
        fi
    fi
fi
|
||||||
|
|
||||||
|
# create the tpch database
# Drops a database left over from an earlier run and creates a fresh one.
# Each statement and its exit status is written to the run log; a failure of
# either statement fails the test.
if [[ ${load:-0} != 0 && $testresult == "PASS" ]]; then
    for stmt in "drop database if exists $dbname" "create database $dbname"; do
        echo "$(date) $stmt" >>"$runfile"
        mysql -S "$mysqlsocket" -u "$mysqluser" -e "$stmt" >>"$runfile" 2>&1
        exitcode=$?
        echo "$(date) $stmt $exitcode" >>"$runfile"
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    done
fi
|
||||||
|
|
||||||
|
# create the tpch tables
# The create table script for the selected engine is read from the scripts
# directory that was exported from svn.  That directory is $tpchtestdir,
# which carries the test instance suffix when --testinstance is given, so it
# is used here instead of rebuilding the name from the mysql build alone.
if [[ ${load:-0} != 0 && $testresult == "PASS" ]]; then
    echo "$(date) create table" >>"$runfile"
    mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "source $basedir/$tpchtestdir/scripts/${ENGINE}_tpch_create_table.sql" >>"$runfile" 2>&1
    exitcode=$?
    echo "$(date) create table $exitcode" >>"$runfile"
    if (( exitcode != 0 )); then testresult="FAIL"; fi
fi
|
||||||
|
|
||||||
|
# load the data
# Loads every table in TABLES from its '|' separated .tbl file and logs the
# exit status and the elapsed time in seconds.  The remaining tables are
# still loaded after a failure; any failure fails the test.
if [[ ${load:-0} != 0 && $testresult == "PASS" ]]; then
    # TABLES is a space separated list and is split on purpose
    for tblname in $TABLES; do
        tblfile=$tpchdir/data/tpch${SCALE}G/$tblname.tbl
        echo "$(date) load table $tblname" >>"$runfile"
        ls -l "$tblfile" >>"$runfile"
        start=$(date +%s)
        mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "set session tokudb_load_save_space=$tokudb_load_save_space; load data infile '$tblfile' into table $tblname fields terminated by '|'" >>"$runfile" 2>&1
        exitcode=$?
        loadtime=$(( $(date +%s) - start ))
        echo "$(date) load table $tblname $exitcode loadtime=$loadtime" >>"$runfile"
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    done
fi
|
||||||
|
|
||||||
|
# add a clustering index on lineitem (l_shipdate)
# This is part of the check phase.  The index is created with
# tokudb_create_index_online=0 and the elapsed time in seconds is logged.
if [[ ${check:-0} != 0 && $testresult == "PASS" ]]; then
    for tblname in lineitem; do
        echo "$(date) add clustering index $tblname" >>"$runfile"
        start=$(date +%s)
        mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "set session tokudb_create_index_online=0;create clustering index i_shipdate on lineitem (l_shipdate)" >>"$runfile" 2>&1
        exitcode=$?
        loadtime=$(( $(date +%s) - start ))
        echo "$(date) add clustering index $tblname $exitcode loadtime=$loadtime" >>"$runfile"
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    done
fi
|
||||||
|
|
||||||
|
# check the tables
# Runs CHECK TABLE on every table in TABLES and logs the exit status and the
# elapsed time in seconds.
# NOTE(review): only the exit status of the mysql client is examined here;
# confirm that a table that fails CHECK TABLE makes the client exit non-zero.
if [[ ${check:-0} != 0 && $testresult == "PASS" ]]; then
    # TABLES is a space separated list and is split on purpose
    for tblname in $TABLES; do
        echo "$(date) check table $tblname" >>"$runfile"
        start=$(date +%s)
        mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "check table $tblname" >>"$runfile" 2>&1
        exitcode=$?
        checktime=$(( $(date +%s) - start ))
        echo "$(date) check table $tblname $exitcode checktime=$checktime" >>"$runfile"
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    done
fi
|
||||||
|
|
||||||
|
# drop the clustering index that was added to lineitem for the check phase
if [[ ${check:-0} != 0 && $testresult == "PASS" ]]; then
    for tblname in lineitem; do
        echo "$(date) drop index $tblname" >>"$runfile"
        mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "drop index i_shipdate on lineitem" >>"$runfile" 2>&1
        exitcode=$?
        echo "$(date) drop index $tblname $exitcode" >>"$runfile"
        if (( exitcode != 0 )); then testresult="FAIL"; fi
    done
fi
|
||||||
|
|
||||||
|
# compare the data
# Dumps the loaded tables with the dumptpch.sql script and compares every
# dump file found in the database directory with the file of the same name
# in the reference dump directory.  A file that differs or that has no
# reference counterpart fails the test.  The dump files are removed when
# everything matched.
if [[ ${compare:-0} != 0 && $testresult == "PASS" ]]; then
    if [[ -d $tpchdir/dump/tpch${SCALE}G ]]; then
        # the script lives in $tpchtestdir, which carries the test instance
        # suffix when --testinstance is given
        mysql -S "$mysqlsocket" -u "$mysqluser" -D "$dbname" -e "source $basedir/$tpchtestdir/scripts/dumptpch.sql" >>"$runfile" 2>&1
        exitcode=$?
        echo "$(date) dump data $exitcode" >>"$runfile"
        if (( exitcode != 0 )); then
            testresult="FAIL"
        else
            dumpdir=$datadir/$dbname
            comparedir=$tpchdir/dump/tpch${SCALE}G

            # force the permissions on the dumpdir open
            # the directory is probed in a subshell so that the working
            # directory and the directory stack of the script are untouched
            if ! ( cd "$dumpdir" ) 2>/dev/null; then
                sudo chmod g+rwx "$datadir"
                sudo chmod g+rwx "$dumpdir"
            fi

            # compare the dump files
            # when no dump file exists the pattern stays literal and is
            # reported as MISSING, which fails the test
            for f in "$dumpdir"/dump*; do
                d=$(basename "$f")
                if [[ ! -f $comparedir/$d && -f $comparedir/$d.gz ]]; then
                    gunzip "$comparedir/$d.gz"
                fi
                if [[ -f $comparedir/$d ]]; then
                    if diff -q "$dumpdir/$d" "$comparedir/$d"; then
                        result="PASS"
                    else
                        result="FAIL"
                        testresult="FAIL"
                    fi
                else
                    result="MISSING"
                    testresult="FAIL"
                fi
                echo "$(date) $d $result" >>"$runfile"
            done
            if [[ $testresult == "PASS" ]]; then
                # remove the dump files
                rm -f -- "$dumpdir"/dump*
            fi
        fi
    fi
fi
|
||||||
|
|
||||||
|
# commit results
# With --commit the run log is added to svn and committed; the commit
# message starts with the test result.  The quotes around the message are
# escaped because retry hands the whole command to another shell.
if [[ ${commit:-0} != 0 ]]; then
    svn add "$runfile"
    retry svn commit -m \"$testresult $dbname $mysqlbuild $mysqlserver compress=$tokudb_load_save_space\" "$runfile"
fi

# leave the base directory
popd

# the exit status of the script reflects the test result
if [[ $testresult == "PASS" ]]; then exitcode=0; else exitcode=1; fi
exit $exitcode
|
34
scripts/tpch.readme
Normal file
34
scripts/tpch.readme
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
TPCH is an industry standard data warehouse benchmark. We use TPCH databases to test the TokuDB loader.
|
||||||
|
|
||||||
|
The run.tpch.bash script loads a TPCH database at a given scale factor into TokuDB. It then uses the SQL
|
||||||
|
CHECK TABLE statement to verify the fractal tree invariants. Finally, it dumps the database and compares
|
||||||
|
with a precomputed dump of the database from InnoDB.
|
||||||
|
|
||||||
|
Here are some TPCH databases dumped from InnoDB. These dumps are used to compare with TPCH data loaded
|
||||||
|
into TokuDB.
|
||||||
|
|
||||||
|
$ s3ls tokutek-mysql-data -l --prefix=tpch
|
||||||
|
2010-08-16T21:21:10.000Z 1073741824 tpch10G_data_dump.tar.0
|
||||||
|
2010-08-16T21:38:45.000Z 1073741824 tpch10G_data_dump.tar.1
|
||||||
|
2010-08-16T21:56:43.000Z 1073741824 tpch10G_data_dump.tar.2
|
||||||
|
2010-08-16T22:14:49.000Z 1073741824 tpch10G_data_dump.tar.3
|
||||||
|
2010-08-16T22:32:38.000Z 1073741824 tpch10G_data_dump.tar.4
|
||||||
|
2010-08-16T22:51:04.000Z 1073741824 tpch10G_data_dump.tar.5
|
||||||
|
2010-08-16T23:08:51.000Z 91262976 tpch10G_data_dump.tar.6
|
||||||
|
2010-08-16T23:10:21.000Z 654 tpch10G_data_dump.tar.xml
|
||||||
|
2010-08-12T17:45:09.000Z 633579520 tpch1G_data_dump.tar
|
||||||
|
2010-08-12T17:56:30.000Z 160 tpch1G_data_dump.tar.xml
|
||||||
|
2010-08-06T13:57:51.000Z 633610240 tpch1G_data_dump_innodb.tar
|
||||||
|
2010-08-06T14:07:09.000Z 174 tpch1G_data_dump_innodb.tar.xml
|
||||||
|
2010-11-28T12:20:58.000Z 886 tpch30G_data_dump.tar.xml
|
||||||
|
2010-09-14T19:16:30.000Z 1073741824 tpch30G_dump_data.tar.0
|
||||||
|
2010-09-14T19:40:02.000Z 1073741824 tpch30G_dump_data.tar.1
|
||||||
|
2010-09-14T20:12:22.000Z 1073741824 tpch30G_dump_data.tar.2
|
||||||
|
2010-09-14T20:45:23.000Z 1073741824 tpch30G_dump_data.tar.3
|
||||||
|
2010-09-14T21:14:07.000Z 1073741824 tpch30G_dump_data.tar.4
|
||||||
|
2010-09-14T21:37:54.000Z 1073741824 tpch30G_dump_data.tar.5
|
||||||
|
2010-09-14T21:57:02.000Z 1073741824 tpch30G_dump_data.tar.6
|
||||||
|
2010-09-14T22:16:59.000Z 1073741824 tpch30G_dump_data.tar.7
|
||||||
|
2010-09-14T22:36:22.000Z 1073741824 tpch30G_dump_data.tar.8
|
||||||
|
2010-09-14T22:55:25.000Z 382511104 tpch30G_dump_data.tar.9
|
||||||
|
2010-09-14T23:02:04.000Z 886 tpch30G_dump_data.tar.xml
|
Loading…
Add table
Reference in a new issue