Bug MTR: parallel execution breaks with smart ordering of test cases

There were actually more problems in this area:
  Slaves (if any) were unconditionally restarted, this appears unnecessary.
  Sort criteria were suboptimal, included the test name.
Added logic to "reserve" a sequence of tests with same config for one thread
Got rid of sort_criteria hash, put it into the test case itself
Adds little sanity check that expected worker picks up test
Fixed some tests that may fail if starting on running server
Some of these fail only if *same* test is repeated.
Finally, special sorting of tests that do --force-restart
This commit is contained in:
Bjorn Munch 2010-11-05 15:26:38 +01:00
parent 909f0bf94a
commit 866cec611a
14 changed files with 62 additions and 30 deletions

View file

@ -170,8 +170,6 @@ sub collect_test_cases ($$$) {
if ( $opt_reorder && !$quick_collect)
{
# Reorder the test cases in an order that will make them faster to run
my %sort_criteria;
# Make a mapping of test name to a string that represents how that test
# should be sorted among the other tests. Put the most important criterion
# first, then a sub-criterion, then sub-sub-criterion, etc.
@ -183,24 +181,31 @@ sub collect_test_cases ($$$) {
# Append the criteria for sorting, in order of importance.
#
push(@criteria, "ndb=" . ($tinfo->{'ndb_test'} ? "A" : "B"));
push(@criteria, $tinfo->{template_path});
# Group test with equal options together.
# Ending with "~" makes empty sort later than filled
my $opts= $tinfo->{'master_opt'} ? $tinfo->{'master_opt'} : [];
push(@criteria, join("!", sort @{$opts}) . "~");
# Add slave opts if any
if ($tinfo->{'slave_opt'})
{
push(@criteria, join("!", sort @{$tinfo->{'slave_opt'}}));
}
# This sorts tests with force-restart *before* identical tests
push(@criteria, $tinfo->{force_restart} ? "force-restart" : "no-restart");
$sort_criteria{$tinfo->{name}} = join(" ", @criteria);
$tinfo->{criteria}= join(" ", @criteria);
}
@$cases = sort {
$sort_criteria{$a->{'name'}} . $a->{'name'} cmp
$sort_criteria{$b->{'name'}} . $b->{'name'}; } @$cases;
@$cases = sort {$a->{criteria} cmp $b->{criteria}; } @$cases;
# For debugging the sort-order
# foreach my $tinfo (@$cases)
# {
# print("$sort_criteria{$tinfo->{'name'}} -> \t$tinfo->{'name'}\n");
# my $tname= $tinfo->{name} . ' ' . $tinfo->{combination};
# my $crit= $tinfo->{criteria};
# print("$tname\n\t$crit\n");
# }
}
if (defined $print_testcases){

View file

@ -662,22 +662,40 @@ sub run_test_server ($$$) {
next;
}
# Prefer same configuration, or just use next if --noreorder
if (!$opt_reorder or (defined $result and
$result->{template_path} eq $t->{template_path}))
{
#mtr_report("Test uses same config => good match");
# Test uses same config => good match
$next= splice(@$tests, $i, 1);
last;
}
# Second best choice is the first that does not fulfill
# any of the above conditions
if (!defined $second_best){
#mtr_report("Setting second_best to $i");
$second_best= $i;
}
# Smart allocation of next test within this thread.
if ($opt_reorder and $opt_parallel > 1 and defined $result)
{
my $wid= $result->{worker};
# Reserved for other thread, try next
next if (defined $t->{reserved} and $t->{reserved} != $wid);
if (! defined $t->{reserved})
{
# Force-restart not relevant when comparing *next* test
$t->{criteria} =~ s/force-restart$/no-restart/;
my $criteria= $t->{criteria};
# Reserve similar tests for this worker, but not too many
my $maxres= (@$tests - $i) / $opt_parallel + 1;
for (my $j= $i+1; $j <= $i + $maxres; $j++)
{
my $tt= $tests->[$j];
last unless defined $tt;
last if $tt->{criteria} ne $criteria;
$tt->{reserved}= $wid;
}
}
}
# At this point we have found next suitable test
$next= splice(@$tests, $i, 1);
last;
}
# Use second best choice if no other test has been found
@ -686,10 +704,12 @@ sub run_test_server ($$$) {
mtr_error("Internal error, second best too large($second_best)")
if $second_best > $#$tests;
$next= splice(@$tests, $second_best, 1);
delete $next->{reserved};
}
if ($next) {
#$next->print_test();
# We don't need this any more
delete $next->{criteria};
$next->write_test($sock, 'TESTCASE');
$running{$next->key()}= $next;
$num_ndb_tests++ if ($next->{ndb_test});
@ -772,6 +792,11 @@ sub run_worker ($) {
delete($test->{'comment'});
delete($test->{'logfile'});
# A sanity check. Should this happen often we need to look at it.
if (defined $test->{reserved} && $test->{reserved} != $thread_num) {
my $tres= $test->{reserved};
mtr_warning("Test reserved for w$tres picked up by w$thread_num");
}
$test->{worker} = $thread_num if $opt_parallel > 1;
run_testcase($test);
@ -4575,17 +4600,6 @@ sub server_need_restart {
}
}
# Temporary re-enable the "always restart slave" hack
# this should be removed asap, but will require that each rpl
# testcase cleanup better after itself - ie. stop and reset
# replication
# Use the "#!use-slave-opt" marker to detect that this is a "slave"
# server
if ( $server->option("#!use-slave-opt") ){
mtr_verbose_restart($server, "Always restart slave(s)");
return 1;
}
my $is_mysqld= grep ($server eq $_, mysqlds());
if ($is_mysqld)
{

View file

@ -0,0 +1 @@
--force-restart

View file

@ -1 +1,2 @@
-O max_binlog_size=4096
--force-restart

View file

@ -1 +1,2 @@
--binlog-do-db=b42829
--force-restart

View file

@ -141,3 +141,4 @@ HEX(word)
SELECT * FROM tmptbl504451f4258$1;
ERROR 42S02: Table 'test.tmptbl504451f4258$1' doesn't exist
DROP TABLE t5;
call mtr.force_restart();

View file

@ -1 +1,2 @@
--replicate-same-server-id --relay-log=slave-relay-bin --secure-file-priv=$MYSQL_TMP_DIR
--force-restart

View file

@ -0,0 +1 @@
--force-restart

View file

@ -174,3 +174,5 @@ SELECT * FROM tmptbl504451f4258$1;
connection master;
DROP TABLE t5;
sync_slave_with_master;
call mtr.force_restart();

View file

@ -1 +1,2 @@
rm -f $MYSQLTEST_VARDIR/std_data_master_link
ln -s $MYSQLTEST_VARDIR/std_data $MYSQLTEST_VARDIR/std_data_master_link

View file

@ -1 +1,2 @@
rm -f $MYSQLTEST_VARDIR/std_data_slave_link
ln -s $MYSQLTEST_VARDIR/std_data $MYSQLTEST_VARDIR/std_data_slave_link

View file

@ -0,0 +1 @@
--force-restart

View file

@ -1 +1,2 @@
--key_buffer_size=2M --small.key_buffer_size=256K --small.key_buffer_size=128K
--force-restart

View file

@ -1 +1,2 @@
--max-binlog-size=4096
--force-restart