mariadb/storage/tokudb/PerconaFT/tools/ba_replay.cc

/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------
PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.
PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
// Replay a block allocator trace against different strategies and compare
// the results
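//
// Raw trace lines are read from stdin. The line formats sketched below are
// inferred from the parsing code in this file rather than from a separate
// spec, so treat them as illustrative:
//
//   ba_trace_create                  <allocator_id(hex)> <reserve_at_beginning> <alignment>
//   ba_trace_create_from_blockpairs  <allocator_id(hex)> <reserve_at_beginning> <alignment> [offset size] ...
//   ba_trace_alloc                   <allocator_id(hex)> <size> <heat> <offset>
//   ba_trace_free                    <allocator_id(hex)> <offset>
//   ba_trace_destroy                 <allocator_id(hex)>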
#include <db.h>
#include <getopt.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <map>
#include <set>
#include <string>
#include <sstream>
#include <vector>
#include <portability/memory.h>
#include <portability/toku_assert.h>
#include <portability/toku_stdlib.h>
#include "ft/serialize/block_allocator.h"
using std::map;
using std::set;
using std::string;
using std::vector;
static int verbose = false;
static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) {
if (!pred) {
fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line);
abort();
}
}
static char *trim_whitespace(char *line) {
// skip leading whitespace
while (isspace(*line)) {
line++;
}
return line;
}
static int64_t parse_number(char **ptr, int line_num, int base) {
*ptr = trim_whitespace(*ptr);
char *line = *ptr;
char *new_ptr;
int64_t n = strtoll(line, &new_ptr, base);
ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num);
ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num);
*ptr = new_ptr;
return n;
}
static uint64_t parse_uint64(char **ptr, int line_num) {
int64_t n = parse_number(ptr, line_num, 10);
// we happen to know that the uint64's we deal with will
// take less than 63 bits (they come from pointers)
return static_cast<uint64_t>(n);
}
static string parse_token(char **ptr, int line_num) {
*ptr = trim_whitespace(*ptr);
char *line = *ptr;
// parse the first token, which represents the traced function
char token[64];
int r = sscanf(*ptr, "%63s", token); // %63s leaves room for the terminating NUL
ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num);
*ptr += strlen(token);
return string(token);
}
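// Parse one "[offset size]" pair (both base 10), the textual form used for
// blockpairs on create_from_blockpairs trace lines; the format is inferred
// from the sscanf pattern below.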
static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) {
*ptr = trim_whitespace(*ptr);
char *line = *ptr;
uint64_t offset, size;
int bytes_read;
int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read);
ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num);
*ptr += bytes_read;
return block_allocator::blockpair(offset, size);
}
static char *strip_newline(char *line, bool *found) {
char *ptr = strchr(line, '\n');
if (ptr != nullptr) {
if (found != nullptr) {
*found = true;
}
*ptr = '\0';
}
return line;
}
static char *read_trace_line(FILE *file) {
const int buf_size = 4096;
char buf[buf_size];
std::stringstream ss;
while (true) {
if (fgets(buf, buf_size, file) == nullptr) {
break;
}
bool has_newline = false;
ss << strip_newline(buf, &has_newline);
if (has_newline) {
// end of the line, we're done
break;
}
}
std::string s = ss.str();
return s.size() ? toku_strdup(s.c_str()) : nullptr;
}
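// Canonicalize a raw trace: raw allocator pointers become small sequence
// numbers and allocated offsets become allocation sequence numbers (ASNs),
// so the trace can be replayed deterministically against any strategy.
// A hypothetical example of the translation performed below:
//
//   raw:        ba_trace_alloc 0x7f3e5c004a00 4096 0 1048576
//   canonical:  ba_trace_alloc 0 4096 0 17
//
// where 0 is the canonical allocator id and 17 is the ASN assigned to this
// allocation (the example values are made up for illustration).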
static vector<string> canonicalize_trace_from(FILE *file) {
// new trace, canonicalized from a raw trace
vector<string> canonicalized_trace;
// raw allocator id -> canonical allocator id
//
// keeps track of allocators that were created as part of the trace,
// and therefore will be part of the canonicalized trace.
uint64_t allocator_id_seq_num = 0;
map<uint64_t, uint64_t> allocator_ids;
// allocated offset -> allocation seq num
//
uint64_t allocation_seq_num = 0;
static const uint64_t ASN_NONE = (uint64_t) -1;
typedef map<uint64_t, uint64_t> offset_seq_map;
// raw allocator id -> offset_seq_map that tracks its allocations
map<uint64_t, offset_seq_map> offset_to_seq_num_maps;
int line_num = 0;
char *line;
while ((line = read_trace_line(file)) != nullptr) {
line_num++;
char *ptr = line;
string fn = parse_token(&ptr, line_num);
int64_t allocator_id = parse_number(&ptr, line_num, 16);
std::stringstream ss;
if (fn.find("ba_trace_create") != string::npos) {
ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num);
ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs",
"corrupted trace: bad fn", line, line_num);
// we only convert the allocator_id to an allocator_id_seq_num
// in the canonical trace and leave the rest of the line as-is.
allocator_ids[allocator_id] = allocator_id_seq_num;
ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl;
allocator_id_seq_num++;
// First, read past the reserve / alignment values.
(void) parse_uint64(&ptr, line_num);
(void) parse_uint64(&ptr, line_num);
if (fn == "ba_trace_create_from_blockpairs") {
// For each blockpair created by this traceline, add its offset to the offset seq map
// with asn ASN_NONE so that later canonicalizations of `free' know whether to write
// down the asn or the raw offset.
offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
while (*trim_whitespace(ptr) != '\0') {
const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
(*map)[bp.offset] = ASN_NONE;
}
}
} else {
ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num);
uint64_t canonical_allocator_id = allocator_ids[allocator_id];
// this is the map that tracks allocations for this allocator
offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
if (fn == "ba_trace_alloc") {
const uint64_t size = parse_uint64(&ptr, line_num);
const uint64_t heat = parse_uint64(&ptr, line_num);
const uint64_t offset = parse_uint64(&ptr, line_num);
ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
// remember that an allocation at `offset' has the current alloc seq num
(*map)[offset] = allocation_seq_num;
// translate `offset = alloc(size)' to `asn = alloc(size)'
ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
allocation_seq_num++;
} else if (fn == "ba_trace_free") {
const uint64_t offset = parse_uint64(&ptr, line_num);
ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num);
// get the alloc seq num for an allocation that occurred at `offset'
const uint64_t asn = (*map)[offset];
map->erase(offset);
// if there's an asn, then a corresponding ba_trace_alloc occurred and we should
// write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs
// and we write the original offset.
if (asn != ASN_NONE) {
ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl;
} else {
ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl;
}
} else if (fn == "ba_trace_destroy") {
// Remove this allocator from both maps
allocator_ids.erase(allocator_id);
offset_to_seq_num_maps.erase(allocator_id);
// translate `destroy(ptr_id)' to `destroy(canonical_id)'
ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl;
} else {
ba_replay_assert(false, "corrupted trace: bad fn", line, line_num);
}
}
canonicalized_trace.push_back(ss.str());
toku_free(line);
}
if (allocator_ids.size() != 0) {
fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running");
}
return canonicalized_trace;
}
struct streaming_variance_calculator {
int64_t n_samples;
int64_t mean;
int64_t variance;
// math credit: AoCP, Donald Knuth, '62
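// Recurrences implemented below (integer arithmetic, so results are truncated):
//   mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n
//   var_n  = ((n - 1) * var_{n-1} + (x_n - mean_{n-1}) * (x_n - mean_n)) / n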
void add_sample(int64_t x) {
n_samples++;
if (n_samples == 1) {
mean = x;
variance = 0;
} else {
int64_t old_mean = mean;
mean = old_mean + ((x - old_mean) / n_samples);
variance = (((n_samples - 1) * variance) +
((x - old_mean) * (x - mean))) / n_samples;
}
}
};
struct canonical_trace_stats {
uint64_t n_lines_replayed;
uint64_t n_create;
uint64_t n_create_from_blockpairs;
uint64_t n_alloc_hot;
uint64_t n_alloc_cold;
uint64_t n_free;
uint64_t n_destroy;
struct streaming_variance_calculator alloc_hot_bytes;
struct streaming_variance_calculator alloc_cold_bytes;
canonical_trace_stats() {
memset(this, 0, sizeof(*this));
}
};
struct fragmentation_report {
TOKU_DB_FRAGMENTATION_S beginning;
TOKU_DB_FRAGMENTATION_S end;
fragmentation_report() {
memset(this, 0, sizeof(*this));
}
void merge(const struct fragmentation_report &src_report) {
for (int i = 0; i < 2; i++) {
TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end;
const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end;
dst->file_size_bytes += src->file_size_bytes;
dst->data_bytes += src->data_bytes;
dst->data_blocks += src->data_blocks;
dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional;
dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional;
dst->unused_bytes += src->unused_bytes;
dst->unused_blocks += src->unused_blocks;
dst->largest_unused_block += src->largest_unused_block;
}
}
};
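// Replay a canonicalized trace against a single allocation strategy. Each
// ba_trace_create* line constructs a fresh block_allocator; allocs and frees
// are replayed by ASN (or by raw offset for blocks that originated from
// create_from_blockpairs), and a fragmentation report is captured per
// allocator at creation and again at destruction.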
static void replay_canonicalized_trace(const vector<string> &canonicalized_trace,
block_allocator::allocation_strategy strategy,
map<uint64_t, struct fragmentation_report> *reports,
struct canonical_trace_stats *stats) {
// maps an allocator id to its block allocator
map<uint64_t, block_allocator *> allocator_map;
// maps allocation seq num to allocated offset
map<uint64_t, uint64_t> seq_num_to_offset;
for (vector<string>::const_iterator it = canonicalized_trace.begin();
it != canonicalized_trace.end(); it++) {
const int line_num = stats->n_lines_replayed++;
char *line = toku_strdup(it->c_str());
line = strip_newline(line, nullptr);
char *ptr = trim_whitespace(line);
// canonical allocator id is in base 10, not 16
string fn = parse_token(&ptr, line_num);
int64_t allocator_id = parse_number(&ptr, line_num, 10);
if (fn.find("ba_trace_create") != string::npos) {
const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num);
const uint64_t alignment = parse_uint64(&ptr, line_num);
ba_replay_assert(allocator_map.count(allocator_id) == 0,
"corrupted canonical trace: double create", line, line_num);
block_allocator *ba = new block_allocator();
if (fn == "ba_trace_create") {
ba->create(reserve_at_beginning, alignment);
stats->n_create++;
} else {
ba_replay_assert(fn == "ba_trace_create_from_blockpairs",
"corrupted canonical trace: bad create fn", line, line_num);
vector<block_allocator::blockpair> pairs;
while (*trim_whitespace(ptr) != '\0') {
const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
pairs.push_back(bp);
}
ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size());
stats->n_create_from_blockpairs++;
}
ba->set_strategy(strategy);
TOKU_DB_FRAGMENTATION_S report;
ba->get_statistics(&report);
(*reports)[allocator_id].beginning = report;
allocator_map[allocator_id] = ba;
} else {
ba_replay_assert(allocator_map.count(allocator_id) > 0,
"corrupted canonical trace: no such allocator", line, line_num);
block_allocator *ba = allocator_map[allocator_id];
if (fn == "ba_trace_alloc") {
// replay an `alloc' whose result will be associated with a certain asn
const uint64_t size = parse_uint64(&ptr, line_num);
const uint64_t heat = parse_uint64(&ptr, line_num);
const uint64_t asn = parse_uint64(&ptr, line_num);
ba_replay_assert(seq_num_to_offset.count(asn) == 0,
"corrupted canonical trace: double alloc (asn in use)", line, line_num);
uint64_t offset;
ba->alloc_block(size, heat, &offset);
seq_num_to_offset[asn] = offset;
heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++;
heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size);
} else if (fn == "ba_trace_free_asn") {
// replay a `free' on a block whose offset is the result of an alloc with an asn
const uint64_t asn = parse_uint64(&ptr, line_num);
ba_replay_assert(seq_num_to_offset.count(asn) == 1,
"corrupted canonical trace: double free (asn unused)", line, line_num);
const uint64_t offset = seq_num_to_offset[asn];
ba->free_block(offset);
seq_num_to_offset.erase(asn);
stats->n_free++;
} else if (fn == "ba_trace_free_offset") {
// replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs
const uint64_t offset = parse_uint64(&ptr, line_num);
ba->free_block(offset);
stats->n_free++;
} else if (fn == "ba_trace_destroy") {
TOKU_DB_FRAGMENTATION_S report;
ba->get_statistics(&report);
ba->destroy();
(*reports)[allocator_id].end = report;
allocator_map.erase(allocator_id);
stats->n_destroy++;
} else {
ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num);
}
}
toku_free(line);
}
}
static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) {
switch (strategy) {
case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT:
return "first-fit";
case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
return "best-fit";
case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
return "heat-zone";
case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT:
return "padded-fit";
default:
abort();
}
}
static block_allocator::allocation_strategy cstring_to_strategy(const char *str) {
if (strcmp(str, "first-fit") == 0) {
return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT;
}
if (strcmp(str, "best-fit") == 0) {
return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT;
}
if (strcmp(str, "heat-zone") == 0) {
return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE;
}
if (strcmp(str, "padded-fit") != 0) {
fprintf(stderr, "bad strategy string: %s\n", str);
abort();
}
return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT;
}
static void print_result_verbose(uint64_t allocator_id,
block_allocator::allocation_strategy strategy,
const struct fragmentation_report &report) {
if (report.end.data_bytes + report.end.unused_bytes +
report.beginning.data_bytes + report.beginning.unused_bytes
< 32UL * 1024 * 1024) {
printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
return;
}
printf(" allocator_id: %20" PRId64 "\n", allocator_id);
printf(" strategy: %20s\n", strategy_to_cstring(strategy));
for (int i = 0; i < 2; i++) {
const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end;
printf("%s\n", i == 0 ? "BEFORE" : "AFTER");
uint64_t total_bytes = r->data_bytes + r->unused_bytes;
uint64_t total_blocks = r->data_blocks + r->unused_blocks;
// byte statistics
printf(" total bytes: %20" PRId64 "\n", total_bytes);
printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes,
static_cast<double>(r->data_bytes) / total_bytes);
printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes,
static_cast<double>(r->unused_bytes) / total_bytes);
// block statistics
printf(" total blocks: %20" PRId64 "\n", total_blocks);
printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks,
static_cast<double>(r->data_blocks) / total_blocks);
printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks,
static_cast<double>(r->unused_blocks) / total_blocks);
// misc
printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block);
}
}
static void print_result(uint64_t allocator_id,
block_allocator::allocation_strategy strategy,
const struct fragmentation_report &report) {
const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning;
const TOKU_DB_FRAGMENTATION_S *end = &report.end;
uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes;
uint64_t total_end_bytes = end->data_bytes + end->unused_bytes;
if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) {
if (verbose) {
printf("\n");
printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
}
return;
}
printf("\n");
if (verbose) {
print_result_verbose(allocator_id, strategy, report);
} else {
printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n",
strategy_to_cstring(strategy), allocator_id,
static_cast<double>(report.end.data_bytes) / total_end_bytes,
static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes);
}
}
static int only_aggregate_reports;
static struct option getopt_options[] = {
{ "verbose", no_argument, &verbose, 1 },
{ "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 },
{ "include-strategy", required_argument, nullptr, 'i' },
{ "exclude-strategy", required_argument, nullptr, 'x' },
{ nullptr, 0, nullptr, 0 },
};
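// Example invocation (a sketch; the binary name and trace file are placeholders):
//
//   ba_replay --verbose --include-strategy heat-zone --exclude-strategy padded-fit < allocator.trace
//
// The raw trace is read from stdin; valid strategy names are first-fit,
// best-fit, heat-zone and padded-fit (see cstring_to_strategy above).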
int main(int argc, char *argv[]) {
int opt;
set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies;
while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) {
switch (opt) {
case 0:
break;
case 'i':
candidate_strategies.insert(cstring_to_strategy(optarg));
break;
case 'x':
excluded_strategies.insert(cstring_to_strategy(optarg));
break;
case '?':
default:
abort();
};
}
// Default to everything if nothing was explicitly included.
if (candidate_strategies.empty()) {
candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT);
candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
}
// ..but remove anything that was explicitly excluded
for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin();
it != excluded_strategies.end(); it++) {
candidate_strategies.erase(*it);
}
// Run the real trace
//
// First, read the raw trace from stdin
vector<string> canonicalized_trace = canonicalize_trace_from(stdin);
if (!only_aggregate_reports) {
printf("\n");
printf("Individual reports, by allocator:\n");
}
struct canonical_trace_stats stats;
map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy;
for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
it != candidate_strategies.end(); it++) {
const block_allocator::allocation_strategy strategy(*it);
// replay the canonicalized trace against the current strategy.
//
// we provide the reports map so we can gather statistics later
struct canonical_trace_stats dummy_stats;
map<uint64_t, struct fragmentation_report> reports;
replay_canonicalized_trace(canonicalized_trace, strategy, &reports,
// Only need to gather canonical trace stats once
it == candidate_strategies.begin() ? &stats : &dummy_stats);
struct fragmentation_report aggregate_report;
memset(&aggregate_report, 0, sizeof(aggregate_report));
for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin();
rp != reports.end(); rp++) {
const struct fragmentation_report &report = rp->second;
aggregate_report.merge(report);
if (!only_aggregate_reports) {
print_result(rp->first, strategy, report);
}
}
reports_by_strategy[strategy] = aggregate_report;
}
printf("\n");
printf("Aggregate reports, by strategy:\n");
for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin();
it != reports_by_strategy.end(); it++) {
print_result(0, it->first, it->second);
}
printf("\n");
printf("Overall trace stats:\n");
printf("\n");
printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed);
printf(" n_create: %15" PRIu64 "\n", stats.n_create);
printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs);
printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot);
printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold);
printf(" n_free: %15" PRIu64 "\n", stats.n_free);
printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy);
printf("\n");
printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean);
printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance));
printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean);
printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance));
printf("\n");
return 0;
}