From cb8ea84001fde7aaba8370430f7ab3432b5fd829 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 14 May 2017 16:22:03 +0300 Subject: Add a collect phase to collect non-large data from results This avoids having to keep all the huge files in memory all at once. --- obbenchlib/benchmarker.py | 2 - obbenchlib/htmlgen.py | 94 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 68 insertions(+), 28 deletions(-) (limited to 'obbenchlib') diff --git a/obbenchlib/benchmarker.py b/obbenchlib/benchmarker.py index 6cbb727..76e4849 100644 --- a/obbenchlib/benchmarker.py +++ b/obbenchlib/benchmarker.py @@ -169,9 +169,7 @@ class Benchmarker(object): result.set_value( obnam_subcommand, 'profile-text', self.read_profile_text()) - logging.log('reading log file') log = self.read_log_file() - logging.log('finished reading log file') result.set_value(obnam_subcommand, 'log', log) result.set_value(obnam_subcommand, 'vmrss', self.find_max_vmrss(log)) diff --git a/obbenchlib/htmlgen.py b/obbenchlib/htmlgen.py index 8efab12..c0bc06f 100644 --- a/obbenchlib/htmlgen.py +++ b/obbenchlib/htmlgen.py @@ -34,8 +34,6 @@ class HtmlGenerator(object): self.spec = None def generate_html(self): - results = self.load_results() - env = jinja2.Environment( loader=jinja2.PackageLoader('obbenchlib'), autoescape=lambda foo: True, @@ -49,12 +47,17 @@ class HtmlGenerator(object): LogFile, CssFile, ] - for page_class in page_classes: - page = page_class() + pages = [page_class() for page_class in page_classes] + for page in pages: page.env = env - page.results = results page.spec = self.spec + for result in self.load_results(): + for page in pages: + for filename, data in page.collect(result): + self.write_file(filename, data) + + for page in pages: for filename, data in page.generate(): self.write_file(filename, data) @@ -67,11 +70,11 @@ class HtmlGenerator(object): os.mkdir(self.htmldir) def load_results(self): - results = [] - for filename in glob.glob(os.path.join(self.resultdir, '*.yaml')): + filenames = list(glob.glob(os.path.join(self.resultdir, '*.yaml'))) + for i, filename in enumerate(filenames): + print 'Loading', filename, i+1, 'of', len(filenames) with open(filename) as f: - results.append(yaml.safe_load(f)) - return results + yield yaml.safe_load(f) def write_file(self, relative_path, text): filename = os.path.join(self.htmldir, relative_path) @@ -83,9 +86,12 @@ class HtmlPage(object): def __init__(self): self.env = None - self.results = None + self.results = [] self.spec = None + def collect(self, result): + raise NotImplementedError() + def format_markdown(self, text): return markdown.markdown(text) @@ -99,9 +105,34 @@ class HtmlPage(object): template = self.env.get_template(template_name) return template.render(**variables) + def deep_copy(self, item, copy): + if isinstance(item, dict): + return { + key: self.deep_copy(value, copy) + for key, value in item.items() + } + elif isinstance(item, list): + return [ + self.deep_copy(value, copy) + for value in item + ] + else: + return copy(item) + + def copy(self, thing): + if isinstance(thing, str) and len(thing) >= 1024: + return None + if hasattr(thing, 'copy'): + return thing.copy() + return thing + class FrontPage(HtmlPage): + def collect(self, result): + self.results.append(self.deep_copy(result, self.copy)) + return [] + def generate(self): variables = { 'description': self.format_markdown(self.spec['description']), @@ -140,6 +171,10 @@ class FrontPage(HtmlPage): class BenchmarkPage(HtmlPage): + def collect(self, result): + self.results.append(self.deep_copy(result, self.copy)) + return [] + def generate(self): benchmark_names = [ benchmark['name'] @@ -221,15 +256,14 @@ class BenchmarkPage(HtmlPage): class ProfileData(HtmlPage): - def generate(self): - for result in self.results: - for i, step in enumerate(result['steps']): - for operation in step: - if 'profile' in step[operation]: - yield self.generate_profile_data( - result, step, i, operation) - yield self.generate_profile_text( - result, step, i, operation) + def collect(self, result): + for i, step in enumerate(result['steps']): + for operation in step: + if 'profile' in step[operation]: + yield self.generate_profile_data( + result, step, i, operation) + yield self.generate_profile_text( + result, step, i, operation) def generate_profile_data(self, result, step, i, operation): filename = '{}_{}.prof'.format(result['result_id'], i) @@ -239,24 +273,32 @@ class ProfileData(HtmlPage): filename = '{}_{}.txt'.format(result['result_id'], i) return filename, step[operation]['profile-text'] + def generate(self): + return [] + class LogFile(HtmlPage): - def generate(self): - for result in self.results: - for i, step in enumerate(result['steps']): - for operation in step: - if 'log' in step[operation]: - yield self.generate_log_file( - result, step, i, operation) + def collect(self, result): + for i, step in enumerate(result['steps']): + for operation in step: + if 'log' in step[operation]: + yield self.generate_log_file( + result, step, i, operation) def generate_log_file(self, result, step, i, operation): filename = '{}_{}.log'.format(result['result_id'], i) return filename, step[operation]['log'] + def generate(self): + return [] + class CssFile(object): + def collect(self, result): + return [] + def generate(self): filename = os.path.join( os.path.dirname(obbenchlib.__file__), 'obbench.css') -- cgit v1.2.1