#!/usr/bin/env python3
#
# Script to aggregate and report Linux perf results.
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile

# TODO support non-zip perf results?


PERF_PATH = ['perf']
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
PERF_FREQ = 100
OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)


# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)

# perf results
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
    _types = {
        'cycles': Int,
        'bmisses': Int, 'branches': Int,
        'cmisses': Int, 'caches': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(cycles), Int(bmisses), Int(branches),
            Int(cmisses), Int(caches),
            children)

    def __add__(self, other):
        return PerfResult(self.file, self.function, self.line,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children + other.children)


def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)

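# A minimal sketch (not used by the script, hypothetical values) of how
# Int's diff helpers compose in diff output:
def _example_int_diff():
    new, old = Int(150), Int(100)
    assert str(new) == '150'
    assert new.diff_diff(old).strip() == '+50'
    assert new.ratio(old) == 0.5  # rendered as +50.0% in diff output
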
# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_path=PERF_PATH,
        **args):
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation
        perf = perf_path + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            err = errno.EOWNERDEAD

        # synchronize access
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err


# try to only process each dso once
#
# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
    local_cache = {}
    manager = mp.Manager()
    global_cache = manager.dict()
    lock = mp.Lock()

    def multiprocessing_cache(*args, **kwargs):
        # check local cache?
        if args in local_cache:
            return local_cache[args]
        # check global cache?
        with lock:
            if args in global_cache:
                v = global_cache[args]
                local_cache[args] = v
                return v
            # fall back to calling the function
            v = f(*args, **kwargs)
            global_cache[args] = v
            local_cache[args] = v
            return v

    return multiprocessing_cache

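# A minimal sketch (hypothetical function, not used by the script) of the
# two-level cache: the first call computes, repeat calls in the same
# process hit local_cache, and calls in sibling pool processes hit the
# manager-backed global_cache:
def _example_cache():
    @multiprocessing_cache
    def parse(path):
        return len(path)
    assert parse('lfs.o') == 5  # computed
    assert parse('lfs.o') == 5  # local cache hit
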
@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)'
            r'\s+.*'
            r'\s+(?P<size>[0-9a-fA-F]+)'
            r'\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'\s+.*'
                r'\s+(?P<path>[^\s]+)'
            # matches line opcodes
            r'|' r'\[[^\]]*\]\s+'
                r'(?:'
                    r'(?P<op_special>Special)'
                    r'|' r'(?P<op_copy>Copy)'
                    r'|' r'(?P<op_end>End of Sequence)'
                    r'|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    r'|' r'Line .*?to (?P<op_line>[0-9]+)'
                    r'|' r'(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    r'|' r'.'
                r')*'
            r')$', re.IGNORECASE)

    # figure out symbol addresses and file+line ranges
    syms = {}
    sym_at = []
    cmd = objdump_path + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at

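# A minimal sketch (hypothetical addresses) of how the sorted line_at table
# above maps an address back to file+line via bisect:
def _example_line_lookup():
    line_at = [(0x100, 'lfs.c', 10), (0x140, 'lfs.c', 12)]
    i = bisect.bisect(line_at, 0x130, key=lambda x: x[0])
    assert line_at[i-1] == (0x100, 'lfs.c', 10)
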
def collect_decompressed(path, *,
        perf_path=PERF_PATH,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    sample_pattern = re.compile(
        r'(?P<comm>\w+)'
        r'\s+(?P<pid>\w+)'
        r'\s+(?P<time>[\w.]+):'
        r'\s*(?P<period>\w+)'
        r'\s+(?P<event>[^:]+):')
    frame_pattern = re.compile(
        r'\s+(?P<addr>\w+)'
        r'\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
        r'\s+\((?P<dso>[^\)]+)\)')
    events = {
        'cycles':           'cycles',
        'branch-misses':    'bmisses',
        'branches':         'branches',
        'cache-misses':     'cmisses',
        'cache-references': 'caches'}

    # note perf_path may contain extra args
    cmd = perf_path + [
        'script',
        '-i%s' % path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)

    last_filtered = False
    last_event = ''
    last_period = 0
    last_stack = []
    deltas = co.defaultdict(lambda: {})
    syms_ = co.defaultdict(lambda: {})
    at_cache = {}
    results = {}

    def commit():
        # tail-recursively propagate measurements
        for i in range(len(last_stack)):
            results_ = results
            for j in reversed(range(i+1)):
                if i+1-j > depth:
                    break
                # propagate
                name = last_stack[j]
                if name not in results_:
                    results_[name] = (co.defaultdict(lambda: 0), {})
                results_[name][0][last_event] += last_period
                # recurse
                results_ = results_[name][1]

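    # A rough sketch (hypothetical stack) of what commit() builds: with
    # last_stack = [leaf, caller, main] (leaf-first, as perf script prints
    # frames) and depth=2, the sample's period is charged to each of leaf,
    # caller, and main at the top level, with each frame's callee nested
    # one level under it, e.g. results[caller][1][leaf] and
    # results[main][1][caller].
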
    for line in proc.stdout:
        # we need to process a lot of data, so wait to use regex as late
        # as possible
        if not line.startswith('\t'):
            if last_filtered:
                commit()
            last_filtered = False

            if line:
                m = sample_pattern.match(line)
                if m and m.group('event') in events:
                    last_filtered = True
                    last_event = m.group('event')
                    last_period = int(m.group('period'), 0)
                    last_stack = []
        elif last_filtered:
            m = frame_pattern.match(line)
            if m:
                # filter out internal/kernel functions
                if not everything and (
                        m.group('sym').startswith('__')
                        or m.group('sym').startswith('0')
                        or m.group('sym').startswith('-')
                        or m.group('sym').startswith('[')
                        or m.group('dso').startswith('/usr/lib')):
                    continue

                dso = m.group('dso')
                sym = m.group('sym')
                off = int(m.group('off'), 0) if m.group('off') else 0
                addr_ = int(m.group('addr'), 16)

                # get the syms/lines for the dso, this is cached
                syms, sym_at, lines, line_at = collect_syms_and_lines(
                    dso, **args)

                # ASLR is tricky, we have symbols+offsets, but static
                # symbols means we may have multiple options for each
                # symbol.
                #
                # To try to solve this, we use previously seen symbols to
                # build confidence for the correct ASLR delta. This means
                # we may guess incorrectly for early symbols, but this
                # will only affect a few samples.
                if sym in syms:
                    sym_addr_ = addr_ - off

                    # track possible deltas?
                    for sym_addr, size in syms[sym]:
                        delta = sym_addr - sym_addr_
                        if delta not in deltas[dso]:
                            deltas[dso][delta] = sum(
                                abs(a_+delta - a)
                                for s, (a_, _) in syms_[dso].items()
                                for a, _ in syms[s])
                    for delta in deltas[dso].keys():
                        deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
                    syms_[dso][sym] = sym_addr_, size

                    # guess the best delta
                    delta, _ = min(deltas[dso].items(),
                        key=lambda x: (x[1], x[0]))
                    addr = addr_ + delta

                    # cached?
                    if (dso,addr) in at_cache:
                        cached = at_cache[(dso,addr)]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, line = cached
                    else:
                        # find file+line
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        else:
                            file, line = re.sub(r'(\.o)?$', '.c', dso, 1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file)
                                        == os.path.abspath(s)
                                    for s in sources):
                                at_cache[(dso,addr)] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[(dso,addr)] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[(dso,addr)] = file, line
                else:
                    file, line = re.sub(r'(\.o)?$', '.c', dso, 1), 0

                last_stack.append((file, sym, line))

                # stop propagating?
                if propagate and len(last_stack) >= propagate:
                    commit()
                    last_filtered = False
    if last_filtered:
        commit()
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        sys.exit(-1)

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, children) in results.items():
            results_.append(PerfResult(*name,
                **{events[k]: v for k, v in r.items()},
                children=to_results(children)))
        return results_

    return to_results(results)


def collect_job(path, i, **args):
    # decompress into a temporary file, this is to work around
    # some limitations of perf
    with zipfile.ZipFile(path) as z:
        with z.open(i) as f:
            with tempfile.NamedTemporaryFile('wb') as g:
                shutil.copyfileobj(f, g)
                g.flush()

                return collect_decompressed(g.name, **args)


def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)


def collect(perf_paths, *,
        jobs=None,
        **args):
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    records = []
    for path in perf_paths:
        # each .perf file is actually a zip file containing perf files
        # from multiple runs
        with zipfile.ZipFile(path) as z:
            records.extend((path, i) for i in z.infolist())

    # we're dealing with a lot of data but also surprisingly
    # parallelizable
    if jobs is not None:
        results = []
        with mp.Pool(jobs) as p:
            for results_ in p.imap_unordered(
                    starapply,
                    ((collect_job, (path, i), args) for path, i in records)):
                results.extend(results_)
    else:
        results = []
        for path, i in records:
            results.extend(collect_job(path, i, **args))

    return results

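# A minimal sketch (hypothetical calls) of starapply, which lets
# Pool.imap_unordered fan out calls that need both positional and keyword
# arguments:
def _example_starapply():
    jobs = [(max, (1, 2), {}), (int, ('ff',), {'base': 16})]
    assert list(map(starapply, jobs)) == [2, 255]
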
def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    # fold recursively
    folded_ = []
    for r in folded:
        folded_.append(r._replace(children=fold(
            Result, r.children,
            by=by,
            defines=defines)))
    folded = folded_

    return folded

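# A minimal sketch (hypothetical results) of fold: grouping by file+function
# sums measurements across lines via PerfResult.__add__:
def _example_fold():
    rs = [
        PerfResult('lfs.c', 'lfs_mount', 10, cycles=100),
        PerfResult('lfs.c', 'lfs_mount', 20, cycles=50)]
    folded = fold(PerfResult, rs, by=['file', 'function'])
    assert int(folded[0].cycles) == 150
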
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
            types[k].ratio(
                getattr(table.get(n), k, None),
                getattr(diff_table.get(n), k, None))
            for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                key=lambda n: tuple(
                    (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else ()
                    for k in ([k] if k else [
                        k for k in Result._sort if k in fields])),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
            sum(1 for n in table if n not in diff_table),
            sum(1 for n in diff_table if n not in table))
            if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and
    # column -1 the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s  %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))
    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(
                        key=lambda n: tuple(
                            (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else ()
                            for k in ([k] if k else [
                                k for k in Result._sort if k in fields])),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s  %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                         prefixes[2+is_last] + "'-> ",
                         prefixes[2+is_last] + "|   ",
                         prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s  %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                     "'-> ",
                     "|   ",
                     "    "))

    print('%-*s  %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))

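# A minimal sketch (hypothetical results) of the sort argument consumed by
# table() and report(): a list of (field, reverse) pairs as built by the
# -s/-S flags, applied stably from last to first:
def _example_table_sort():
    rs = [
        PerfResult('lfs.c', 'lfs_bd_read', 1, cycles=75),
        PerfResult('lfs.c', 'lfs_mount', 2, cycles=25)]
    # render hottest-first by cycles, i.e. -scycles
    table(PerfResult, rs, sort=[('cycles', False)])
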
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    # figure out the threshold
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    if not branches and not caches:
        tk = 'cycles'
    elif branches:
        tk = 'bmisses'
    else:
        tk = 'cmisses'

    # find max cycles
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    if last is not None and line - last.stop \
                            <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                r = table.get(i+1)
                if r is not None and (
                        float(r.cycles) > 0
                            if not branches and not caches
                        else float(r.bmisses) > 0 or float(r.branches) > 0
                            if branches
                        else float(r.cmisses) > 0 or float(r.caches) > 0):
                    line = '%-*s // %s' % (
                        args['width'],
                        line,
                        '%s cycles' % r.cycles
                            if not branches and not caches
                        else '%s bmisses, %s branches'
                            % (r.bmisses, r.branches)
                            if branches
                        else '%s cmisses, %s caches'
                            % (r.cmisses, r.caches))

                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)

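# A rough sketch (hypothetical numbers) of the threshold semantics above:
# with the default THRESHOLD = (0.5, 0.85), a line whose samples reach
# >=85% of the hottest line is highlighted red, >=50% magenta, and lines
# below 50% are only shown as context (assuming --color):
def _example_threshold():
    t0, t1 = THRESHOLD
    max_ = 1000.0
    assert 900/max_ >= t1  # red
    assert 600/max_ >= t0  # magenta
    assert 100/max_ < t0   # context only
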
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        branches=False,
        caches=False,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(perf_paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                if not any('perf_'+k in r and r['perf_'+k].strip()
                        for k in PerfResult._fields):
                    continue
                try:
                    results.append(PerfResult(
                        **{k: r[k] for k in PerfResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perf_'+k] for k in PerfResult._fields
                            if 'perf_'+k in r and r['perf_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(
                key=lambda r: tuple(
                    (getattr(r, k),) if getattr(r, k) is not None else ()
                    for k in ([k] if k else PerfResult._sort)),
                reverse=reverse ^ (not k or k in PerfResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfResult._by)
                + ['perf_'+k for k in (
                    fields if fields is not None else PerfResult._fields)])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k) for k in (
                        by if by is not None else PerfResult._by)}
                    | {'perf_'+k: getattr(r, k) for k in (
                        fields if fields is not None
                        else PerfResult._fields)})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    if not any('perf_'+k in r and r['perf_'+k].strip()
                            for k in PerfResult._fields):
                        continue
                    try:
                        diff_results.append(PerfResult(
                            **{k: r[k] for k in PerfResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perf_'+k] for k in PerfResult._fields
                                if 'perf_'+k in r
                                and r['perf_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfResult, diff_results,
            by=by,
            defines=defines)

    # print table
    if not args.get('quiet'):
        if args.get('annotate') or args.get('threshold'):
            # annotate sources
            annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
        else:
            # print table
            table(PerfResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields if fields is not None
                    else ['cycles'] if not branches and not caches
                    else ['bmisses', 'branches'] if branches
                    else ['cmisses', 'caches'],
                sort=sort,
                **args)

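# A minimal sketch of the CSV layout written by -o and consumed by -u/-d:
# the by-fields come first, then perf_-prefixed measurement fields, e.g.
# with -bfunction:
#
#   function,perf_cycles,perf_bmisses,...
#   lfs_mount,12345,67,...
#
# (hypothetical values; the actual columns depend on the -b/-f flags)
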
def main(**args):
    if args.get('record'):
        return record(**args)
    else:
        return report(**args)


if __name__ == "__main__":
    import argparse

    # bit of a hack, but parse_intermixed_args and REMAINDER are
    # incompatible, so we need to figure out what we want before running
    # argparse
    if '-R' in sys.argv or '--record' in sys.argv:
        nargs = argparse.REMAINDER
    else:
        nargs = '*'

    argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
    argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
    parser = argparse.ArgumentParser(
        description="Aggregate and report Linux perf results.",
        allow_abbrev=False,
        conflict_handler='ignore')
    parser.add_argument(
        'perf_paths',
        nargs=nargs,
        help="Input *.perf files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (
            lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, True if option == '-S' else False))
    parser.add_argument(
        '-s', '--sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        nargs='?',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--branches',
        action='store_true',
        help="Show branches and branch misses.")
    parser.add_argument(
        '--caches',
        action='store_true',
        help="Show cache accesses and cache misses.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates "
            "up to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may "
            "not terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with samples above this threshold as a percent "
            "of all lines. Defaults to %s." % ','.join(
                str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. "
            "Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--perf-path',
        type=lambda x: x.split(),
        help="Path to the perf executable, may include flags. "
            "Defaults to %r." % PERF_PATH)
    parser.add_argument(
        '--objdump-path',
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)

    # record flags
    record_parser = parser.add_argument_group('record options')
    record_parser.add_argument(
        'command',
        nargs=nargs,
        help="Command to run.")
    record_parser.add_argument(
        '-R', '--record',
        action='store_true',
        help="Run a command and aggregate perf measurements.")
    record_parser.add_argument(
        '-o', '--output',
        help="Output file. Uses flock to synchronize. This is stored as a "
            "zip-file of multiple perf results.")
    record_parser.add_argument(
        '--perf-freq',
        help="perf sampling frequency. This is passed directly to perf. "
            "Defaults to %r." % PERF_FREQ)
    record_parser.add_argument(
        '--perf-period',
        help="perf sampling period. This is passed directly to perf.")
    record_parser.add_argument(
        '--perf-events',
        help="perf events to record. This is passed directly to perf. "
            "Defaults to %r." % PERF_EVENTS)
    record_parser.add_argument(
        '--perf-path',
        type=lambda x: x.split(),
        help="Path to the perf executable, may include flags. "
            "Defaults to %r." % PERF_PATH)

    # avoid intermixed/REMAINDER conflict, see above
    if nargs == argparse.REMAINDER:
        args = parser.parse_args()
    else:
        args = parser.parse_intermixed_args()

    # perf_paths/command overlap, so need to do some munging here
    args.command = args.perf_paths
    if args.record:
        if not args.command:
            print('error: no command specified?')
            sys.exit(-1)
        if not args.output:
            print('error: no output file specified?')
            sys.exit(-1)

    sys.exit(main(**{k: v
        for k, v in vars(args).items()
        if v is not None}))