# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

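# Example invocation (the script name and paths below are illustrative):
#   python demangle_tree.py /path/to/build --objdump=/path/to/llvm-objdump --extensions=o,obj
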
from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None

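# parse_line() scans one line of llvm-objdump output for a Microsoft-mangled
# name (everything from the first '?' up to the following '(') and treats the
# text inside the trailing parentheses as its demangling, e.g. (illustrative
# only, not verbatim objdump output):
#   ?func@@YAXXZ (void __cdecl func(void))
# A demangling containing "invalid mangled name" is later counted as a failure.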
def parse_line(line):
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()

class Result(object):
    # Demangling statistics for a single object file (or, once merged via
    # add_results, for a whole directory).
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0

class MapContext(object):
    # Bookkeeping used while distributing object files to the worker pool.
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0

def process_file(path, objdump):
    r = Result()
    r.file = path

    # Dump the symbol table with demangling enabled; a non-zero exit code is
    # recorded as a crash on this file.
    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode('utf-8')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles

def print_result_row(directory, result):
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))

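# Work is handed to the pool in chunks of `chunk_size` object files.  A chunk
# may end partway through a directory's file list; in that case the partially
# accumulated Result for that directory is parked in context.rincomplete and
# finished by a later chunk, so a directory's status row is only printed once
# the directory is complete.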
def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        re = Result()
        if context.rincomplete is not None:
            re = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = re
        re.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        re.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        re = dir_results[c]
        add_results(context.rcumulative, re)
        print_result_row(c, re)

def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)

def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                file, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        spct = float(nsuccess) / float(total.nsymbols)
        fpct = float(nfailed) / float(total.nsymbols)
        cpct = float(ncrashed) / float(nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except:
        traceback.print_exc()

    pool.close()
    pool.join()

if __name__ == "__main__":
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()