forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			457 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			457 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
#!/usr/bin/env python
 | 
						|
 | 
						|
"""
 | 
						|
CmpRuns - A simple tool for comparing two static analyzer runs to determine
 | 
						|
which reports have been added, removed, or changed.
 | 
						|
 | 
						|
This is designed to support automated testing using the static analyzer, from
 | 
						|
two perspectives:
 | 
						|
  1. To monitor changes in the static analyzer's reports on real code bases,
 | 
						|
     for regression testing.
 | 
						|
 | 
						|
  2. For use by end users who want to integrate regular static analyzer testing
 | 
						|
     into a buildbot like environment.
 | 
						|
 | 
						|
Usage:
 | 
						|
 | 
						|
    # Load the results of both runs, to obtain lists of the corresponding
 | 
						|
    # AnalysisDiagnostic objects.
 | 
						|
    #
 | 
						|
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
 | 
						|
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
 | 
						|
 | 
						|
    # Generate a relation from diagnostics in run A to diagnostics in run B
 | 
						|
    # to obtain a list of triples (a, b, confidence).
 | 
						|
    diff = compareResults(resultsA, resultsB)
 | 
						|
 | 
						|
"""
 | 
						|
from __future__ import division, print_function
 | 
						|
 | 
						|
from collections import defaultdict
 | 
						|
 | 
						|
from math import log
 | 
						|
from optparse import OptionParser
 | 
						|
import json
 | 
						|
import os
 | 
						|
import plistlib
 | 
						|
import re
 | 
						|
import sys
 | 
						|
 | 
						|
STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)
 | 
						|
 | 
						|
class Colors(object):
 | 
						|
    """
 | 
						|
    Color for terminal highlight.
 | 
						|
    """
 | 
						|
    RED = '\x1b[2;30;41m'
 | 
						|
    GREEN = '\x1b[6;30;42m'
 | 
						|
    CLEAR = '\x1b[0m'
 | 
						|
 | 
						|
# Information about analysis run:
 | 
						|
# path - the analysis output directory
 | 
						|
# root - the name of the root directory, which will be disregarded when
 | 
						|
# determining the source file name
 | 
						|
class SingleRunInfo(object):
 | 
						|
    def __init__(self, path, root="", verboseLog=None):
 | 
						|
        self.path = path
 | 
						|
        self.root = root.rstrip("/\\")
 | 
						|
        self.verboseLog = verboseLog
 | 
						|
 | 
						|
 | 
						|
class AnalysisDiagnostic(object):
 | 
						|
    def __init__(self, data, report, htmlReport):
 | 
						|
        self._data = data
 | 
						|
        self._loc = self._data['location']
 | 
						|
        self._report = report
 | 
						|
        self._htmlReport = htmlReport
 | 
						|
        self._reportSize = len(self._data['path'])
 | 
						|
 | 
						|
    def getFileName(self):
 | 
						|
        root = self._report.run.root
 | 
						|
        fileName = self._report.files[self._loc['file']]
 | 
						|
        if fileName.startswith(root) and len(root) > 0:
 | 
						|
            return fileName[len(root) + 1:]
 | 
						|
        return fileName
 | 
						|
 | 
						|
    def getRootFileName(self):
 | 
						|
        path = self._data['path']
 | 
						|
        if not path:
 | 
						|
            return self.getFileName()
 | 
						|
        p = path[0]
 | 
						|
        if 'location' in p:
 | 
						|
            fIdx = p['location']['file']
 | 
						|
        else: # control edge
 | 
						|
            fIdx = path[0]['edges'][0]['start'][0]['file']
 | 
						|
        out = self._report.files[fIdx]
 | 
						|
        root = self._report.run.root
 | 
						|
        if out.startswith(root):
 | 
						|
            return out[len(root):]
 | 
						|
        return out
 | 
						|
 | 
						|
    def getLine(self):
 | 
						|
        return self._loc['line']
 | 
						|
 | 
						|
    def getColumn(self):
 | 
						|
        return self._loc['col']
 | 
						|
 | 
						|
    def getPathLength(self):
 | 
						|
        return self._reportSize
 | 
						|
 | 
						|
    def getCategory(self):
 | 
						|
        return self._data['category']
 | 
						|
 | 
						|
    def getDescription(self):
 | 
						|
        return self._data['description']
 | 
						|
 | 
						|
    def getIssueIdentifier(self):
 | 
						|
        id = self.getFileName() + "+"
 | 
						|
        if 'issue_context' in self._data:
 | 
						|
            id += self._data['issue_context'] + "+"
 | 
						|
        if 'issue_hash_content_of_line_in_context' in self._data:
 | 
						|
            id += str(self._data['issue_hash_content_of_line_in_context'])
 | 
						|
        return id
 | 
						|
 | 
						|
    def getReport(self):
 | 
						|
        if self._htmlReport is None:
 | 
						|
            return " "
 | 
						|
        return os.path.join(self._report.run.path, self._htmlReport)
 | 
						|
 | 
						|
    def getReadableName(self):
 | 
						|
        if 'issue_context' in self._data:
 | 
						|
            funcnamePostfix = "#" + self._data['issue_context']
 | 
						|
        else:
 | 
						|
            funcnamePostfix = ""
 | 
						|
        rootFilename = self.getRootFileName()
 | 
						|
        fileName = self.getFileName()
 | 
						|
        if rootFilename != fileName:
 | 
						|
            filePrefix = "[%s] %s" % (rootFilename, fileName)
 | 
						|
        else:
 | 
						|
            filePrefix = rootFilename
 | 
						|
        return '%s%s:%d:%d, %s: %s' % (filePrefix,
 | 
						|
                                       funcnamePostfix,
 | 
						|
                                       self.getLine(),
 | 
						|
                                       self.getColumn(), self.getCategory(),
 | 
						|
                                       self.getDescription())
 | 
						|
 | 
						|
    # Note, the data format is not an API and may change from one analyzer
 | 
						|
    # version to another.
 | 
						|
    def getRawData(self):
 | 
						|
        return self._data
 | 
						|
 | 
						|
 | 
						|
class AnalysisReport(object):
 | 
						|
    def __init__(self, run, files):
 | 
						|
        self.run = run
 | 
						|
        self.files = files
 | 
						|
        self.diagnostics = []
 | 
						|
 | 
						|
 | 
						|
class AnalysisRun(object):
 | 
						|
    def __init__(self, info):
 | 
						|
        self.path = info.path
 | 
						|
        self.root = info.root
 | 
						|
        self.info = info
 | 
						|
        self.reports = []
 | 
						|
        # Cumulative list of all diagnostics from all the reports.
 | 
						|
        self.diagnostics = []
 | 
						|
        self.clang_version = None
 | 
						|
        self.stats = []
 | 
						|
 | 
						|
    def getClangVersion(self):
 | 
						|
        return self.clang_version
 | 
						|
 | 
						|
    def readSingleFile(self, p, deleteEmpty):
 | 
						|
        data = plistlib.readPlist(p)
 | 
						|
        if 'statistics' in data:
 | 
						|
            self.stats.append(json.loads(data['statistics']))
 | 
						|
            data.pop('statistics')
 | 
						|
 | 
						|
        # We want to retrieve the clang version even if there are no
 | 
						|
        # reports. Assume that all reports were created using the same
 | 
						|
        # clang version (this is always true and is more efficient).
 | 
						|
        if 'clang_version' in data:
 | 
						|
            if self.clang_version is None:
 | 
						|
                self.clang_version = data.pop('clang_version')
 | 
						|
            else:
 | 
						|
                data.pop('clang_version')
 | 
						|
 | 
						|
        # Ignore/delete empty reports.
 | 
						|
        if not data['files']:
 | 
						|
            if deleteEmpty:
 | 
						|
                os.remove(p)
 | 
						|
            return
 | 
						|
 | 
						|
        # Extract the HTML reports, if they exists.
 | 
						|
        if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
 | 
						|
            htmlFiles = []
 | 
						|
            for d in data['diagnostics']:
 | 
						|
                # FIXME: Why is this named files, when does it have multiple
 | 
						|
                # files?
 | 
						|
                assert len(d['HTMLDiagnostics_files']) == 1
 | 
						|
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
 | 
						|
        else:
 | 
						|
            htmlFiles = [None] * len(data['diagnostics'])
 | 
						|
 | 
						|
        report = AnalysisReport(self, data.pop('files'))
 | 
						|
        diagnostics = [AnalysisDiagnostic(d, report, h)
 | 
						|
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]
 | 
						|
 | 
						|
        assert not data
 | 
						|
 | 
						|
        report.diagnostics.extend(diagnostics)
 | 
						|
        self.reports.append(report)
 | 
						|
        self.diagnostics.extend(diagnostics)
 | 
						|
 | 
						|
 | 
						|
def loadResults(path, opts, root="", deleteEmpty=True):
 | 
						|
    """
 | 
						|
    Backwards compatibility API.
 | 
						|
    """
 | 
						|
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
 | 
						|
                                    deleteEmpty)
 | 
						|
 | 
						|
 | 
						|
def loadResultsFromSingleRun(info, deleteEmpty=True):
 | 
						|
    """
 | 
						|
    # Load results of the analyzes from a given output folder.
 | 
						|
    # - info is the SingleRunInfo object
 | 
						|
    # - deleteEmpty specifies if the empty plist files should be deleted
 | 
						|
 | 
						|
    """
 | 
						|
    path = info.path
 | 
						|
    run = AnalysisRun(info)
 | 
						|
 | 
						|
    if os.path.isfile(path):
 | 
						|
        run.readSingleFile(path, deleteEmpty)
 | 
						|
    else:
 | 
						|
        for (dirpath, dirnames, filenames) in os.walk(path):
 | 
						|
            for f in filenames:
 | 
						|
                if (not f.endswith('plist')):
 | 
						|
                    continue
 | 
						|
                p = os.path.join(dirpath, f)
 | 
						|
                run.readSingleFile(p, deleteEmpty)
 | 
						|
 | 
						|
    return run
 | 
						|
 | 
						|
 | 
						|
def cmpAnalysisDiagnostic(d):
 | 
						|
    return d.getIssueIdentifier()
 | 
						|
 | 
						|
 | 
						|
def compareResults(A, B, opts):
 | 
						|
    """
 | 
						|
    compareResults - Generate a relation from diagnostics in run A to
 | 
						|
    diagnostics in run B.
 | 
						|
 | 
						|
    The result is the relation as a list of triples (a, b) where
 | 
						|
    each element {a,b} is None or a matching element from the respective run
 | 
						|
    """
 | 
						|
 | 
						|
    res = []
 | 
						|
 | 
						|
    # Map size_before -> size_after
 | 
						|
    path_difference_data = []
 | 
						|
 | 
						|
    # Quickly eliminate equal elements.
 | 
						|
    neqA = []
 | 
						|
    neqB = []
 | 
						|
    eltsA = list(A.diagnostics)
 | 
						|
    eltsB = list(B.diagnostics)
 | 
						|
    eltsA.sort(key=cmpAnalysisDiagnostic)
 | 
						|
    eltsB.sort(key=cmpAnalysisDiagnostic)
 | 
						|
    while eltsA and eltsB:
 | 
						|
        a = eltsA.pop()
 | 
						|
        b = eltsB.pop()
 | 
						|
        if (a.getIssueIdentifier() == b.getIssueIdentifier()):
 | 
						|
            if a.getPathLength() != b.getPathLength():
 | 
						|
                if opts.relative_path_histogram:
 | 
						|
                    path_difference_data.append(
 | 
						|
                        float(a.getPathLength()) / b.getPathLength())
 | 
						|
                elif opts.relative_log_path_histogram:
 | 
						|
                    path_difference_data.append(
 | 
						|
                        log(float(a.getPathLength()) / b.getPathLength()))
 | 
						|
                elif opts.absolute_path_histogram:
 | 
						|
                    path_difference_data.append(
 | 
						|
                        a.getPathLength() - b.getPathLength())
 | 
						|
 | 
						|
            res.append((a, b))
 | 
						|
        elif a.getIssueIdentifier() > b.getIssueIdentifier():
 | 
						|
            eltsB.append(b)
 | 
						|
            neqA.append(a)
 | 
						|
        else:
 | 
						|
            eltsA.append(a)
 | 
						|
            neqB.append(b)
 | 
						|
    neqA.extend(eltsA)
 | 
						|
    neqB.extend(eltsB)
 | 
						|
 | 
						|
    # FIXME: Add fuzzy matching. One simple and possible effective idea would
 | 
						|
    # be to bin the diagnostics, print them in a normalized form (based solely
 | 
						|
    # on the structure of the diagnostic), compute the diff, then use that as
 | 
						|
    # the basis for matching. This has the nice property that we don't depend
 | 
						|
    # in any way on the diagnostic format.
 | 
						|
 | 
						|
    for a in neqA:
 | 
						|
        res.append((a, None))
 | 
						|
    for b in neqB:
 | 
						|
        res.append((None, b))
 | 
						|
 | 
						|
    if opts.relative_log_path_histogram or opts.relative_path_histogram or \
 | 
						|
            opts.absolute_path_histogram:
 | 
						|
        from matplotlib import pyplot
 | 
						|
        pyplot.hist(path_difference_data, bins=100)
 | 
						|
        pyplot.show()
 | 
						|
 | 
						|
    return res
 | 
						|
 | 
						|
def computePercentile(l, percentile):
 | 
						|
    """
 | 
						|
    Return computed percentile.
 | 
						|
    """
 | 
						|
    return sorted(l)[int(round(percentile * len(l) + 0.5)) - 1]
 | 
						|
 | 
						|
def deriveStats(results):
 | 
						|
    # Assume all keys are the same in each statistics bucket.
 | 
						|
    combined_data = defaultdict(list)
 | 
						|
 | 
						|
    # Collect data on paths length.
 | 
						|
    for report in results.reports:
 | 
						|
        for diagnostic in report.diagnostics:
 | 
						|
            combined_data['PathsLength'].append(diagnostic.getPathLength())
 | 
						|
 | 
						|
    for stat in results.stats:
 | 
						|
        for key, value in stat.items():
 | 
						|
            combined_data[key].append(value)
 | 
						|
    combined_stats = {}
 | 
						|
    for key, values in combined_data.items():
 | 
						|
        combined_stats[str(key)] = {
 | 
						|
            "max": max(values),
 | 
						|
            "min": min(values),
 | 
						|
            "mean": sum(values) / len(values),
 | 
						|
            "90th %tile": computePercentile(values, 0.9),
 | 
						|
            "95th %tile": computePercentile(values, 0.95),
 | 
						|
            "median": sorted(values)[len(values) // 2],
 | 
						|
            "total": sum(values)
 | 
						|
        }
 | 
						|
    return combined_stats
 | 
						|
 | 
						|
 | 
						|
def compareStats(resultsA, resultsB):
 | 
						|
    statsA = deriveStats(resultsA)
 | 
						|
    statsB = deriveStats(resultsB)
 | 
						|
    keys = sorted(statsA.keys())
 | 
						|
    for key in keys:
 | 
						|
        print(key)
 | 
						|
        for kkey in statsA[key]:
 | 
						|
            valA = float(statsA[key][kkey])
 | 
						|
            valB = float(statsB[key][kkey])
 | 
						|
            report = "%.3f -> %.3f" % (valA, valB)
 | 
						|
            # Only apply highlighting when writing to TTY and it's not Windows
 | 
						|
            if sys.stdout.isatty() and os.name != 'nt':
 | 
						|
                if valB != 0:
 | 
						|
                    ratio = (valB - valA) / valB
 | 
						|
                    if ratio < -0.2:
 | 
						|
                        report = Colors.GREEN + report + Colors.CLEAR
 | 
						|
                    elif ratio > 0.2:
 | 
						|
                        report = Colors.RED + report + Colors.CLEAR
 | 
						|
            print("\t %s %s" % (kkey, report))
 | 
						|
 | 
						|
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
 | 
						|
                             Stdout=sys.stdout):
 | 
						|
    # Load the run results.
 | 
						|
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
 | 
						|
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
 | 
						|
    if opts.show_stats:
 | 
						|
        compareStats(resultsA, resultsB)
 | 
						|
    if opts.stats_only:
 | 
						|
        return
 | 
						|
 | 
						|
    # Open the verbose log, if given.
 | 
						|
    if opts.verboseLog:
 | 
						|
        auxLog = open(opts.verboseLog, "wb")
 | 
						|
    else:
 | 
						|
        auxLog = None
 | 
						|
 | 
						|
    diff = compareResults(resultsA, resultsB, opts)
 | 
						|
    foundDiffs = 0
 | 
						|
    totalAdded = 0
 | 
						|
    totalRemoved = 0
 | 
						|
    for res in diff:
 | 
						|
        a, b = res
 | 
						|
        if a is None:
 | 
						|
            Stdout.write("ADDED: %r\n" % b.getReadableName())
 | 
						|
            foundDiffs += 1
 | 
						|
            totalAdded += 1
 | 
						|
            if auxLog:
 | 
						|
                auxLog.write("('ADDED', %r, %r)\n" % (b.getReadableName(),
 | 
						|
                                                      b.getReport()))
 | 
						|
        elif b is None:
 | 
						|
            Stdout.write("REMOVED: %r\n" % a.getReadableName())
 | 
						|
            foundDiffs += 1
 | 
						|
            totalRemoved += 1
 | 
						|
            if auxLog:
 | 
						|
                auxLog.write("('REMOVED', %r, %r)\n" % (a.getReadableName(),
 | 
						|
                                                        a.getReport()))
 | 
						|
        else:
 | 
						|
            pass
 | 
						|
 | 
						|
    TotalReports = len(resultsB.diagnostics)
 | 
						|
    Stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
 | 
						|
    Stdout.write("TOTAL ADDED: %r\n" % totalAdded)
 | 
						|
    Stdout.write("TOTAL REMOVED: %r\n" % totalRemoved)
 | 
						|
    if auxLog:
 | 
						|
        auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
 | 
						|
        auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
 | 
						|
        auxLog.close()
 | 
						|
 | 
						|
    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
 | 
						|
 | 
						|
def generate_option_parser():
 | 
						|
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
 | 
						|
    parser.add_option("", "--rootA", dest="rootA",
 | 
						|
                      help="Prefix to ignore on source files for directory A",
 | 
						|
                      action="store", type=str, default="")
 | 
						|
    parser.add_option("", "--rootB", dest="rootB",
 | 
						|
                      help="Prefix to ignore on source files for directory B",
 | 
						|
                      action="store", type=str, default="")
 | 
						|
    parser.add_option("", "--verbose-log", dest="verboseLog",
 | 
						|
                      help="Write additional information to LOG \
 | 
						|
                           [default=None]",
 | 
						|
                      action="store", type=str, default=None,
 | 
						|
                      metavar="LOG")
 | 
						|
    parser.add_option("--relative-path-differences-histogram",
 | 
						|
                      action="store_true", dest="relative_path_histogram",
 | 
						|
                      default=False,
 | 
						|
                      help="Show histogram of relative paths differences. \
 | 
						|
                            Requires matplotlib")
 | 
						|
    parser.add_option("--relative-log-path-differences-histogram",
 | 
						|
                      action="store_true", dest="relative_log_path_histogram",
 | 
						|
                      default=False,
 | 
						|
                      help="Show histogram of log relative paths differences. \
 | 
						|
                            Requires matplotlib")
 | 
						|
    parser.add_option("--absolute-path-differences-histogram",
 | 
						|
                      action="store_true", dest="absolute_path_histogram",
 | 
						|
                      default=False,
 | 
						|
                      help="Show histogram of absolute paths differences. \
 | 
						|
                            Requires matplotlib")
 | 
						|
    parser.add_option("--stats-only", action="store_true", dest="stats_only",
 | 
						|
                      default=False, help="Only show statistics on reports")
 | 
						|
    parser.add_option("--show-stats", action="store_true", dest="show_stats",
 | 
						|
                      default=False, help="Show change in statistics")
 | 
						|
    return parser
 | 
						|
 | 
						|
 | 
						|
def main():
 | 
						|
    parser = generate_option_parser()
 | 
						|
    (opts, args) = parser.parse_args()
 | 
						|
 | 
						|
    if len(args) != 2:
 | 
						|
        parser.error("invalid number of arguments")
 | 
						|
 | 
						|
    dirA, dirB = args
 | 
						|
 | 
						|
    dumpScanBuildResultsDiff(dirA, dirB, opts)
 | 
						|
 | 
						|
 | 
						|
if __name__ == '__main__':
 | 
						|
    main()
 |