391 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			391 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
| #!/usr/bin/env python
 | |
| #
 | |
| # This tool is a debug location coverage calculator.
 | |
| # It parses the output of llvm-dwarfdump --statistics and reports it
 | |
| # in a more human-readable way.
 | |
| #
 | |
| 
 | |
| from __future__ import print_function
 | |
| import argparse
 | |
| import os
 | |
| import sys
 | |
| from json import loads
 | |
| from math import ceil
 | |
| from collections import OrderedDict
 | |
| from subprocess import Popen, PIPE
 | |
| 
 | |
| # This special value marks statistics that overflowed.
 | |
| TAINT_VALUE = "tainted"
 | |
| 
 | |
| # Initialize the plot.
 | |
def init_plot(plt):
  """Set the title, axis labels and tick styling on the given plot object.

  plt is expected to offer the pyplot calls used below (title, xlabel,
  ylabel, xticks, yticks).
  """
  heading = 'Debug Location Statistics'
  x_caption = 'location buckets'
  y_caption = 'number of variables in the location buckets'

  plt.title(heading, fontweight='bold')
  plt.xlabel(x_caption)
  plt.ylabel(y_caption)
  # The bucket names are long, so the x tick labels are slanted and small.
  plt.xticks(rotation=45, fontsize='x-small')
  plt.yticks()
 | |
| 
 | |
| # Finalize the plot.
 | |
def finish_plot(plt):
  """Add the legend and grid, save the figure and report where it went.

  The figure is written to 'locstats.png' through plt.savefig().
  """
  grid_style = dict(color='grey', which='major', axis='y', linestyle='-',
                    linewidth=0.3)

  plt.legend()
  plt.grid(**grid_style)
  plt.savefig('locstats.png')
  print('The plot was saved within "locstats.png".')
 | |
| 
 | |
| # Holds the debug location statistics.
 | |
class LocationStats:
  """Holds the debug location statistics of one file and reports them.

  Every counter is either a number or TAINT_VALUE, the marker stored for a
  statistic that overflowed.

  Attributes:
    file_name: name of the processed file; used in the plot labels.
    variables_total: total number of source variables (may be None).
    variables_total_locstats: number of variables processed by the location
      statistics; the denominator of the per-bucket percentages.
    variables_with_loc: number of source variables with a location (may be
      None).
    scope_bytes_covered: scope bytes covered by a location.
    scope_bytes: scope bytes in total.
    variables_coverage_map: mapping from a coverage bucket name to the number
      of variables in that bucket.
  """

  def __init__(self, file_name, variables_total, variables_total_locstats,
    variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
    variables_coverage_map):
    self.file_name = file_name
    self.variables_total = variables_total
    self.variables_total_locstats = variables_total_locstats
    self.variables_with_loc = variables_with_loc
    self.scope_bytes_covered = variables_scope_bytes_covered
    self.scope_bytes = variables_scope_bytes
    self.variables_coverage_map = variables_coverage_map

  # Express `part` as an integer percentage of `whole`.
  def _percentage(self, part, whole):
    """Return part/whole as an integer percentage.

    Returns TAINT_VALUE if either operand is tainted, and 0 when `whole` is
    zero (instead of raising ZeroDivisionError).
    """
    if part == TAINT_VALUE or whole == TAINT_VALUE:
      return TAINT_VALUE
    if whole == 0:
      return 0
    # NOTE(review): ceil() is applied to the numerator only and int() then
    # truncates the quotient. This is kept as is so the reported numbers do
    # not change.
    return int(ceil(part * 100.0) / whole)

  # Get the PC ranges coverage.
  def get_pc_coverage(self):
    """Return the percentage of scope bytes covered, or TAINT_VALUE.

    Returns 0 when there are no scope bytes at all, so the plotting methods
    (which do not check for that case) do not fail with a division by zero.
    """
    return self._percentage(self.scope_bytes_covered, self.scope_bytes)

  # Pretty print the debug location buckets.
  def pretty_print(self):
    """Print the statistics as a table on the standard output.

    Returns -1 (after printing a notice) when there are no scope bytes,
    0 otherwise.
    """
    if self.scope_bytes == 0:
      print ('No scope bytes found.')
      return -1

    pc_ranges_covered = self.get_pc_coverage()

    # Compute every percentage before printing anything.
    variables_coverage_per_map = {}
    for cov_bucket in coverage_buckets():
      variables_coverage_per_map[cov_bucket] = self._percentage(
        self.variables_coverage_map[cov_bucket],
        self.variables_total_locstats)

    print (' =================================================')
    print ('            Debug Location Statistics       ')
    print (' =================================================')
    print ('     cov%           samples         percentage(~)  ')
    print (' -------------------------------------------------')
    for cov_bucket in coverage_buckets():
      samples = self.variables_coverage_map[cov_bucket]
      # A tainted row holds the TAINT_VALUE string, which the integer ('d')
      # format specifiers cannot render.
      if samples == TAINT_VALUE or \
         self.variables_total_locstats == TAINT_VALUE:
        row = '   {0:10}     {1:8}              {2:3}%'
      else:
        row = '   {0:10}     {1:8d}              {2:3d}%'
      print (row.format(cov_bucket, samples,
                        variables_coverage_per_map[cov_bucket]))
    print (' =================================================')
    print (' -the number of debug variables processed: ' \
      + str(self.variables_total_locstats))
    print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

    # Only if we are processing all the variables output the total
    # availability.
    if self.variables_total and self.variables_with_loc:
      total_availability = self._percentage(self.variables_with_loc,
                                            self.variables_total)
      print (' -------------------------------------------------')
      print (' -total availability: ' + str(total_availability) + '%')
    print (' =================================================')

    return 0

  # Draw a plot representing the location buckets.
  def draw_plot(self):
    """Draw a bar chart of the location buckets and save it (matplotlib)."""
    from matplotlib import pyplot as plt

    # Hand matplotlib real lists rather than dict views.
    bucket_names = list(self.variables_coverage_map.keys())
    bucket_counts = list(self.variables_coverage_map.values())
    buckets = range(len(bucket_names))

    plt.figure(figsize=(12, 8))
    init_plot(plt)
    plt.bar(buckets, bucket_counts, align='center',
            tick_label=bucket_names,
            label='variables of {}'.format(self.file_name))

    # Place the text box with the coverage info.
    pc_ranges_covered = self.get_pc_coverage()
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)

  # Compare the two LocationStats objects and draw a plot showing
  # the difference.
  def draw_location_diff(self, locstats_to_compare):
    """Draw the buckets of this object next to those of another one.

    locstats_to_compare is the other LocationStats object; its bars are drawn
    in red to the left of this object's bars.
    """
    from matplotlib import pyplot as plt

    pc_ranges_covered = self.get_pc_coverage()
    pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

    # Hand matplotlib real lists rather than dict views.
    counts = list(self.variables_coverage_map.values())
    counts_to_compare = \
      list(locstats_to_compare.variables_coverage_map.values())
    buckets = range(len(counts))
    buckets_to_compare = range(len(counts_to_compare))

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    init_plot(plt)

    comparison_keys = list(coverage_buckets())
    # A positive width puts these bars right of the tick, a negative one
    # puts the compared bars left of it.
    ax.bar(buckets, counts, align='edge', width=0.4,
           label='variables of {}'.format(self.file_name))
    ax.bar(buckets_to_compare, counts_to_compare,
           color='r', align='edge', width=-0.4,
           label='variables of {}'.format(locstats_to_compare.file_name))
    ax.set_xticks(range(len(comparison_keys)))
    ax.set_xticklabels(comparison_keys)

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.88,
             '{} PC ranges covered: {}%'. \
             format(self.file_name, pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)
    plt.text(0.02, 0.83,
             '{} PC ranges covered: {}%'. \
             format(locstats_to_compare.file_name,
                    pc_ranges_covered_to_compare),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)
 | |
| 
 | |
| # Define the location buckets.
 | |
def coverage_buckets():
  """Yield the names of the location coverage buckets, lowest first.

  The sequence is '0%', '(0%,10%)', then the ten-point ranges '[10%,20%)'
  up to '[90%,100%)', and finally '100%'.
  """
  names = ['0%', '(0%,10%)']
  lower = 10
  while lower <= 90:
    names.append('[{0}%,{1}%)'.format(lower, lower + 10))
    lower += 10
  names.append('100%')

  for name in names:
    yield name
 | |
| 
 | |
| # Parse the JSON representing the debug statistics, and create a
 | |
| # LocationStats object.
 | |
def parse_locstats(opts, binary):
  """Run llvm-dwarfdump --statistics on a file and build its LocationStats.

  Args:
    opts: the parsed program options. only_variables and
      only_formal_parameters select which statistics are read, and
      ignore_debug_entry_values selects the counters that exclude entry
      values.
    binary: path of the file handed to llvm-dwarfdump.

  Returns:
    A LocationStats object. Fields reported as 'overflowed' hold TAINT_VALUE,
    and a warning is printed for each of them.

  Exits with status 1 if the llvm-dwarfdump output is not valid JSON.
  A statistic missing from the JSON object surfaces as a KeyError.
  """
  # The three modes read the same set of statistics; only these two name
  # fragments of the JSON keys differ.
  include_totals = False
  if opts.only_variables:
    kind, sum_prefix = 'local vars', 'sum_all_local_vars'
  elif opts.only_formal_parameters:
    kind, sum_prefix = 'params', 'sum_all_params'
  else:
    kind, sum_prefix = 'variables', 'sum_all_variables'
    include_totals = True

  # Get the directory of the LLVM tools.
  llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                    "llvm-dwarfdump")
  # The statistics llvm-dwarfdump option.
  llvm_dwarfdump_stats_opt = "--statistics"

  # Generate the stats with the llvm-dwarfdump.
  subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                  stdin=PIPE, stdout=PIPE, stderr=PIPE,
                  universal_newlines=True)
  cmd_stdout, _ = subproc.communicate()

  # TODO: Handle errors that are coming from llvm-dwarfdump.

  # Get the JSON and parse it. Only a decoding failure is caught here, so
  # that SystemExit and KeyboardInterrupt are not swallowed.
  try:
    json_parsed = loads(cmd_stdout)
  except ValueError:
    print ('error: No valid llvm-dwarfdump statistics found.')
    sys.exit(1)

  # TODO: Parse the statistics Version from JSON.

  def init_field(name):
    if json_parsed[name] == 'overflowed':
      print ('warning: "' + name + '" field overflowed.')
      return TAINT_VALUE
    return json_parsed[name]

  # The total availability is only reported when processing all variables.
  variables_total = None
  variables_with_loc = None
  if include_totals:
    variables_total = init_field('#source variables')
    variables_with_loc = init_field('#source variables with location')

  variables_total_locstats = \
    init_field('#{kind} processed by location statistics'.format(kind=kind))
  variables_scope_bytes_covered = \
    init_field('{prefix}(#bytes in parent scope covered '
               'by DW_AT_location)'.format(prefix=sum_prefix))
  variables_scope_bytes = \
    init_field('{prefix}(#bytes in parent scope)'.format(prefix=sum_prefix))

  if not opts.ignore_debug_entry_values:
    bucket_key = '#{kind} with {bucket} of parent scope covered ' \
                 'by DW_AT_location'
  else:
    variables_scope_bytes_entry_values = \
      init_field('{prefix}(#bytes in parent scope covered '
                 'by DW_OP_entry_value)'.format(prefix=sum_prefix))
    # Take the bytes covered only by entry values out of the covered total,
    # unless one of the two counters overflowed.
    if variables_scope_bytes_covered != TAINT_VALUE and \
       variables_scope_bytes_entry_values != TAINT_VALUE:
      variables_scope_bytes_covered = variables_scope_bytes_covered \
        - variables_scope_bytes_entry_values
    bucket_key = '#{kind} - entry values with {bucket} of parent scope ' \
                 'covered by DW_AT_location'

  variables_coverage_map = OrderedDict()
  for cov_bucket in coverage_buckets():
    cov_category = bucket_key.format(kind=kind, bucket=cov_bucket)
    variables_coverage_map[cov_bucket] = init_field(cov_category)

  return LocationStats(binary, variables_total, variables_total_locstats,
                       variables_with_loc, variables_scope_bytes_covered,
                       variables_scope_bytes, variables_coverage_map)
 | |
| 
 | |
| # Parse the program arguments.
 | |
def parse_program_args(parser):
  """Register the program options on `parser` and parse the command line.

  Returns the namespace produced by parser.parse_args(): one boolean per
  switch plus the list of positional file names in `file_names`.
  """
  # Every switch is a plain boolean flag that is off by default.
  switches = (
    ('--only-variables',
     'calculate the location statistics only for local variables'),
    ('--only-formal-parameters',
     'calculate the location statistics only for formal parameters'),
    ('--ignore-debug-entry-values',
     'ignore the location statistics on locations with '
     'entry values'),
    ('--draw-plot',
     'show histogram of location buckets generated (requires '
     'matplotlib)'),
    ('--compare',
     'compare the debug location coverage on two files provided, '
     'and draw a plot showing the difference  (requires '
     'matplotlib)'),
  )
  for flag, description in switches:
    parser.add_argument(flag, action='store_true', default=False,
                        help=description)

  parser.add_argument('file_names', nargs='+', type=str,
                      help='file to process')

  return parser.parse_args()
 | |
| 
 | |
| # Verify that the program inputs meet the requirements.
 | |
def verify_program_inputs(opts):
  """Check that the parsed options form a usable combination.

  Prints an error and returns False when too few arguments were given, when
  both --only* options are set, when the number of files does not match the
  mode (one file normally, two with --compare), or when a plot is requested
  but matplotlib cannot be imported. Returns True otherwise.
  """
  problem = None
  if len(sys.argv) < 2:
    problem = 'error: Too few arguments.'
  elif opts.only_variables and opts.only_formal_parameters:
    problem = 'error: Please use just one --only* option.'
  elif opts.compare:
    if len(opts.file_names) != 2:
      problem = 'error: Please specify two files to process.'
  elif len(opts.file_names) != 1:
    problem = 'error: Please specify only one file to process.'

  if problem is not None:
    print (problem)
    return False

  # matplotlib is needed only by the plotting modes.
  if not (opts.draw_plot or opts.compare):
    return True

  try:
    import matplotlib
  except ImportError:
    print('error: matplotlib not found.')
    return False

  return True
 | |
| 
 | |
def Main():
  """Parse the command line and print, plot or compare the statistics.

  Exits with status 1 (after printing the help) when the inputs are invalid,
  and with status 0 when pretty_print() reports that there is nothing to
  show.
  """
  arg_parser = argparse.ArgumentParser()
  options = parse_program_args(arg_parser)

  if not verify_program_inputs(options):
    arg_parser.print_help()
    sys.exit(1)

  primary_stats = parse_locstats(options, options.file_names[0])

  if options.compare:
    # Draw a plot showing the difference in debug location coverage between
    # two files.
    other_stats = parse_locstats(options, options.file_names[1])
    primary_stats.draw_location_diff(other_stats)
    return

  if options.draw_plot:
    # Draw a histogram representing the location buckets.
    primary_stats.draw_plot()
    return

  # Pretty print collected info on the standard output.
  if primary_stats.pretty_print() == -1:
    sys.exit(0)
 | |
| 
 | |
if __name__ == '__main__':
  # Run the tool only when executed as a script, then report success.
  Main()
  sys.exit(0)
 |