408 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			408 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
| #!/usr/bin/env python2.7
 | |
| 
 | |
| """A script to generate FileCheck statements for regression tests.
 | |
| 
 | |
| This script is a utility to update LLVM opt or llc test cases with new
 | |
| FileCheck patterns. It can either update all of the tests in the file or
 | |
| a single test function.
 | |
| 
 | |
| Example usage:
 | |
| $ update_test_checks.py --tool=../bin/opt test/foo.ll
 | |
| 
 | |
| Workflow:
 | |
| 1. Make a compiler patch that requires updating some number of FileCheck lines
 | |
|    in regression test files.
 | |
| 2. Save the patch and revert it from your local work area.
 | |
| 3. Update the RUN-lines in the affected regression tests to look canonical.
 | |
|    Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
 | |
| 4. Refresh the FileCheck lines for either the entire file or select functions by
 | |
|    running this script.
 | |
| 5. Commit the fresh baseline of checks.
 | |
| 6. Apply your patch from step 1 and rebuild your local binaries.
 | |
| 7. Re-run this script on affected regression tests.
 | |
| 8. Check the diffs to ensure the script has done something reasonable.
 | |
| 9. Submit a patch including the regression test diffs for review.
 | |
| 
 | |
| A common pattern is to have the script insert complete checking of every
 | |
| instruction. Then, edit it down to only check the relevant instructions.
 | |
| The script is designed to make adding checks to a test case fast, it is *not*
 | |
| designed to be authoritative about what constitutes a good test!
 | |
| """
 | |
| 
 | |
| import argparse
 | |
| import itertools
 | |
| import os         # Used to advertise this file's name ("autogenerated_note").
 | |
| import string
 | |
| import subprocess
 | |
| import sys
 | |
| import tempfile
 | |
| import re
 | |
| 
 | |
# Prefix of the note written at the top of every regenerated test file; main()
# also uses it to recognize and drop a note left by a previous run.
ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

# Scrubbers applied to tool output and test lines.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# re.M is required here: the pattern is applied to a whole multi-line function
# body, and without it '^' would only match a kill comment on the first line.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Patterns used to parse the test file and the tool output.
RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
 | |
| 
 | |
| 
 | |
| # Invoke the tool that is being tested.
 | |
def invoke_tool(args, cmd_args, ir):
  """Run the tool under test on a test file and return its standard output.

  Args:
    args: parsed command-line arguments; only args.tool_binary is read.
    cmd_args: extra tool arguments, appended to the binary as one string.
    ir: path of the test file, fed to the tool on standard input.

  Returns:
    The tool's stdout as text, with CRLF line endings converted to LF.

  Raises:
    subprocess.CalledProcessError: if the tool exits with a non-zero status.
  """
  # NOTE: the command is assembled as a string and run through the shell, so
  # both the tool path and the RUN-line arguments are interpreted by the shell.
  with open(ir) as ir_file:
    stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                     shell=True, stdin=ir_file)
  # check_output returns bytes; where that is not the same type as str, decode
  # so the text operations below (and in the callers) work on the result.
  if not isinstance(stdout, str):
    stdout = stdout.decode()
  # Normalize Windows (CRLF) line endings to Unix (LF) style.
  return stdout.replace('\r\n', '\n')
 | |
| 
 | |
| 
 | |
| # FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
 | |
def scrub_asm(asm):
  """Return asm with volatile x86 details replaced by FileCheck patterns.

  The rewrites run in the listed order, each on the result of the previous one.
  """
  rewrites = (
      # Detect shuffle asm comments and hide the operands in favor of the
      # comments.
      (SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
      # Generically match the stack offset of a memory operand.
      (SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
      # Generically match a RIP-relative memory operand.
      (SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
      # Generically match a LCP symbol.
      (SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
      # Strip kill operands inserted into the asm.
      (SCRUB_KILL_COMMENT_RE, ''),
  )
  for pattern, replacement in rewrites:
    asm = pattern.sub(replacement, asm)
  return asm
 | |
| 
 | |
| 
 | |
def scrub_body(body, tool_basename):
  """Normalize the whitespace of a function body taken from the tool output.

  Args:
    body: text of one function body (asm or IR).
    tool_basename: basename of the tool; "llc" additionally applies the
      asm scrubbers in scrub_asm().

  Returns:
    The scrubbed body text.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation. The str method is used rather than
  # the deprecated string.expandtabs() module function.
  body = body.expandtabs(2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
  if tool_basename == "llc":
    body = scrub_asm(body)
  return body
 | |
| 
 | |
| 
 | |
| # Build up a dictionary of all the function bodies.
 | |
def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
  """Record the scrubbed body of every function found in the tool output.

  Args:
    raw_tool_output: complete stdout text of one tool invocation.
    prefixes: list of FileCheck prefixes used by the RUN line that produced
      the output.
    func_dict: maps prefix -> {function name -> scrubbed body}; updated in
      place. Every prefix in prefixes must already have an entry.
    verbose: when true, echo each function and its scrubbed body to stderr.
    tool_basename: "llc" selects the asm function regex, anything else the IR
      function regex.

  When a prefix already holds a different body for a function, the entry is
  set to None for every prefix except the last one in prefixes; for the last
  prefix a warning is printed and the new body replaces the old one.
  """
  func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
  # finditer() only yields successful matches, so no per-match check is needed.
  for m in func_regex.finditer(raw_tool_output):
    func = m.group('func')
    scrubbed_body = scrub_body(m.group('body'), tool_basename)
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      sys.stderr.write('Processing function: ' + func + '\n')
      for l in scrubbed_body.splitlines():
        sys.stderr.write('  ' + l + '\n')
    for prefix in prefixes:
      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
        if prefix == prefixes[-1]:
          sys.stderr.write(('WARNING: Found conflicting asm under the '
                            'same prefix: %r!' % (prefix,)) + '\n')
        else:
          # The body differs between RUN lines sharing this prefix, so the
          # prefix cannot describe this function: mark it unusable.
          func_dict[prefix][func] = None
          continue

      func_dict[prefix][func] = scrubbed_body
 | |
| 
 | |
| 
 | |
| # Create a FileCheck variable name based on an IR name.
 | |
def get_value_name(var):
  """Return the FileCheck variable name derived from the IR value name var.

  All-digit names get a 'TMP' prefix, dots become underscores, and the result
  is upper-cased.
  """
  prefix = 'TMP' if var.isdigit() else ''
  return (prefix + var).replace('.', '_').upper()
 | |
| 
 | |
| 
 | |
| # Create a FileCheck variable from regex.
 | |
def get_value_definition(var):
  """Return the FileCheck pattern that defines the variable for IR value var."""
  name = get_value_name(var)
  return '[[' + name + ':%.*]]'
 | |
| 
 | |
| 
 | |
| # Use a FileCheck variable.
 | |
def get_value_use(var):
  """Return the FileCheck pattern that refers to the variable for IR value var."""
  name = get_value_name(var)
  return '[[' + name + ']]'
 | |
| 
 | |
| # Replace IR value defs and uses with FileCheck variables.
 | |
def genericize_check_lines(lines):
  """Replace IR value names in lines with FileCheck variables.

  The first occurrence of each value becomes a variable definition and every
  later occurrence a use. IR comments are removed from each line.

  Args:
    lines: list of IR text lines; it is modified in place.

  Returns:
    The same list object, with every element rewritten.
  """
  # Names already turned into a definition; shared by all lines of this call.
  vars_seen = set()

  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
 | |
| 
 | |
| 
 | |
def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
  """Append the FileCheck lines for one function to output_lines.

  For each RUN line (one entry of prefix_list), the first of its prefixes that
  has a non-empty body for func_name and has not been printed yet is emitted
  as a -LABEL line followed by one check line per body line and a ';' line.

  Args:
    output_lines: list of output lines built so far; extended in place.
    prefix_list: list of (check_prefixes, tool_args) tuples.
    func_dict: maps prefix -> {function name -> scrubbed body or None}.
    func_name: name of the function to emit checks for.
    tool_basename: "llc" for asm checks; "opt" for IR checks, which also get
      their values genericized and their IR comments removed.

  Returns:
    output_lines.
  """
  is_ir = tool_basename == "opt"

  # Select a label format based on whether we're checking asm or IR.
  if tool_basename == "llc":
    check_label_format = "; %s-LABEL: %s:"
  else:
    check_label_format = "; %s-LABEL: @%s("

  printed_prefixes = []
  for checkprefixes, _ in prefix_list:
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break
      # A function present in the test file may be missing from the tool
      # output (or marked None on conflict); skip it rather than raising
      # KeyError.
      body = func_dict[checkprefix].get(func_name)
      if not body:
        continue
      printed_prefixes.append(checkprefix)
      output_lines.append(check_label_format % (checkprefix, func_name))
      func_body = body.splitlines()

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      if is_ir:
        func_body = genericize_check_lines(func_body)

      # For llc tests, there may be asm directives between the label and the
      # first checked line (most likely that first checked line is "# BB#0"),
      # so the first asm line gets a plain check instead of a -NEXT check.
      is_blank_line = not is_ir

      for func_line in func_body:
        if func_line.strip() == '':
          # Blank lines are not checked; the line after one restarts with a
          # plain check because it no longer directly follows the last match.
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        if is_ir:
          func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        if is_blank_line:
          output_lines.append('; %s:       %s' % (checkprefix, func_line))
        else:
          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(';')
      break
  return output_lines
 | |
| 
 | |
| 
 | |
def should_add_line_to_output(input_line, prefix_set):
  """Return True if a line of the test function body should be kept.

  Blank comment lines (';') and existing check lines whose prefix is in
  prefix_set are dropped, since the checks get regenerated; every other line,
  including a blank one, is kept.
  """
  if input_line.strip() == ';':
    return False
  check = CHECK_RE.match(input_line)
  return not (check and check.group(1) in prefix_set)
 | |
| 
 | |
| 
 | |
def main():
  """Regenerate the FileCheck lines of every test file named on the command line.

  For each test file: collect its RUN lines, run the tool once per usable RUN
  line, record the function bodies it prints, then rewrite the file in place
  with fresh check lines in front of each (selected) function body.

  Exits with status 1 if the basename of --tool-binary is neither "llc" nor
  "opt".
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('--tool-binary', default='llc',
                      help='The tool used to generate the test case')
  parser.add_argument(
      '--function', help='The function in the test file to update')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()

  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

  tool_basename = os.path.basename(args.tool_binary)
  if tool_basename not in ('llc', 'opt'):
    sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
    sys.exit(1)

  for test in args.tests:
    if args.verbose:
      sys.stderr.write('Scanning for RUN lines in test file: %s' % (test,) + '\n')
    with open(test) as f:
      input_lines = [l.rstrip() for l in f]

    # Collect the RUN lines, joining those continued with a trailing backslash.
    raw_lines = [m.group(1)
                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
      if run_lines[-1].endswith("\\"):
        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
      else:
        run_lines.append(l)

    if args.verbose:
      sys.stderr.write('Found %d RUN lines:' % (len(run_lines),) + '\n')
      for l in run_lines:
        sys.stderr.write('  RUN: ' + l + '\n')

    prefix_list = []
    for l in run_lines:
      commands = [cmd.strip() for cmd in l.split('|', 1)]
      tool_cmd = commands[0]
      # A RUN line without a pipe has no FileCheck stage; treat it as an empty
      # one so it is reported and skipped below instead of failing to unpack.
      filecheck_cmd = commands[1] if len(commands) > 1 else ''

      if not tool_cmd.startswith(tool_basename + ' '):
        sys.stderr.write(
            'WARNING: Skipping non-%s RUN line: %s' % (tool_basename, l) + '\n')
        continue

      if not filecheck_cmd.startswith('FileCheck '):
        sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
        continue

      # Drop the tool name and the input-file placeholder: the test file is
      # fed to the tool on stdin by invoke_tool().
      tool_cmd_args = tool_cmd[len(tool_basename):].strip()
      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
                               for item in m.group(1).split(',')]
      if not check_prefixes:
        check_prefixes = ['CHECK']

      # FIXME: We should use multiple check prefixes to common check lines. For
      # now, we just ignore all but the last.
      prefix_list.append((check_prefixes, tool_cmd_args))

    # Every prefix needs an entry before any tool output is recorded.
    func_dict = {}
    for prefixes, _ in prefix_list:
      for prefix in prefixes:
        func_dict[prefix] = {}
    for prefixes, tool_args in prefix_list:
      if args.verbose:
        sys.stderr.write(
            'Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
        sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

      raw_tool_output = invoke_tool(args, tool_args, test)
      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, tool_basename)

    is_in_function = False
    is_in_function_start = False
    # Name of the function currently being rewritten; set when its 'define'
    # line is seen, before is_in_function_start can become true.
    name = None
    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
    if args.verbose:
      sys.stderr.write('Rewriting FileCheck prefixes: %s' % (prefix_set,) + '\n')
    output_lines = [autogenerated_note]

    for input_line in input_lines:
      if is_in_function_start:
        if input_line == '':
          continue
        if input_line.lstrip().startswith(';'):
          # Keep comments that are not check lines for one of our prefixes.
          m = CHECK_RE.match(input_line)
          if not m or m.group(1) not in prefix_set:
            output_lines.append(input_line)
            continue

        # Print out the various check lines here.
        output_lines = add_checks(output_lines, prefix_list, func_dict, name, tool_basename)
        is_in_function_start = False

      if is_in_function:
        if not should_add_line_to_output(input_line, prefix_set):
          continue
        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
        output_lines.append(input_line)
        if input_line.strip() == '}':
          is_in_function = False
        continue

      # Discard any previous script advertising.
      if input_line.startswith(ADVERT):
        continue

      # If it's outside a function, it just gets copied to the output.
      output_lines.append(input_line)

      m = IR_FUNCTION_RE.match(input_line)
      if not m:
        continue
      name = m.group(1)
      if args.function is not None and name != args.function:
        # When filtering on a specific function, skip all others.
        continue
      is_in_function = is_in_function_start = True

    if args.verbose:
      sys.stderr.write(
          'Writing %d lines to %s...' % (len(output_lines), test) + '\n')

    # Binary mode keeps the '\n' line endings from being translated on write.
    with open(test, 'wb') as f:
      f.writelines([l + '\n' for l in output_lines])
 | |
| 
 | |
| 
 | |
# Run the updater only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
 | |
| 
 |