#!/usr/bin/env python

from __future__ import print_function

import argparse
import email.mime.multipart
import email.mime.text
import logging
import os.path
import pickle
import re
import smtplib
import subprocess
import sys
from datetime import datetime, timedelta
from phabricator import Phabricator

# Setting up a virtualenv to run this script can be done by running the
# following commands:
# $ virtualenv venv
# $ . ./venv/bin/activate
# $ pip install Phabricator

GIT_REPO_METADATA = (("llvm-monorepo", "https://github.com/llvm/llvm-project"),
                     )

# The below PhabXXX classes represent objects as modelled by Phabricator.
# The classes can be serialized to disk so that we don't needlessly have to
# re-fetch lots of data from Phabricator, which would make this script
# unusably slow.


class PhabObject:
    OBJECT_KIND = None

    def __init__(self, id):
        self.id = id


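# A PhabObjectCache keeps a dictionary of PhabObjects indexed by their id,
# together with the time range the cached data covers (oldest_info and
# most_recent_info, as epoch timestamps), and can pickle itself to/from disk.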
class PhabObjectCache:
    def __init__(self, PhabObjectClass):
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        return list(self.id2PhabObjects.keys())

    def get_objects(self):
        return list(self.id2PhabObjects.values())

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def _get_pickle_name(self, directory):
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """
        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing..."
                  .format(err))
        else:
            with f:
                try:
                    d = pickle.load(f)
                    self.__dict__.update(d)
                except EOFError as err:
                    print("Cache seems to be corrupt. " +
                          "Not using cache. Error message: {0}".format(err))

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print("wrote cache to disk, most_recent_info= {0}".format(
            datetime.fromtimestamp(self.most_recent_info)
            if self.most_recent_info is not None else None))


class PhabReview(PhabObject):
    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        self.phabDiffs = phabDiffs


class PhabUser(PhabObject):
    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        self.phid = phid
        self.realName = realName


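# PhabHunk parses a single hunk of a Phabricator diff (the "corpus" is the
# raw unified-diff text of the hunk) and records which line ranges in the old
# version of the file were touched, padded with a few lines of context.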
class PhabHunk:
    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])
        # self.actual_lines_changed_offset will contain the offsets of the
        # lines that were changed in this hunk.
        self.actual_lines_changed_offset = []
        offset = self.oldOffset
        inHunk = False
        hunkStart = -1
        contextLines = 3
        for line in rest_api_hunk["corpus"].split("\n"):
            if line.startswith("+"):
                # line is a new line that got introduced in this patch.
                # Do not record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                continue
            if line.startswith("-"):
                # line was changed or removed from the older version of the
                # code. Record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                offset += 1
                continue
            # line is a context line.
            if inHunk is True:
                inHunk = False
                hunkEnd = offset + contextLines
                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
            offset += 1
        if inHunk is True:
            hunkEnd = offset + contextLines
            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))

        # The above algorithm could result in adjacent or overlapping ranges
        # being recorded into self.actual_lines_changed_offset.
        # Merge the adjacent and overlapping ranges in there:
        t = []
        lastRange = None
        for start, end in self.actual_lines_changed_offset + \
                [(sys.maxsize, sys.maxsize)]:
            if lastRange is None:
                lastRange = (start, end)
            else:
                if lastRange[1] >= start:
                    lastRange = (lastRange[0], end)
                else:
                    t.append(lastRange)
                    lastRange = (start, end)
        self.actual_lines_changed_offset = t


class PhabChange:
    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]


class PhabDiff(PhabObject):
    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]


class ReviewsCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)


class UsersCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)


reviews_cache = ReviewsCache()
users_cache = UsersCache()


def init_phab_connection():
    phab = Phabricator()
    phab.update_interfaces()
    return phab


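# Fetch info from Phabricator page by page (using the REST API cursor in
# results["cursor"]["after"]) until the cache covers the last
# max_nr_days_to_cache days, writing the cache to disk after every page.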
def update_cached_info(phab, cache, phab_query, order, record_results,
                       max_nr_entries_per_fetch, max_nr_days_to_cache):
    q = phab
    LIMIT = max_nr_entries_per_fetch
    for query_step in phab_query:
        q = getattr(q, query_step)
    results = q(order=order, limit=LIMIT)
    most_recent_info, oldest_info = record_results(cache, results, phab)
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - \
        timedelta(days=max_nr_days_to_cache)
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(
        datetime.fromtimestamp(most_recent_info)))
    while (after is not None
           and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch):
        need_more_older_data = \
            (cache.oldest_info is None or
             datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch)
        print(("need_more_older_data={0} cache.oldest_info={1} " +
               "oldest_info_to_fetch={2}").format(
                   need_more_older_data,
                   datetime.fromtimestamp(cache.oldest_info)
                   if cache.oldest_info is not None else None,
                   oldest_info_to_fetch))
        need_more_newer_data = \
            (cache.most_recent_info is None or
             cache.most_recent_info < most_recent_info)
        print(("need_more_newer_data={0} cache.most_recent_info={1} " +
               "most_recent_info={2}")
              .format(need_more_newer_data, cache.most_recent_info,
                      most_recent_info))
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=LIMIT)
        most_recent_info, oldest_info = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        print("most_recent_info: {0}".format(
            datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We did fetch all records. Mark the cache as containing all info
        # since the start of time.
        oldest_info = 0
    cache.oldest_info = oldest_info
    cache.write_cache_to_disk()


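# record_results callback for reviews: merge one page of
# differential.revision.search results into the cache, fetching the diffs for
# any review that got modified since it was last cached. Returns the newest
# and oldest dateModified seen on this page.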
def record_reviews(cache, reviews, phab):
    most_recent_info = None
    oldest_info = None
    for reviewInfo in reviews["data"]:
        if reviewInfo["type"] != "DREV":
            continue
        id = reviewInfo["id"]
        # phid = reviewInfo["phid"]
        dateModified = int(reviewInfo["fields"]["dateModified"])
        dateCreated = int(reviewInfo["fields"]["dateCreated"])
        title = reviewInfo["fields"]["title"]
        author = reviewInfo["fields"]["authorPHID"]
        phabReview = cache.get(id)
        if "dateModified" not in phabReview.__dict__ or \
           dateModified > phabReview.dateModified:
            diff_results = phab.differential.querydiffs(revisionIDs=[id])
            diff_ids = sorted(diff_results.keys())
            phabDiffs = []
            for diff_id in diff_ids:
                diffInfo = diff_results[diff_id]
                d = PhabDiff(diff_id)
                d.update(diffInfo)
                phabDiffs.append(d)
            phabReview.update(title, dateCreated, dateModified, author)
            phabReview.setPhabDiffs(phabDiffs)
            print("Updated D{0} modified on {1} ({2} diffs)".format(
                id, datetime.fromtimestamp(dateModified), len(phabDiffs)))

        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified

        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


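# record_results callback for users: merge one page of user.search results
# into the cache. Returns the newest and oldest dateModified seen on this
# page.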
def record_users(cache, users, phab):
    most_recent_info = None
    oldest_info = None
    for info in users["data"]:
        if info["type"] != "USER":
            continue
        id = info["id"]
        phid = info["phid"]
        dateModified = int(info["fields"]["dateModified"])
        # dateCreated = int(info["fields"]["dateCreated"])
        realName = info["fields"]["realName"]
        phabUser = cache.get(id)
        phabUser.update(phid, realName)
        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified
        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


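# One entry per cache: (cache, Conduit API method path, result ordering,
# record_results callback, page size per fetch, number of days of history to
# keep cached).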
PHABCACHESINFO = ((reviews_cache, ("differential", "revision", "search"),
                   "updated", record_reviews, 5, 7),
                  (users_cache, ("user", "search"), "newest", record_users,
                   100, 1000))


def load_cache():
    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        print("Loaded {0} nr entries: {1}".format(
            cache.get_name(), len(cache.get_ids_in_cache())))
        print("Loaded {0} has most recent info: {1}".format(
            cache.get_name(),
            datetime.fromtimestamp(cache.most_recent_info)
            if cache.most_recent_info is not None else None))


def update_cache(phab):
    load_cache()
    for cache, phab_query, order, record_results, max_nr_entries_per_fetch, \
            max_nr_days_to_cache in PHABCACHESINFO:
        update_cached_info(phab, cache, phab_query, order, record_results,
                           max_nr_entries_per_fetch, max_nr_days_to_cache)
        ids_in_cache = cache.get_ids_in_cache()
        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
        cache.write_cache_to_disk()


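# Return the cached reviews modified within `days` days of the most recently
# modified review, newest first.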
def get_most_recent_reviews(days):
    newest_reviews = sorted(
        reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if len(newest_reviews) == 0:
        return newest_reviews
    most_recent_review_time = \
        datetime.fromtimestamp(newest_reviews[0].dateModified)
    cut_off_date = most_recent_review_time - timedelta(days=days)
    result = []
    for review in newest_reviews:
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            return result
        result.append(review)
    return result


# All of the above code is about fetching data from Phabricator and caching it
# on local disk. The below code contains the actual "business logic" for this
# script.

_userphid2realname = None


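# Map a Phabricator user PHID to a real name, lazily building the lookup
# table from the users cache on first use.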
def get_real_name_from_author(user_phid):
    global _userphid2realname
    if _userphid2realname is None:
        _userphid2realname = {}
        for user in users_cache.get_objects():
            _userphid2realname[user.phid] = user.realName
    return _userphid2realname.get(user_phid, "unknown")


def print_most_recent_reviews(phab, days, filter_reviewers):
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        print(msg.encode('utf-8'))

    newest_reviews = get_most_recent_reviews(days)
    add_msg(u"These are the reviews that look interesting to review. " +
            u"The report below has two sections. The first " +
            u"section is organized per review; the second section is " +
            u"organized per potential reviewer.\n")
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = \
        datetime.fromtimestamp(oldest_review.dateModified) \
        if oldest_review else None
    add_msg((u"The report below is based on analyzing the reviews that got " +
             u"touched in the past {0} days (since {1}). " +
             u"The script found {2} such reviews.\n").format(
                 days, oldest_datetime, len(newest_reviews)))
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        matched_reviewers = find_reviewers_for_review(review)
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg((u"{0:>3}. https://reviews.llvm.org/D{1} by {2}\n     {3}\n" +
                 u"     Last updated on {4}").format(
                     i, review.id,
                     get_real_name_from_author(review.author), review.title,
                     datetime.fromtimestamp(review.dateModified)))
        for reviewer, scores in matched_reviewers:
            add_msg(u"    potential reviewer {0}, score {1}".format(
                reviewer,
                "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")"))
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg(u"\n\nSUMMARY FOR {0} (found {1} reviews):".format(
            reviewer, len(reviews_and_scores)))
        for review, scores in reviews_and_scores:
            add_msg(u"[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                "/".join(["{0:.1f}%".format(s) for s in scores]), review.id,
                review.title, get_real_name_from_author(review.author)))
    return "\n".join(msgs)


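# Run a git command through the shell and return its output as a unicode
# string, or None if the command failed.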
def get_git_cmd_output(cmd):
    output = None
    try:
        logging.debug(cmd)
        output = subprocess.check_output(
            cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
    if output is None:
        return None
    return output.decode("utf-8", errors='ignore')


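# Matches the "author-mail <email>" lines that git blame --line-porcelain
# emits for every blamed line.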
reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")


def parse_blame_output_line_porcelain(blame_output_lines):
    email2nr_occurences = {}
    if blame_output_lines is None:
        return email2nr_occurences
    for line in blame_output_lines:
        m = reAuthorMail.match(line)
        if m:
            author_email_address = m.group(1)
            if author_email_address not in email2nr_occurences:
                email2nr_occurences[author_email_address] = 1
            else:
                email2nr_occurences[author_email_address] += 1
    return email2nr_occurences


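# Cache the git blame output per (repo, revision, path), since the same file
# gets blamed once per changed line range and once more for the whole-file
# heuristic.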
class BlameOutputCache:
    def __init__(self):
        self.cache = {}

    def _populate_cache_for(self, cache_key):
        assert cache_key not in self.cache
        git_repo, base_revision, path = cache_key
        cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
               "--line-porcelain {1} -- {2}").format(git_repo, base_revision,
                                                     path)
        blame_output = get_git_cmd_output(cmd)
        self.cache[cache_key] = \
            blame_output.split('\n') if blame_output is not None else None
        # FIXME: the blame cache could probably be made more effective still
        # if, instead of storing the requested base_revision in the cache, the
        # last revision before the base revision in which this file/path got
        # changed were stored. That way, multiple project revisions for which
        # this specific file/path hasn't changed would get cache hits (instead
        # of misses, as in the current implementation).

    def get_blame_output_for(self, git_repo, base_revision, path,
                             start_line=-1, end_line=-1):
        cache_key = (git_repo, base_revision, path)
        if cache_key not in self.cache:
            self._populate_cache_for(cache_key)
        assert cache_key in self.cache
        all_blame_lines = self.cache[cache_key]
        if all_blame_lines is None:
            return None
        if start_line == -1 and end_line == -1:
            return all_blame_lines
        assert start_line >= 0
        assert end_line >= 0
        assert end_line <= len(all_blame_lines)
        assert start_line <= len(all_blame_lines)
        assert start_line <= end_line
        return all_blame_lines[start_line:end_line]

    def get_parsed_git_blame_for(self, git_repo, base_revision, path,
                                 start_line=-1, end_line=-1):
        return parse_blame_output_line_porcelain(
            self.get_blame_output_for(git_repo, base_revision, path,
                                      start_line, end_line))


blameOutputCache = BlameOutputCache()


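# Given a PhabDiff, guess suitable reviewers by git-blaming the lines and
# files the diff touches, at (approximately) the revision the diff was based
# on.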
def find_reviewers_for_diff_heuristic(diff):
    # Heuristic 1: assume good reviewers are the ones that touched the same
    # lines before as this patch is touching.
    # Heuristic 2: assume good reviewers are the ones that touched the same
    # files before as this patch is touching.
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume the last revision before the diff was modified is the revision
    # the diff applies to.
    assert len(GIT_REPO_METADATA) == 1
    git_repo = os.path.join("git_repos", GIT_REPO_METADATA[0][0])
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" master'.format(
        git_repo,
        datetime.fromtimestamp(
            diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"))
    base_revision = get_git_cmd_output(cmd).strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                for reviewer, nr_occurences in \
                        blameOutputCache.get_parsed_git_blame_for(
                            git_repo, base_revision, path, start_line, end_line
                        ).items():
                    if reviewer not in reviewers2nr_lines_touched:
                        reviewers2nr_lines_touched[reviewer] = 0
                    reviewers2nr_lines_touched[reviewer] += nr_occurences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Collect git blame results for authors across the whole file.
        for reviewer, nr_occurences in \
                blameOutputCache.get_parsed_git_blame_for(
                    git_repo, base_revision, path).items():
            if reviewer not in reviewers2nr_files_touched:
                reviewers2nr_files_touched[reviewer] = 0
            reviewers2nr_files_touched[reviewer] += 1

    # Compute "match scores": for each candidate reviewer, the percentage of
    # touched lines they authored and the percentage of touched files they
    # contributed to.
    total_nr_lines = sum(reviewers2nr_lines_touched.values())
    total_nr_files = len(diff.changes)
    reviewers_matchscores = \
        [(reviewer,
          (reviewers2nr_lines_touched.get(reviewer, 0)*100.0/total_nr_lines
           if total_nr_lines != 0 else 0,
           reviewers2nr_files_touched[reviewer]*100.0/total_nr_files
           if total_nr_files != 0 else 0))
         for reviewer, nr_files
         in reviewers2nr_files_touched.items()]
    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
    return reviewers_matchscores


def find_reviewers_for_review(review):
    # Process the newest diff first.
    diffs = sorted(
        review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
    if len(diffs) == 0:
        # No diffs: return an empty match list so callers can iterate over
        # the result unconditionally.
        return []
    diff = diffs[0]
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write('.')
    sys.stdout.flush()
    logging.debug(u"matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers


def update_git_repos():
    git_repos_directory = "git_repos"
    for name, url in GIT_REPO_METADATA:
        dirname = os.path.join(git_repos_directory, name)
        if not os.path.exists(dirname):
            cmd = "git clone {0} {1}".format(url, dirname)
            get_git_cmd_output(cmd)
        cmd = "git -C {0} pull --rebase".format(dirname)
        get_git_cmd_output(cmd)


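# Email the report, one message per recipient. Note that smtplib.SMTP() with
# no arguments followed by connect() connects to an SMTP server on localhost,
# so this assumes a local mail server is available.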
def send_emails(email_addresses, sender, msg):
    s = smtplib.SMTP()
    s.connect()
    for email_address in email_addresses:
        email_msg = email.mime.multipart.MIMEMultipart()
        email_msg['From'] = sender
        email_msg['To'] = email_address
        email_msg['Subject'] = 'LLVM patches you may be able to review.'
        email_msg.attach(email.mime.text.MIMEText(msg.encode('utf-8'),
                                                  'plain'))
        # python 3.x: s.send_message(email_msg)
        s.sendmail(email_msg['From'], email_msg['To'], email_msg.as_string())
    s.quit()


def filter_reviewers_to_report_for(people_to_look_for):
    # The below is just an example filter, reporting only the potential
    # reviews that could be done by the people that will receive the report
    # email.
    return lambda potential_reviewers: [r for r in potential_reviewers
                                        if r[0] in people_to_look_for]


def main():
    parser = argparse.ArgumentParser(
        description='Match open reviews to potential reviewers.')
    parser.add_argument(
        '--no-update-cache',
        dest='update_cache',
        action='store_false',
        default=True,
        help='Do not update cached Phabricator objects')
    parser.add_argument(
        '--email-report',
        dest='email_report',
        nargs='*',
        default=[],
        help="The email addresses to send the report to.")
    parser.add_argument(
        '--sender',
        dest='sender',
        default="",
        help="The email address to use in 'From' on messages emailed out.")
    parser.add_argument(
        '--email-addresses',
        dest='email_addresses',
        nargs='*',
        default=[],
        help="The email addresses (as known by LLVM git) of " +
        "the people to look for reviews for.")
    parser.add_argument('--verbose', '-v', action='count', default=0)

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    people_to_look_for = [e.decode('utf-8') for e in args.email_addresses]
    logging.debug("Will look for reviews that the following contributors " +
                  "could review: {}".format(people_to_look_for))
    logging.debug("Will email a report to: {}".format(args.email_report))

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    load_cache()
    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for))

    if args.email_report:
        send_emails(args.email_report, args.sender, msg)


if __name__ == "__main__":
    main()
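# Example invocation (the script file name and the addresses below are
# illustrative, not taken from this file):
# $ python find_interesting_reviews.py \
#       --email-addresses contributor@example.org \
#       --email-report report-recipient@example.org \
#       --sender sender@example.org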