Ensure old tab state is cleared between iterations of run-benchmark
[WebKit-https.git] / Tools / Scripts / validate-committer-lists
1 #!/usr/bin/env python
2
3 # Copyright (c) 2009, Google Inc. All rights reserved.
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #
# Checks webkitpy's known list of committers against the lists.webkit.org rosters and the Git history.
32
33
34 import logging
35 import os
36 import subprocess
37 import re
38 import urllib2
39 from datetime import date, datetime, timedelta
40 from optparse import OptionParser
41
42 from webkitpy.common.config.committers import CommitterList
43 from webkitpy.common.checkout.changelog import ChangeLogEntry
44 from webkitpy.common.checkout.scm import Git
45 from webkitpy.common.net.bugzilla import Bugzilla
46
47 # WebKit includes a built copy of BeautifulSoup in Scripts/webkitpy
48 # so this import should always succeed.
49 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
50
# Module-level logger, named after this module; used below to report git log
# lines that cannot be parsed.
_log = logging.getLogger(__name__)
52
def print_list_if_non_empty(title, list_to_print):
    """Print `title` followed by every item of `list_to_print`, one per line.

    Nothing at all is printed when `list_to_print` is empty (falsy).
    Otherwise the output starts with a blank separator line.
    """
    if list_to_print:
        print("")  # Newline before the list
        print(title)
        for entry in list_to_print:
            print(entry)
60
61
class CommitterListFromMailingList(object):
    """Compares a committer list against the lists.webkit.org mailing-list rosters.

    The roster pages for the committers and reviewers lists are downloaded and
    the subscriber emails found on them are cross-checked, in both directions,
    against the committers/reviewers of the given committer list.
    """
    committers_list_url = "http://lists.webkit.org/mailman/roster/webkit-committers"
    reviewers_list_url = "http://lists.webkit.org/mailman/roster/webkit-reviewers"

    @staticmethod
    def _emails_from_soup(soup):
        """Return the subscriber emails found in the <li> items of a parsed roster page.

        List items without a link, or whose link has no single text string,
        are skipped instead of raising AttributeError.
        """
        emails = []
        # Each subscriber is a list item whose link text is the obfuscated email.
        for email_item in soup('li'):
            email_link = email_item.find("a")
            link_text = email_link.string if email_link is not None else None
            if link_text is None:
                continue
            # The email is obfuscated using " at " instead of "@".
            emails.append(link_text.replace(" at ", "@"))
        return emails

    def _fetch_emails_from_page(self, url):
        """Download the roster page at `url` and return the emails listed on it."""
        page = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(page)
        finally:
            # The page is fully parsed (or parsing failed); release the connection.
            page.close()
        return self._emails_from_soup(soup)

    @staticmethod
    def _commiters_not_found_in_email_list(committers, emails):
        """Return the committers that have none of their emails in `emails`.

        The order of `committers` is preserved in the result.
        """
        subscribed_emails = set(emails)
        return [committer for committer in committers
                if not any(email in subscribed_emails for email in committer.emails)]

    @staticmethod
    def _emails_not_found_in_committer_list(committers, emails):
        """Return the entries of `emails` that belong to none of `committers`.

        Always returns a list (in the order of `emails`), so callers can rely
        on its truthiness to detect "nothing missing".
        """
        known_emails = set()
        for committer in committers:
            known_emails.update(committer.emails)
        return [email for email in emails if email not in known_emails]

    def check_for_emails_missing_from_list(self, committer_list):
        """Print the mismatches between `committer_list` and both mailing-list rosters.

        Each report is only printed when it is non-empty.
        """
        committer_emails = self._fetch_emails_from_page(self.committers_list_url)
        list_name = "webkit-committers@lists.webkit.org"

        missing_from_mailing_list = self._commiters_not_found_in_email_list(committer_list.committers(), committer_emails)
        print_list_if_non_empty("Committers missing from %s:" % list_name, missing_from_mailing_list)

        users_missing_from_committers = self._emails_not_found_in_committer_list(committer_list.committers(), committer_emails)
        print_list_if_non_empty("Subscribers to %s missing from contributors.json:" % list_name, users_missing_from_committers)

        reviewer_emails = self._fetch_emails_from_page(self.reviewers_list_url)
        list_name = "webkit-reviewers@lists.webkit.org"

        missing_from_mailing_list = self._commiters_not_found_in_email_list(committer_list.reviewers(), reviewer_emails)
        print_list_if_non_empty("Reviewers missing from %s:" % list_name, missing_from_mailing_list)

        missing_from_reviewers = self._emails_not_found_in_committer_list(committer_list.reviewers(), reviewer_emails)
        print_list_if_non_empty("Subscribers to %s missing from reviewers in contributors.json:" % list_name, missing_from_reviewers)

        # Reviewer-list subscribers who are not even known as committers.
        missing_from_committers = self._emails_not_found_in_committer_list(committer_list.committers(), reviewer_emails)
        print_list_if_non_empty("Subscribers to %s completely missing from contributors.json:" % list_name, missing_from_committers)
120
121
class CommitterListFromGit(object):
    """Derives committer and reviewer activity from `git log` of the current checkout.

    The last commit time per author is read from the git history once and
    cached on the instance. Bare logins (authors without an '@') are mapped to
    email addresses via `login_to_email_address` or a matching email prefix.
    """
    login_to_email_address = {
        'aliceli1' : 'alice.liu@apple.com',
        'bdash' : 'mrowe@apple.com',
        'bdibello' : 'bdibello@apple.com', # Bruce DiBello, only 4 commits: r10023, r9548, r9538, r9535
        'cblu' : 'cblu@apple.com',
        'cpeterse' : 'cpetersen@apple.com',
        'eseidel' : 'eric@webkit.org',
        'gdennis' : 'gdennis@webkit.org',
        'goldsmit' : 'goldsmit@apple.com', # Debbie Goldsmith, only one commit r8839
        'gramps' : 'gramps@apple.com',
        'honeycutt' : 'jhoneycutt@apple.com',
        'jdevalk' : 'joost@webkit.org',
        'jens' : 'jens@apple.com',
        'justing' : 'justin.garcia@apple.com',
        'kali' : 'kali@apple.com', # Christy Warren, did BIDI work, 5 commits: r8815, r8802, r8801, r8791, r8773, r8603
        'kjk' : 'kkowalczyk@gmail.com',
        'kmccullo' : 'kmccullough@apple.com',
        'kocienda' : 'kocienda@apple.com',
        'lamadio' : 'lamadio@apple.com', # Lou Amadio, only 2 commits: r17949 and r17783
        'lars' : 'lars@kde.org',
        'lweintraub' : 'lweintraub@apple.com',
        'lypanov' : 'lypanov@kde.org',
        'mhay' : 'mhay@apple.com', # Mike Hay, 3 commits: r3813, r2552, r2548
        'ouch' : 'ouch@apple.com', # John Louch
        'pyeh' : 'patti@apple.com', # Patti Yeh, did VoiceOver work in WebKit
        'rjw' : 'rjw@apple.com',
        'seangies' : 'seangies@apple.com', # Sean Gies?, only 5 commits: r16600, r16592, r16511, r16489, r16484
        'sheridan' : 'sheridan@apple.com', # Shelly Sheridan
        'thatcher' : 'timothy@apple.com',
        'tomernic' : 'timo@apple.com',
        'trey' : 'trey@usa.net',
        'tristan' : 'tristan@apple.com',
        'vicki' : 'vicki@apple.com',
        'voas' : 'voas@apple.com', # Ed Voas, did some Carbon work in WebKit
        'zack' : 'zack@kde.org',
        'zimmermann' : 'zimmermann@webkit.org',
    }

    def __init__(self):
        # Maps author (email or bare login) -> timestamp of the last commit seen.
        self._last_commit_time_by_author_cache = {}

    @staticmethod
    def _parse_author_timestamps(log_lines):
        """Parse '<author email> <unix timestamp>' lines into {author: timestamp}.

        Everything from the last '@' of the author email onwards is dropped.
        Later lines overwrite earlier ones for the same author. A line that
        does not match is logged and terminates the script with status 1.
        """
        # eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc 1257090899
        line_regexp = re.compile(r"^(?P<author>.+)@\S+ (?P<timestamp>\d+)$")
        last_commit_dates = {}
        for output_line in log_lines:
            match_result = line_regexp.match(output_line)
            if not match_result:
                _log.error("Failed to match line: %s" % output_line)
                raise SystemExit(1)
            last_commit_dates[match_result.group('author')] = float(match_result.group('timestamp'))
        return last_commit_dates

    def _fetch_authors_and_last_commit_time_from_git_log(self):
        """Run `git log` (oldest commit first) and return {author: last commit timestamp}."""
        git_log_args = ['git', 'log', '--reverse', '--pretty=format:%ae %at']
        process = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)
        try:
            # Iterating the pipe stops cleanly at EOF, so an empty read is never
            # mistaken for an unparsable line while git is still exiting.
            return self._parse_author_timestamps(process.stdout)
        finally:
            process.stdout.close()
            process.wait()

    def _fill_in_emails_for_old_logins(self):
        """Re-key cached entries of bare logins to their email address.

        Logins with no known email are left untouched. When both the login and
        its email have an entry, the more recent timestamp is kept.
        """
        cache = self._last_commit_time_by_author_cache
        authors_missing_email = [author for author in cache if '@' not in author]
        # First email seen for each prefix wins.
        email_by_prefix = {}
        for author in cache:
            if '@' in author:
                email_by_prefix.setdefault(author.split('@')[0], author)

        for author in authors_missing_email:
            # First check to see if we have a manual mapping from login to email.
            # Most old logins like 'darin' are now just 'darin@apple.com', so check for a prefix match if a manual mapping was not found.
            author_email = self.login_to_email_address.get(author) or email_by_prefix.get(author)
            if not author_email:
                # No known email mapping, likely not an active committer.  We could log here.
                continue

            no_email_commit_time = cache.get(author)
            email_commit_time = cache.get(author_email)
            # We compare the timestamps for extra sanity even though we could assume commits before email address were used for login are always going to be older.
            if not email_commit_time or email_commit_time < no_email_commit_time:
                cache[author_email] = no_email_commit_time
            del cache[author]

    def _last_commit_by_author(self):
        """Return the cached {author: last commit timestamp} map, building it on first use."""
        if not self._last_commit_time_by_author_cache:
            self._last_commit_time_by_author_cache = self._fetch_authors_and_last_commit_time_from_git_log()
            self._fill_in_emails_for_old_logins()
            # The initial svn import isn't very useful. Not every history has
            # such an author, so a missing entry must not raise KeyError.
            self._last_commit_time_by_author_cache.pop('(no author)', None)
        return self._last_commit_time_by_author_cache

    @staticmethod
    def _print_three_column_row(widths, values):
        """Print three values on one line, left-justifying the first two to `widths`."""
        print("%s%s%s" % (values[0].ljust(widths[0]), values[1].ljust(widths[1]), values[2]))

    def possibly_expired_committers(self, committer_list):
        """Return (author, last commit timestamp) pairs older than 365 days.

        The result is ordered from most recent to oldest last commit.
        `committer_list` is accepted for interface compatibility but not used.
        """
        authors_and_last_commits = sorted(self._last_commit_by_author().items(),
                                          key=lambda item: item[1], reverse=True)
        committer_cutoff = date.today() - timedelta(days=365)
        return [(author, last_commit) for (author, last_commit) in authors_and_last_commits
                if date.fromtimestamp(last_commit) < committer_cutoff]

    def possibly_inactive_reviewers(self, committer_list):
        """Return the reviewers not mentioned on any reviewer line of the last year's commits."""
        git_log_args = ['git', 'log', '--since=1.year']
        process = subprocess.Popen(git_log_args, stdout=subprocess.PIPE)
        git_output, _ = process.communicate()

        comment_regex = re.compile(r'^Date: .+?\n+(.+?)(?:^commit |\Z)', re.MULTILINE | re.DOTALL)
        reviewed_by_regexp = re.compile(ChangeLogEntry.reviewed_by_regexp, re.MULTILINE)

        # Work on a copy so that the list owned by committer_list is never modified.
        reviewers = list(committer_list.reviewers())

        for comment in comment_regex.findall(git_output):
            reviewer_match = reviewed_by_regexp.search(comment)
            if not reviewer_match:
                continue
            # 'replace' keeps malformed UTF-8 in a commit message from aborting the scan.
            reviewers_text = reviewer_match.group('reviewer').decode('utf-8', 'replace')
            # reviewers might be something like "Darin Adler and Dave Hyatt".
            # Rather than trying to fuzzy match names, drop every known reviewer mentioned in the text.
            reviewers = [reviewer for reviewer in reviewers if not reviewer.mentioned_in_text(reviewers_text)]

        return reviewers

    def print_possibly_expired_committers(self, committer_list):
        """Print a table of the authors whose last commit is more than one year old."""
        retired_authors_and_last_commits = self.possibly_expired_committers(committer_list)
        column_widths = [13, 25]
        print("")
        print("Committers who have not committed within one year:")
        self._print_three_column_row(column_widths, ("Last Commit", "Committer Email", "Committer Record"))
        for (author, last_commit) in retired_authors_and_last_commits:
            committer_record = committer_list.committer_by_email(author)
            last_commit_date = date.fromtimestamp(last_commit)
            self._print_three_column_row(column_widths, (str(last_commit_date), author, committer_record))

    def print_possibly_inactive_reviewers(self, committer_list):
        """Print the name and bugzilla email of each possibly inactive reviewer."""
        inactive_reviewers = self.possibly_inactive_reviewers(committer_list)

        print("")
        print("Reviewers who have not reviewed within one year:")
        for contributor in inactive_reviewers:
            print("\"{}\" {}".format(contributor.full_name.encode("utf-8"), contributor.bugzilla_email()))

    def print_committers_missing_from_committer_list(self, committer_list):
        """Print git authors unknown to `committer_list` and listed committers with no commits."""
        last_commit_time_by_author = self._last_commit_by_author()
        missing_from_contributors_json = [author for author in last_commit_time_by_author
                                          if not committer_list.committer_by_email(author)]

        never_committed = [committer for committer in committer_list.committers()
                           if not any(last_commit_time_by_author.get(email) for email in committer.emails)]

        print_list_if_non_empty("Historical committers missing from contributors.json:", missing_from_contributors_json)
        print_list_if_non_empty("Committers in contributors.json who have never committed:", never_committed)
290
291
class CommitterListBugzillaChecker(object):
    """Reports the committers whose bugzilla email is flagged as invalid by Bugzilla."""

    def __init__(self):
        self._bugzilla = Bugzilla()

    def _has_invalid_bugzilla_email(self, committer):
        """Return what the Bugzilla queries object reports for the committer's bugzilla email."""
        is_invalid = self._bugzilla.queries.is_invalid_bugzilla_email
        return is_invalid(committer.bugzilla_email())

    def print_committers_with_invalid_bugzilla_emails(self, committer_list):
        """Check every committer against bugzilla and print those with an invalid email."""
        print("")  # Print a newline before we start hitting bugzilla (it logs about logging in).
        print("Checking committer emails against bugzilla (this will take a long time)")
        offenders = [committer for committer in committer_list.committers()
                     if self._has_invalid_bugzilla_email(committer)]
        print_list_if_non_empty("Committers with invalid bugzilla email:", offenders)
304
305
def main():
    """Run the committer-list validations selected on the command line.

    With --dump or --canonicalize only that action is performed. Otherwise the
    mailing-list checks run first, followed by the git-history checks (which
    need a git checkout) and, on request, the bugzilla email check.

    Returns 0 on success and 1 when the git-based checks had to be skipped.
    """
    parser = OptionParser()
    parser.add_option("-b", "--check-bugzilla-emails", action="store_true", help="Check the bugzilla_email for each committer against bugs.webkit.org")
    parser.add_option("-d", "--dump", action="store_true", help="Dump the contributor list as JSON to stdout (suitable for saving to contributors.json)")
    parser.add_option("-c", "--canonicalize", action="store_true", help="Canonicalize contributors.json, rewriting it in-place")

    # Positional arguments are accepted but not used.
    (options, _) = parser.parse_args()

    committer_list = CommitterList()
    if options.dump:
        print(committer_list.as_json())
        return 0

    if options.canonicalize:
        print("Updating contributors.json in-place...")
        committer_list.reformat_in_place()
        print("Done")
        return 0

    CommitterListFromMailingList().check_for_emails_missing_from_list(committer_list)

    if not Git.in_working_directory("."):
        print("""\n\nWARNING: validate-committer-lists requires a git checkout.
The following checks are disabled:
 - List of inactive committers
 - List of inactive reviewers
 - List of historical committers missing from contributors.json
""")
        return 1

    git_committer_list = CommitterListFromGit()
    git_committer_list.print_possibly_expired_committers(committer_list)
    git_committer_list.print_possibly_inactive_reviewers(committer_list)
    git_committer_list.print_committers_missing_from_committer_list(committer_list)

    if options.check_bugzilla_emails:
        CommitterListBugzillaChecker().print_committers_with_invalid_bugzilla_emails(committer_list)

    # Every path now returns an explicit status instead of falling off the end with None.
    return 0
343
344
if __name__ == "__main__":
    # Use main()'s return value as the process exit status so that a skipped
    # run (return value 1) is visible to the caller; None still exits with 0.
    raise SystemExit(main())