The list of contributors in committers.py should be a separate JSON
[WebKit-https.git] / Tools / Scripts / webkitpy / common / config / committers.py
1 # Copyright (c) 2011, Apple Inc. All rights reserved.
2 # Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #
30 # WebKit's Python module for committer and reviewer validation.
31
32 import fnmatch
33 import json
34
35 from webkitpy.common.editdistance import edit_distance
36 from webkitpy.common.memoized import memoized
37 from webkitpy.common.system.filesystem import FileSystem
38
39
40 # The list of contributors has been moved to contributors.json
41
42
class Contributor(object):
    """A person identified by a full name, one or more emails and optional IRC nicknames.

    Instance attributes set by __init__:
      full_name      -- the name exactly as passed in.
      emails         -- list of the given email addresses, lower-cased.
      irc_nicknames  -- a one-element list when a single str was given, otherwise
                        the value that was passed in (None by default).
      can_commit / can_review -- both False on a plain Contributor.
    """

    def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None):
        """Store the name, normalize the emails to a lower-cased list and record the nicknames.

        name and email_or_emails must be truthy (checked with assert).
        email_or_emails and irc_nickname_or_nicknames may each be a single str
        or an iterable of strings.
        """
        assert(name)
        assert(email_or_emails)
        self.full_name = name
        if isinstance(email_or_emails, str):
            emails = [email_or_emails]
        else:
            emails = email_or_emails
        # Emails are case-insensitive. Build a real list rather than a lazy map()
        # result: the methods below index it and iterate over it repeatedly.
        self.emails = [email.lower() for email in emails]
        if isinstance(irc_nickname_or_nicknames, str):
            self.irc_nicknames = [irc_nickname_or_nicknames]
        else:
            self.irc_nicknames = irc_nickname_or_nicknames
        self.can_commit = False
        self.can_review = False

    def bugzilla_email(self):
        """Return the first email in self.emails."""
        # FIXME: We're assuming the first email is a valid bugzilla email,
        # which might not be right.
        return self.emails[0]

    def __str__(self):
        return '"%s" <%s>' % (self.full_name, self.emails[0])

    def contains_string(self, search_string):
        """Return True if search_string, lower-cased, is a substring of the lower-cased
        full name, of any lower-cased IRC nickname, or of any stored email."""
        string = search_string.lower()
        if string in self.full_name.lower():
            return True
        # irc_nicknames may be None, so guard before iterating.
        if self.irc_nicknames and any(string in nickname.lower() for nickname in self.irc_nicknames):
            return True
        # Stored emails are already lower-case.
        return any(string in email for email in self.emails)

    def matches_glob(self, glob_string):
        """Return True if fnmatch.fnmatch() matches glob_string against the full name,
        any IRC nickname or any stored email."""
        if fnmatch.fnmatch(self.full_name, glob_string):
            return True
        if self.irc_nicknames and any(fnmatch.fnmatch(nickname, glob_string) for nickname in self.irc_nicknames):
            return True
        return any(fnmatch.fnmatch(email, glob_string) for email in self.emails)
92
93
class Committer(Contributor):
    """A Contributor whose can_commit flag is set to True."""

    def __init__(self, name, email_or_emails, irc_nickname=None):
        # Let the base class store name, emails and nicknames, then raise the flag.
        Contributor.__init__(
            self,
            name,
            email_or_emails,
            irc_nickname_or_nicknames=irc_nickname,
        )
        self.can_commit = True
98
99
class Reviewer(Committer):
    """A Committer whose can_review flag is set to True as well."""

    def __init__(self, name, email_or_emails, irc_nickname=None):
        # Committer.__init__ performs the shared setup and sets can_commit.
        Committer.__init__(
            self,
            name,
            email_or_emails,
            irc_nickname=irc_nickname,
        )
        self.can_review = True
104
105
class CommitterList(object):
    """Collection of contributors, committers and reviewers with lookup helpers.

    Reviewers are also included in the committers list, and committers and
    reviewers are both included in the contributors list. The dictionaries
    keyed by name, email and login are filled lazily on first use.
    """

    # Committers and reviewers are passed in to allow easy testing
    def __init__(self,
                 committers=None,
                 reviewers=None,
                 contributors=None):
        """Build the list from the given accounts, or from load_json() when none are given.

        Each argument is a list of account objects (or None / empty). When all
        three are empty, the accounts returned by load_json() are used instead.
        """
        # FIXME: These arguments only exist for testing. Clean it up.
        if not (committers or reviewers or contributors):
            loaded_data = self.load_json()
            contributors = loaded_data['Contributors']
            committers = loaded_data['Committers']
            reviewers = loaded_data['Reviewers']

        # Copy the inputs so that neither the caller's lists nor the memoized
        # load_json() lists are aliased by the lists the accessors hand out.
        contributors = list(contributors or [])
        committers = list(committers or [])
        reviewers = list(reviewers or [])

        self._contributors = contributors + committers + reviewers
        self._committers = committers + reviewers
        self._reviewers = reviewers
        self._contributors_by_name = {}
        self._accounts_by_email = {}
        self._accounts_by_login = {}

    @staticmethod
    @memoized
    def load_json():
        """Read contributors.json and return a dict of account lists.

        The file is looked up in the directory of the 'webkitpy.common.config'
        module. The result maps 'Contributors', 'Committers' and 'Reviewers' to
        lists of Contributor, Committer and Reviewer objects respectively, one
        per entry of the matching section, built from its 'emails' and 'nicks'.
        """
        filesystem = FileSystem()
        json_path = filesystem.join(filesystem.dirname(filesystem.path_to_module('webkitpy.common.config')), 'contributors.json')
        contributors = json.loads(filesystem.read_text_file(json_path))

        def accounts(account_class, section):
            # items() is available on both Python 2 and 3 (iteritems() is not).
            return [account_class(name, data.get('emails'), data.get('nicks')) for name, data in contributors[section].items()]

        return {
            'Contributors': accounts(Contributor, 'Contributors'),
            'Committers': accounts(Committer, 'Committers'),
            'Reviewers': accounts(Reviewer, 'Reviewers'),
        }

    def contributors(self):
        """Return the list of all accounts (contributors, committers and reviewers)."""
        return self._contributors

    def committers(self):
        """Return the list of committers followed by reviewers."""
        return self._committers

    def reviewers(self):
        """Return the list of reviewers."""
        return self._reviewers

    def _name_to_contributor_map(self):
        """Return the dict from lower-cased full name to account, building it on first use."""
        if not self._contributors_by_name:
            for contributor in self._contributors:
                assert(contributor.full_name)
                assert(contributor.full_name.lower() not in self._contributors_by_name)  # We should never have duplicate names.
                self._contributors_by_name[contributor.full_name.lower()] = contributor
        return self._contributors_by_name

    def _email_to_account_map(self):
        """Return the dict from each account email to its account, building it on first use."""
        if not self._accounts_by_email:
            for account in self._contributors:
                for email in account.emails:
                    assert(email not in self._accounts_by_email)  # We should never have duplicate emails.
                    self._accounts_by_email[email] = account
        return self._accounts_by_email

    def _login_to_account_map(self):
        """Return the dict from bugzilla_email() to account, building it on first use.

        Accounts with no emails are left out.
        """
        if not self._accounts_by_login:
            for account in self._contributors:
                if account.emails:
                    login = account.bugzilla_email()
                    assert(login not in self._accounts_by_login)  # We should never have duplicate emails.
                    self._accounts_by_login[login] = account
        return self._accounts_by_login

    def _committer_only(self, record):
        """Return record unchanged, or None when it is an account that cannot commit."""
        if record and not record.can_commit:
            return None
        return record

    def _reviewer_only(self, record):
        """Return record unchanged, or None when it is an account that cannot review."""
        if record and not record.can_review:
            return None
        return record

    def committer_by_name(self, name):
        """Return the account with this full name if it can commit, else None."""
        return self._committer_only(self.contributor_by_name(name))

    def contributor_by_irc_nickname(self, irc_nickname):
        """Return the first account whose irc_nicknames contains irc_nickname, else None."""
        for contributor in self.contributors():
            # FIXME: This should do case-insensitive comparison or assert that all IRC nicknames are in lowercase
            if contributor.irc_nicknames and irc_nickname in contributor.irc_nicknames:
                return contributor
        return None

    def contributors_by_search_string(self, string):
        """Return the accounts matching string as a glob; if there are none,
        return the accounts that contain string as a substring."""
        everyone = self.contributors()
        # Real lists are required here: a lazy filter() object is always truthy,
        # which would prevent the substring fallback from ever running.
        glob_matches = [contributor for contributor in everyone if contributor.matches_glob(string)]
        if glob_matches:
            return glob_matches
        return [contributor for contributor in everyone if contributor.contains_string(string)]

    def contributors_by_email_username(self, string):
        """Return the accounts having at least one email that starts with string + '@'."""
        prefix = string + '@'
        return [contributor for contributor in self.contributors()
                if any(email.startswith(prefix) for email in contributor.emails)]

    def _contributor_name_shorthands(self, contributor):
        """Return the shorthands derived from the first and last word of the full name.

        The result is (first, last, first + last initial, first + ' ' + last initial),
        or an empty list when the full name contains no space.
        """
        if ' ' not in contributor.full_name:
            return []
        split_fullname = contributor.full_name.split()
        first_name = split_fullname[0]
        last_name = split_fullname[-1]
        return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]

    def _tokenize_contributor_name(self, contributor):
        """Return the lower-cased full name, its whitespace-separated words and the
        lower-cased IRC nicknames longer than five characters."""
        full_name_in_lowercase = contributor.full_name.lower()
        tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
        if contributor.irc_nicknames:
            return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
        return tokens

    def contributors_by_fuzzy_match(self, string):
        """Return (accounts, distance) for the accounts that best match string.

        Exact matches (steps 1 to 3) are returned with distance 0. Otherwise the
        accounts with the smallest edit distance are returned together with that
        distance, or ([], len(string)) when nothing is close enough.
        """
        string_in_lowercase = string.lower()

        # 1. Exact match for fullname, email and irc_nicknames
        account = self.contributor_by_name(string_in_lowercase) or self.contributor_by_email(string_in_lowercase) or self.contributor_by_irc_nickname(string_in_lowercase)
        if account:
            return [account], 0

        # 2. Exact match for email username (before @)
        accounts = self.contributors_by_email_username(string_in_lowercase)
        if len(accounts) == 1:
            return accounts, 0

        # 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
        accounts = [contributor for contributor in self.contributors() if string in self._contributor_name_shorthands(contributor)]
        if len(accounts) == 1:
            return accounts, 0

        # 4. Finally, fuzzy-match using edit-distance
        string = string_in_lowercase
        closest_contributors = []
        # Floor division keeps the threshold an integer on both Python 2 and 3.
        min_distance = len(string) // 2 - 1
        for contributor in self.contributors():
            tokens = self._tokenize_contributor_name(contributor)
            # Only tokens whose length is within the current threshold are compared.
            distances = [edit_distance(token, string) for token in tokens if abs(len(token) - len(string)) <= min_distance]
            if not distances:
                continue
            distance = min(distances)
            if distance == min_distance:
                closest_contributors.append(contributor)
            elif distance < min_distance:
                closest_contributors = [contributor]
                min_distance = distance
        if not closest_contributors:
            return [], len(string)
        return closest_contributors, min_distance

    def contributor_by_email(self, email):
        """Return the account owning email (compared in lower case), or None."""
        return self._email_to_account_map().get(email.lower()) if email else None

    def contributor_by_name(self, name):
        """Return the account with this full name (compared in lower case), or None."""
        return self._name_to_contributor_map().get(name.lower()) if name else None

    def committer_by_email(self, email):
        """Return the account owning email if it can commit, else None."""
        return self._committer_only(self.contributor_by_email(email))

    def reviewer_by_email(self, email):
        """Return the account owning email if it can review, else None."""
        return self._reviewer_only(self.contributor_by_email(email))