4975b605449d57bbaa3437b19d6ed4cc51391534
[WebKit-https.git] / Tools / Scripts / webkitpy / common / checkout / changelog.py
1 # Copyright (C) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for parsing and modifying ChangeLog files
30
31 import logging
32 import re
33 from StringIO import StringIO
34 import textwrap
35
36 from webkitpy.common.config.committers import CommitterList
37 from webkitpy.common.config.committers import Account
38 from webkitpy.common.system.filesystem import FileSystem
39 import webkitpy.common.config.urls as config_urls
40
# Module-level logger, named after this module; used below to warn about
# ChangeLog entries whose first line is not a valid date line.
_log = logging.getLogger(__name__)
42
43
44 # FIXME: parse_bug_id_from_changelog should not be a free function.
45 # Parse the bug ID out of a Changelog message based on the format that is
46 # used by prepare-ChangeLog
def parse_bug_id_from_changelog(message):
    """Return the bug ID referenced by a ChangeLog message, or None.

    A bug URL that sits alone on its own line (optionally indented), which is
    the layout written by prepare-ChangeLog, is preferred.  The short URL
    pattern is tried before the long one.  If neither is found, the result
    of config_urls.parse_bug_id() on the whole message is returned.

    message -- the ChangeLog text to search; a false value yields None.
    """
    if not message:
        return None
    for bug_url in (config_urls.bug_url_short, config_urls.bug_url_long):
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        match = re.search(r"^\s*" + bug_url + "$", message, re.MULTILINE)
        if match:
            return int(match.group('bug_id'))
    # We weren't able to find a bug URL in the format used by prepare-ChangeLog. Fall back to the
    # first bug URL found anywhere in the message.
    return config_urls.parse_bug_id(message)
59
60
class ChangeLogEntry(object):
    """One ChangeLog entry: a date/author line followed by the entry text.

    The text is parsed once, at construction time.  The accessor methods
    return the pieces extracted then (authors, reviewers, touched files,
    bug description, ...).  An entry whose first line is not a valid date
    line is still constructed (a warning is logged); its date line, author
    text and author are None and its author list is empty.
    """

    # e.g. 2009-06-03  Eric Seidel  <eric@webkit.org>
    date_line_regexp = r'^(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<authors>(?P<name>[^<]+?)\s+<(?P<email>[^<>]+)>.*?)$'

    # e.g. * Source/WebCore/page/EventHandler.cpp: Implement FooBarQuux.
    touched_files_regexp = r'^\s*\*\s*(?P<file>[A-Za-z0-9_\-\./\\]+)\s*\:'
    # e.g. (ChangeLogEntry.touched_functions): Added.
    touched_functions_regexp = r'^\s*\((?P<function>[^)]*)\):'

    # e.g. Reviewed by Darin Adler.
    # (Discard everything after the first period to match more invalid lines.)
    reviewed_by_regexp = r'^\s*((\w+\s+)+and\s+)?(Review|Rubber(\s*|-)stamp)(s|ed)?\s+([a-z]+\s+)*?by\s+(?P<reviewer>.*?)[\.,]?\s*$'

    reviewed_byless_regexp = r'^\s*((Review|Rubber(\s*|-)stamp)(s|ed)?|RS)(\s+|\s*=\s*)(?P<reviewer>([A-Z]\w+\s*)+)[\.,]?\s*$'

    reviewer_name_noise_regexp = re.compile(r"""
    (\s+((tweaked\s+)?and\s+)?(landed|committed|okayed)\s+by.+) # "landed by", "commented by", etc...
    |(^(Reviewed\s+)?by\s+) # extra "Reviewed by" or "by"
    |([(<]\s*[\w_\-\.]+@[\w_\-\.]+[>)]) # email addresses
    |([(<](https?://?bugs.)webkit.org[^>)]+[>)]) # bug url
    |("[^"]+") # wresler names like 'Sean/Shawn/Shaun' in 'Geoffrey "Sean/Shawn/Shaun" Garen'
    |('[^']+') # wresler names like "The Belly" in "Sam 'The Belly' Weinig"
    |((Mr|Ms|Dr|Mrs|Prof)\.(\s+|$))
    """, re.IGNORECASE | re.VERBOSE)

    reviewer_name_casesensitive_noise_regexp = re.compile(r"""
    ((\s+|^)(and\s+)?([a-z-]+\s+){5,}by\s+) # e.g. "and given a good once-over by"
    |(\(\s*(?!(and|[A-Z])).+\)) # any parenthesis that doesn't start with "and" or a capital letter
    |(with(\s+[a-z-]+)+) # phrases with "with no hesitation" in "Sam Weinig with no hesitation"
    """, re.VERBOSE)

    reviewer_name_noise_needing_a_backreference_regexp = re.compile(r"""
    (\S\S)\.(?:(\s.+|$)) # Text after the two word characters (don't match initials) and a period followed by a space.
    """, re.IGNORECASE | re.VERBOSE)

    nobody_regexp = re.compile(r"""(\s+|^)nobody(
    ((,|\s+-)?\s+(\w+\s+)+fix.*) # e.g. nobody, build fix...
    |(\s*\([^)]+\).*) # NOBODY (..)...
    |$)""", re.IGNORECASE | re.VERBOSE)

    # e.g. == Rolled over to ChangeLog-2011-02-16 ==
    rolled_over_regexp = r'^== Rolled over to ChangeLog-\d{4}-\d{2}-\d{2} ==$'

    # e.g. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@96161 268f45cc-cd09-0410-ab3c-d52691b4dbfc
    svn_id_regexp = r'git-svn-id: http://svn.webkit.org/repository/webkit/trunk@(?P<svnid>\d+) '

    # CommitterList shared by every entry constructed without an explicit
    # committer_list.  It is created on first use rather than as a default
    # argument, so merely defining this class does not construct one.
    _default_committer_list = None

    def __init__(self, contents, committer_list=None, revision=None):
        """Parse `contents` into an entry.

        contents -- the full text of the entry, starting with its date line.
        committer_list -- object used to look up contributors; when omitted
            (None), a CommitterList shared between entries is used.
        revision -- optional revision associated with the entry; stored and
            returned unchanged by revision().
        """
        if committer_list is None:
            committer_list = self._shared_committer_list()
        self._contents = contents
        self._committer_list = committer_list
        self._revision = revision
        self._parse_entry()

    @classmethod
    def _shared_committer_list(cls):
        """Return the lazily created CommitterList used as the default."""
        if ChangeLogEntry._default_committer_list is None:
            ChangeLogEntry._default_committer_list = CommitterList()
        return ChangeLogEntry._default_committer_list

    @classmethod
    def _parse_reviewer_text(cls, text):
        """Extract the reviewer text and list of reviewer names from `text`.

        Returns a (reviewer_text, reviewer_list) pair, or (None, None) when
        no reviewer line is found or nothing is left after noise removal.
        """
        match = re.search(ChangeLogEntry.reviewed_by_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            # There are cases where people omit "by". We match it only if reviewer part looked nice
            # in order to avoid matching random lines that start with Reviewed
            match = re.search(ChangeLogEntry.reviewed_byless_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None, None

        reviewer_text = match.group("reviewer")

        # Strip, in order: "nobody" placeholders, generic noise, case-sensitive
        # noise, and trailing sentences after a period.
        reviewer_text = ChangeLogEntry.nobody_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_casesensitive_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_needing_a_backreference_regexp.sub(r'\1', reviewer_text)
        reviewer_text = reviewer_text.replace('(', '').replace(')', '')
        reviewer_text = re.sub(r'\s\s+|[,.]\s*$', ' ', reviewer_text).strip()
        if not reviewer_text:
            return None, None

        reviewer_list = ChangeLogEntry._split_contributor_names(reviewer_text)

        # Get rid of "reviewers" like "even though this is just a..." in "Reviewed by Sam Weinig, even though this is just a..."
        # and "who wrote the original code" in "Noam Rosenthal, who wrote the original code"
        not_a_name = r'^who\s|^([a-z]+(\s+|\.|$)){6,}$'
        reviewer_list = [reviewer for reviewer in reviewer_list if not re.match(not_a_name, reviewer)]

        return reviewer_text, reviewer_list

    @classmethod
    def _split_contributor_names(cls, text):
        """Split `text` on the separators used between names (",", "and", "&", "/", "+", ...)."""
        return re.split(r'\s*(?:,(?:\s+and\s+|&)?|(?:^|\s+)and\s+|&&|[/+&])\s*', text)

    def _fuzz_match_reviewers(self, reviewers_text_list):
        """Map reviewer names to contributors, keeping only unambiguous matches."""
        if not reviewers_text_list:
            return []
        list_of_reviewers = [self._committer_list.contributors_by_fuzzy_match(reviewer)[0] for reviewer in reviewers_text_list]
        # Flatten lists and get rid of any reviewers with more than one candidate.
        return [reviewers[0] for reviewers in list_of_reviewers if len(reviewers) == 1]

    @classmethod
    def _parse_author_name_and_email(cls, author_name_and_email):
        """Return {'name': ..., 'email': ...} parsed from "Name <email>" text.

        Raises AttributeError when the text does not have that form.
        """
        match = re.match(r'(?P<name>.+?)\s+<(?P<email>[^>]+)>', author_name_and_email)
        return {'name': match.group("name"), 'email': match.group("email")}

    @classmethod
    def _parse_author_text(cls, text):
        """Return a list of name/email dicts, one per author named in `text`.

        A false `text` yields an empty list.
        """
        if not text:
            return []
        authors = cls._split_contributor_names(text)
        assert authors
        return [cls._parse_author_name_and_email(author) for author in authors]

    @classmethod
    def _parse_touched_functions(cls, text):
        """Return a dict mapping each touched file to the function names listed under it."""
        result = {}
        cur_file = None
        for line in text.splitlines():
            file_match = re.match(cls.touched_files_regexp, line)
            if file_match:
                cur_file = file_match.group("file")
                result[cur_file] = []
            func_match = re.match(cls.touched_functions_regexp, line)
            if func_match and cur_file:
                result[cur_file].append(func_match.group("function"))
        return result

    @classmethod
    def _parse_bug_description(cls, text):
        """Return the bug description line of `text`, or None if it can't be determined."""
        # If line 4 is a bug url, line 3 is the bug description.
        # It's too hard to guess in other cases, so we return None.
        lines = text.splitlines()
        if len(lines) < 4:
            return None
        for bug_url in (config_urls.bug_url_short, config_urls.bug_url_long):
            if re.match(r"^\s*" + bug_url + "$", lines[3]):
                return lines[2].strip()
        return None

    def _parse_entry(self):
        """Populate all parsed attributes from self._contents."""
        match = re.match(self.date_line_regexp, self._contents, re.MULTILINE)
        if not match:
            _log.warning("Creating invalid ChangeLogEntry:\n%s", self._contents)

        # An invalid entry has no date line; keep going so the entry can
        # still be constructed, as the warning above promises.
        self._date_line = match.group() if match else None
        self._bug_description = self._parse_bug_description(self._contents)

        # FIXME: group("name") does not seem to be Unicode?  Probably due to self._contents not being unicode.
        self._author_text = match.group("authors") if match else None
        self._authors = ChangeLogEntry._parse_author_text(self._author_text)

        self._reviewer_text, self._reviewers_text_list = ChangeLogEntry._parse_reviewer_text(self._contents)
        self._reviewers = self._fuzz_match_reviewers(self._reviewers_text_list)
        if self._authors:
            self._author = (self._committer_list.contributor_by_email(self.author_email())
                            or self._committer_list.contributor_by_name(self.author_name()))
        else:
            # Nothing to look up when the date line carried no author.
            self._author = None

        self._touched_files = re.findall(self.touched_files_regexp, self._contents, re.MULTILINE)
        self._touched_functions = self._parse_touched_functions(self._contents)

    def date_line(self):
        return self._date_line

    def author_text(self):
        return self._author_text

    def revision(self):
        return self._revision

    def author_name(self):
        # None when the entry has no parsed authors.
        return self._authors[0]['name'] if self._authors else None

    def author_email(self):
        # None when the entry has no parsed authors.
        return self._authors[0]['email'] if self._authors else None

    def author(self):
        return self._author  # Might be None

    def authors(self):
        return self._authors

    # FIXME: Eventually we would like to map reviwer names to reviewer objects.
    # See https://bugs.webkit.org/show_bug.cgi?id=26533
    def reviewer_text(self):
        return self._reviewer_text

    # Might be None, might also not be a Reviewer!
    def reviewer(self):
        return self._reviewers[0] if self._reviewers else None

    def reviewers(self):
        return self._reviewers

    def has_valid_reviewer(self):
        """Return True if a named reviewer is a known committer, or the entry says "unreviewed"."""
        for reviewer_name in self._reviewers_text_list or []:
            if self._committer_list.committer_by_name(reviewer_name):
                return True
        return bool(re.search("unreviewed", self._contents, re.IGNORECASE))

    def contents(self):
        return self._contents

    def bug_id(self):
        return parse_bug_id_from_changelog(self._contents)

    def bug_description(self):
        return self._bug_description

    def touched_files(self):
        return self._touched_files

    # Returns a dict from file name to lists of function names.
    def touched_functions(self):
        return self._touched_functions

    def touched_files_text(self):
        """Return the entry text from the first touched-file line onwards ("" if there is none)."""
        match = re.search(self.touched_files_regexp, self._contents, re.MULTILINE)
        # The leading \s* of the pattern can swallow preceding blank lines,
        # so strip any newlines the match start picked up.
        return self._contents[match.start():].lstrip("\n\r") if match else ""

    # Determine if any text has been added to the section on touched files
    def is_touched_files_text_clean(self):
        file_line_end = r"( (Added|Removed|(Copied|Renamed) from [A-Za-z0-9_\-./\\]+).)?$"
        for line in self.touched_files_text().splitlines():
            if re.match(self.touched_files_regexp + file_line_end, line):
                continue
            if re.match(self.touched_functions_regexp + "$", line):
                continue
            return False
        return True
282
283 # FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
class ChangeLog(object):
    """Reads and edits the ChangeLog file at `path` through a filesystem object."""

    _changelog_indent = " " * 8

    # Matches the "<revision> <author>" prefix that precedes each line of
    # blame output, capturing the revision and the remaining line.
    svn_blame_regexp = re.compile(r'^(\s*(?P<revision>\d+) [^ ]+)\s*(?P<line>.*?\n)')

    def __init__(self, path, filesystem=None):
        """path -- location of the ChangeLog; filesystem -- defaults to FileSystem()."""
        self.path = path
        self._filesystem = filesystem or FileSystem()

    @classmethod
    def parse_latest_entry_from_file(cls, changelog_file):
        """Return the first entry in `changelog_file`, or None if it has none."""
        return next(cls.parse_entries_from_file(changelog_file), None)

    @classmethod
    def _separate_revision_and_line(cls, line):
        """Split a blame-prefixed line into (revision, line).

        Returns (None, line) unchanged when the line has no such prefix.
        """
        match = cls.svn_blame_regexp.match(line)
        if not match:
            return None, line
        return int(match.group('revision')), match.group('line')

    @staticmethod
    def _most_probable_revision(revisions_in_entry):
        """Return the revision with the highest line count, or None without counts."""
        if not revisions_in_entry:
            return None
        return max(revisions_in_entry, key=revisions_in_entry.__getitem__)

    @classmethod
    def parse_entries_from_file(cls, changelog_file):
        """changelog_file must be a file-like object which returns
        unicode strings, e.g. from StringIO(unicode()) or
        fs.open_text_file_for_reading()

        Yields a ChangeLogEntry per entry, stopping at a "Rolled over" line
        or the end of the file.  Yields nothing if the first line is not a
        date line.  The last line collected for each entry is dropped before
        the entry is built."""
        date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
        rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)

        # The first line should be a date line.
        revision, first_line = cls._separate_revision_and_line(changelog_file.readline())
        # type(u'') is `unicode` on Python 2 and `str` on Python 3.
        assert isinstance(first_line, type(u''))
        if not date_line_regexp.match(cls.svn_blame_regexp.sub('', first_line)):
            # A bare return ends the generator; raising StopIteration here
            # would surface as RuntimeError under PEP 479.
            return

        # Whether the file carries blame prefixes is decided once, from the
        # first line.  Re-deriving it from the per-entry counts would switch
        # prefix stripping on for plain files after the first entry.
        has_blame = revision is not None
        entry_lines = [first_line]
        revisions_in_entry = {revision: 1} if has_blame else None
        for line in changelog_file:
            if has_blame:
                revision, line = cls._separate_revision_and_line(line)

            if rolled_over_regexp.match(line):
                break

            if date_line_regexp.match(line):
                # Remove the extra newline at the end
                yield ChangeLogEntry(''.join(entry_lines[:-1]),
                                     revision=cls._most_probable_revision(revisions_in_entry))
                entry_lines = []
                revisions_in_entry = {revision: 0} if has_blame else None

            entry_lines.append(line)
            if has_blame:
                revisions_in_entry[revision] = revisions_in_entry.get(revision, 0) + 1

        yield ChangeLogEntry(''.join(entry_lines[:-1]),
                             revision=cls._most_probable_revision(revisions_in_entry))

    def latest_entry(self):
        """Return the first entry of the ChangeLog file, or None."""
        # ChangeLog files are always UTF-8, we read them in as such to support Reviewers with unicode in their names.
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            return self.parse_latest_entry_from_file(changelog_file)

    # _wrap_line and _wrap_lines exist to work around
    # http://bugs.python.org/issue1859

    def _wrap_line(self, line):
        """Wrap one line to 70 columns, indenting every output line."""
        return textwrap.fill(line,
                             width=70,
                             initial_indent=self._changelog_indent,
                             # Don't break urls which may be longer than width.
                             break_long_words=False,
                             subsequent_indent=self._changelog_indent)

    # Workaround as suggested by guido in
    # http://bugs.python.org/issue1859#msg60040

    def _wrap_lines(self, message):
        """Wrap each line of `message` separately and rejoin with newlines."""
        return "\n".join(self._wrap_line(line) for line in message.splitlines())

    def update_with_unreviewed_message(self, message):
        """Replace the description boilerplate with `message`.

        The boilerplate lines that follow it are dropped, up to (not
        including) the first line containing a '*'.
        """
        first_boilerplate_line_regexp = re.compile(
                r"%sNeed a short description \(OOPS!\)\." % self._changelog_indent)
        removing_boilerplate = False
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            for line in changelog_file:
                if first_boilerplate_line_regexp.search(line):
                    message_lines = self._wrap_lines(message)
                    # Use a callable so the message is inserted literally;
                    # a string replacement would interpret its backslashes.
                    result.write(first_boilerplate_line_regexp.sub(lambda _match: message_lines, line))
                    # Remove all the ChangeLog boilerplate before the first changed
                    # file.
                    removing_boilerplate = True
                elif removing_boilerplate:
                    if line.find('*') >= 0:  # each changed file is preceded by a *
                        removing_boilerplate = False

                if not removing_boilerplate:
                    result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def set_reviewer(self, reviewer):
        """Record `reviewer` in the ChangeLog file.

        If the latest entry has neither a "NOBODY (OOPS!)" placeholder nor a
        matched reviewer, a "Reviewed by" line is inserted after the last
        bug URL counted in that entry.  Otherwise every "NOBODY (OOPS!)" in
        the file is replaced by `reviewer`.
        """
        latest_entry = self.latest_entry()
        latest_entry_contents = latest_entry.contents()
        existing_reviewer = latest_entry.reviewer()
        found_nobody = re.search(r"NOBODY\s*\(OOPS!\)", latest_entry_contents, re.MULTILINE)

        if found_nobody or existing_reviewer:
            data = self._filesystem.read_text_file(self.path)
            newdata = data.replace("NOBODY (OOPS!)", reviewer)
            self._filesystem.write_text_file(self.path, newdata)
            return

        bug_url_number_of_items = len(re.findall(config_urls.bug_url_long, latest_entry_contents, re.MULTILINE))
        bug_url_number_of_items += len(re.findall(config_urls.bug_url_short, latest_entry_contents, re.MULTILINE))
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            for line in changelog_file:
                found_bug_url = (re.search(config_urls.bug_url_long, line)
                                 or re.search(config_urls.bug_url_short, line))
                result.write(line)
                if found_bug_url:
                    # Count down so the reviewer line lands after the last
                    # bug URL that was counted in the latest entry.
                    if bug_url_number_of_items == 1:
                        result.write("\n        Reviewed by %s.\n" % reviewer)
                    bug_url_number_of_items -= 1
        self._filesystem.write_text_file(self.path, result.getvalue())

    def set_short_description_and_bug_url(self, short_description, bug_url):
        """Fill in the description boilerplate and drop the bug-URL boilerplate line."""
        message = "%s\n%s%s" % (short_description, self._changelog_indent, bug_url)
        bug_boilerplate = "%sNeed the bug URL (OOPS!).\n" % self._changelog_indent
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            for line in changelog_file:
                line = line.replace("Need a short description (OOPS!).", message)
                if line != bug_boilerplate:
                    result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def delete_entries(self, num_entries):
        """Remove the first `num_entries` entries from the ChangeLog file.

        Everything from a "Rolled over" line onwards is always kept.
        """
        date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
        rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
        entries = 0
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            for line in changelog_file:
                if date_line_regexp.match(line):
                    entries += 1
                elif rolled_over_regexp.match(line):
                    # Force the rest of the file to be kept.
                    entries = num_entries + 1
                if entries > num_entries:
                    result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def prepend_text(self, text):
        """Insert `text` at the very beginning of the ChangeLog file."""
        data = self._filesystem.read_text_file(self.path)
        self._filesystem.write_text_file(self.path, text + data)