Don't update author info in PrepareChangeLog and allow users to skip the PrepareChang...
[WebKit-https.git] / Tools / Scripts / webkitpy / common / checkout / changelog.py
1 # Copyright (C) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for parsing and modifying ChangeLog files
30
31 import logging
32 import re
33 from StringIO import StringIO
34 import textwrap
35
36 from webkitpy.common.config.committers import CommitterList
37 from webkitpy.common.config.committers import Account
38 from webkitpy.common.system.filesystem import FileSystem
39 import webkitpy.common.config.urls as config_urls
40
41 _log = logging.getLogger(__name__)
42
43
44 # FIXME: parse_bug_id_from_changelog should not be a free function.
45 # Parse the bug ID out of a Changelog message based on the format that is
46 # used by prepare-ChangeLog
def parse_bug_id_from_changelog(message):
    """Return the bug ID referenced by a ChangeLog message as an int.

    A line that consists only of a bug URL (optionally indented) is
    preferred, trying config_urls.bug_url_short before
    config_urls.bug_url_long; this is the layout the comment above
    attributes to prepare-ChangeLog.  If no such line exists, the result of
    config_urls.parse_bug_id(message) is returned instead.

    Returns None for an empty or missing message.
    """
    if not message:
        return None
    for bug_url in (config_urls.bug_url_short, config_urls.bug_url_long):
        # Raw string: "\s" must reach the regex engine untouched rather than
        # being treated as an (invalid) string escape.
        match = re.search(r"^\s*" + bug_url + "$", message, re.MULTILINE)
        if match:
            return int(match.group('bug_id'))
    # We weren't able to find a bug URL in the format used by prepare-ChangeLog. Fall back to the
    # first bug URL found anywhere in the message.
    return config_urls.parse_bug_id(message)
59
60
class ChangeLogEntry(object):
    """One parsed ChangeLog entry.

    The entry text is parsed once, at construction time, into its date line,
    authors, reviewers, touched files and touched functions; the accessor
    methods below simply return the parsed values.

    Text that does not start with a valid date line still produces an entry
    (a warning is logged): its date, date line, author text and author are
    None and its author list is empty.
    """

    # e.g. 2009-06-03  Eric Seidel  <eric@webkit.org>
    date_line_regexp = r'^(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<authors>(?P<name>[^<]+?)\s+<(?P<email>[^<>]+)>.*?)$'

    # e.g. * Source/WebCore/page/EventHandler.cpp: Implement FooBarQuux.
    touched_files_regexp = r'^\s*\*\s*(?P<file>[A-Za-z0-9_\-\./\\]+)\s*\:'
    # e.g. (ChangeLogEntry.touched_functions): Added.
    touched_functions_regexp = r'^\s*\((?P<function>[^)]*)\):'

    # e.g. Reviewed by Darin Adler.
    # (Discard everything after the first period to match more invalid lines.)
    reviewed_by_regexp = r'^\s*((\w+\s+)+and\s+)?(Review|Rubber(\s*|-)stamp)(s|ed)?\s+([a-z]+\s+)*?by\s+(?P<reviewer>.*?)[\.,]?\s*$'

    reviewed_byless_regexp = r'^\s*((Review|Rubber(\s*|-)stamp)(s|ed)?|RS)(\s+|\s*=\s*)(?P<reviewer>([A-Z]\w+\s*)+)[\.,]?\s*$'

    reviewer_name_noise_regexp = re.compile(r"""
    (\s+((tweaked\s+)?and\s+)?(landed|committed|okayed)\s+by.+) # "landed by", "commented by", etc...
    |(^(Reviewed\s+)?by\s+) # extra "Reviewed by" or "by"
    |([(<]\s*[\w_\-\.]+@[\w_\-\.]+[>)]) # email addresses
    |([(<](https?://?bugs.)webkit.org[^>)]+[>)]) # bug url
    |("[^"]+") # wresler names like 'Sean/Shawn/Shaun' in 'Geoffrey "Sean/Shawn/Shaun" Garen'
    |('[^']+') # wresler names like "The Belly" in "Sam 'The Belly' Weinig"
    |((Mr|Ms|Dr|Mrs|Prof)\.(\s+|$))
    """, re.IGNORECASE | re.VERBOSE)

    reviewer_name_casesensitive_noise_regexp = re.compile(r"""
    ((\s+|^)(and\s+)?([a-z-]+\s+){5,}by\s+) # e.g. "and given a good once-over by"
    |(\(\s*(?!(and|[A-Z])).+\)) # any parenthesis that doesn't start with "and" or a capital letter
    |(with(\s+[a-z-]+)+) # phrases with "with no hesitation" in "Sam Weinig with no hesitation"
    """, re.VERBOSE)

    reviewer_name_noise_needing_a_backreference_regexp = re.compile(r"""
    (\S\S)\.(?:(\s.+|$)) # Text after the two word characters (don't match initials) and a period followed by a space.
    """, re.IGNORECASE | re.VERBOSE)

    nobody_regexp = re.compile(r"""(\s+|^)nobody(
    ((,|\s+-)?\s+(\w+\s+)+fix.*) # e.g. nobody, build fix...
    |(\s*\([^)]+\).*) # NOBODY (..)...
    |$)""", re.IGNORECASE | re.VERBOSE)

    # e.g. == Rolled over to ChangeLog-2011-02-16 ==
    rolled_over_regexp = r'^== Rolled over to ChangeLog-\d{4}-\d{2}-\d{2} ==$'

    # e.g. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@96161 268f45cc-cd09-0410-ab3c-d52691b4dbfc
    svn_id_regexp = r'git-svn-id: http://svn.webkit.org/repository/webkit/trunk@(?P<svnid>\d+) '

    # CommitterList shared by every entry constructed without an explicit
    # committer_list.  It is created on first use instead of at class
    # definition time, so importing this module does not build one.
    _default_committer_list = None

    def __init__(self, contents, committer_list=None, revision=None):
        """Parse `contents` (the text of one ChangeLog entry).

        committer_list: object used to look up contributors; when omitted
            (or None) a single shared CommitterList() is used.
        revision: optional revision to associate with the entry; it is only
            stored and returned by revision().
        """
        self._contents = contents
        if committer_list is None:
            committer_list = self._shared_committer_list()
        self._committer_list = committer_list
        self._revision = revision
        self._parse_entry()

    @classmethod
    def _shared_committer_list(cls):
        """Return the shared default CommitterList, creating it on first use."""
        if ChangeLogEntry._default_committer_list is None:
            ChangeLogEntry._default_committer_list = CommitterList()
        return ChangeLogEntry._default_committer_list

    @classmethod
    def _parse_reviewer_text(cls, text):
        """Extract the reviewer text from an entry.

        Returns (reviewer_text, reviewer_list): the cleaned-up text following
        "Reviewed by" and that text split into individual names, or
        (None, None) when no reviewer line is found or nothing is left after
        the noise has been stripped.
        """
        match = re.search(ChangeLogEntry.reviewed_by_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            # There are cases where people omit "by". We match it only if reviewer part looked nice
            # in order to avoid matching random lines that start with Reviewed
            match = re.search(ChangeLogEntry.reviewed_byless_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None, None

        reviewer_text = match.group("reviewer")

        # The order of these substitutions matters: each one works on the
        # output of the previous one.
        reviewer_text = ChangeLogEntry.nobody_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_casesensitive_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_needing_a_backreference_regexp.sub(r'\1', reviewer_text)
        reviewer_text = reviewer_text.replace('(', '').replace(')', '')
        reviewer_text = re.sub(r'\s\s+|[,.]\s*$', ' ', reviewer_text).strip()
        if not reviewer_text:
            return None, None

        reviewer_list = ChangeLogEntry._split_contributor_names(reviewer_text)

        # Get rid of "reviewers" like "even though this is just a..." in "Reviewed by Sam Weinig, even though this is just a..."
        # and "who wrote the original code" in "Noam Rosenthal, who wrote the original code"
        reviewer_list = [reviewer for reviewer in reviewer_list
                         if not re.match(r'^who\s|^([a-z]+(\s+|\.|$)){6,}$', reviewer)]

        return reviewer_text, reviewer_list

    @classmethod
    def _split_contributor_names(cls, text):
        """Split text such as "A, B and C" or "A & B" into a list of names."""
        return re.split(r'\s*(?:,(?:\s+and\s+|&)?|(?:^|\s+)and\s+|&&|[/+&])\s*', text)

    def _fuzz_match_reviewers(self, reviewers_text_list):
        """Map reviewer names to contributors via the committer list.

        Names for which the fuzzy match does not yield exactly one candidate
        are dropped.
        """
        if not reviewers_text_list:
            return []
        list_of_reviewers = [self._committer_list.contributors_by_fuzzy_match(reviewer)[0] for reviewer in reviewers_text_list]
        # Flatten lists and get rid of any reviewers with more than one candidate.
        return [reviewers[0] for reviewers in list_of_reviewers if len(reviewers) == 1]

    @classmethod
    def _parse_author_name_and_email(cls, author_name_and_email):
        """Parse "Name <email>" into {'name': ..., 'email': ...}.

        Text without an "<email>" part (e.g. the second author in
        "A <a@b.c> and Bob") yields the stripped text as the name and None
        as the email instead of raising.
        """
        match = re.match(r'(?P<name>.+?)\s+<(?P<email>[^>]+)>', author_name_and_email)
        if not match:
            return {'name': author_name_and_email.strip(), 'email': None}
        return {'name': match.group("name"), 'email': match.group("email")}

    @classmethod
    def _parse_author_text(cls, text):
        """Return a list of {'name', 'email'} dicts, one per author in `text`.

        Returns [] when `text` is empty or None.
        """
        if not text:
            return []
        # Splitting can produce empty pieces (e.g. text starting with "and ");
        # those carry no author and are skipped.
        return [cls._parse_author_name_and_email(author)
                for author in cls._split_contributor_names(text)
                if author.strip()]

    @classmethod
    def _parse_touched_functions(cls, text):
        """Return a dict mapping each touched file to its listed function names.

        Function lines that appear before any file line are ignored.
        """
        result = {}
        cur_file = None
        for line in text.splitlines():
            file_match = re.match(cls.touched_files_regexp, line)
            if file_match:
                cur_file = file_match.group("file")
                result[cur_file] = []
            func_match = re.match(cls.touched_functions_regexp, line)
            if func_match and cur_file:
                result[cur_file].append(func_match.group("function"))
        return result

    @classmethod
    def _parse_bug_description(cls, text):
        """Return the bug description line of an entry, or None."""
        # If line 4 is a bug url, line 3 is the bug description.
        # It's too hard to guess in other cases, so we return None.
        lines = text.splitlines()
        if len(lines) < 4:
            return None
        for bug_url in (config_urls.bug_url_short, config_urls.bug_url_long):
            if re.match(r"^\s*" + bug_url + "$", lines[3]):
                return lines[2].strip()
        return None

    def _parse_entry(self):
        """Populate every parsed field from self._contents."""
        match = re.match(self.date_line_regexp, self._contents, re.MULTILINE)
        if not match:
            _log.warning("Creating invalid ChangeLogEntry:\n%s" % self._contents)

        # An invalid entry has no date line; keep going with None values so
        # that the warning above is the only consequence.
        self._date_line = match.group() if match else None
        self._date = match.group("date") if match else None
        self._bug_description = self._parse_bug_description(self._contents)

        # FIXME: group("name") does not seem to be Unicode?  Probably due to self._contents not being unicode.
        self._author_text = match.group("authors") if match else None
        self._authors = ChangeLogEntry._parse_author_text(self._author_text)

        self._reviewer_text, self._reviewers_text_list = ChangeLogEntry._parse_reviewer_text(self._contents)
        self._reviewers = self._fuzz_match_reviewers(self._reviewers_text_list)

        # Look the first author up by email, then by name; skip a lookup whose
        # key is missing rather than handing None to the committer list.
        author = None
        email = self.author_email()
        name = self.author_name()
        if email:
            author = self._committer_list.contributor_by_email(email)
        if not author and name:
            author = self._committer_list.contributor_by_name(name)
        self._author = author

        self._touched_files = re.findall(self.touched_files_regexp, self._contents, re.MULTILINE)
        self._touched_functions = self._parse_touched_functions(self._contents)

    def date_line(self):
        return self._date_line

    def date(self):
        return self._date

    def author_text(self):
        return self._author_text

    def revision(self):
        return self._revision

    def author_name(self):
        """Name of the first author, or None if the entry has no authors."""
        return self._authors[0]['name'] if self._authors else None

    def author_email(self):
        """Email of the first author, or None if unavailable."""
        return self._authors[0]['email'] if self._authors else None

    def author(self):
        return self._author  # Might be None

    def authors(self):
        return self._authors

    # FIXME: Eventually we would like to map reviewer names to reviewer objects.
    # See https://bugs.webkit.org/show_bug.cgi?id=26533
    def reviewer_text(self):
        return self._reviewer_text

    # Might be None, might also not be a Reviewer!
    def reviewer(self):
        return self._reviewers[0] if self._reviewers else None

    def reviewers(self):
        return self._reviewers

    def has_valid_reviewer(self):
        """True if a listed reviewer is a known committer, or the entry says "unreviewed"."""
        for reviewer_name in self._reviewers_text_list or []:
            if self._committer_list.committer_by_name(reviewer_name):
                return True
        return bool(re.search("unreviewed", self._contents, re.IGNORECASE))

    def contents(self):
        return self._contents

    def bug_id(self):
        return parse_bug_id_from_changelog(self._contents)

    def bug_description(self):
        return self._bug_description

    def touched_files(self):
        return self._touched_files

    # Returns a dict from file name to lists of function names.
    def touched_functions(self):
        return self._touched_functions

    def touched_files_text(self):
        """Return the entry text from the first touched-file line onwards ("" if none)."""
        match = re.search(self.touched_files_regexp, self._contents, re.MULTILINE)
        return self._contents[match.start():].lstrip("\n\r") if match else ""

    # Determine if any text has been added to the section on touched files
    def is_touched_files_text_clean(self):
        file_line_end = r"( (Added|Removed|(Copied|Renamed) from [A-Za-z0-9_\-./\\]+).)?$"
        for line in self.touched_files_text().splitlines():
            if re.match(self.touched_files_regexp + file_line_end, line):
                continue
            if re.match(self.touched_functions_regexp + "$", line):
                continue
            return False
        return True
286
287 # FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
288 class ChangeLog(object):
289
290     def __init__(self, path, filesystem=None):
291         self.path = path
292         self._filesystem = filesystem or FileSystem()
293
294     _changelog_indent = " " * 8
295
296     @classmethod
297     def parse_latest_entry_from_file(cls, changelog_file):
298         try:
299             return next(cls.parse_entries_from_file(changelog_file))
300         except StopIteration, e:
301             return None
302
303     svn_blame_regexp = re.compile(r'^(\s*(?P<revision>\d+) [^ ]+)\s*(?P<line>.*?\n)')
304
305     @classmethod
306     def _separate_revision_and_line(cls, line):
307         match = cls.svn_blame_regexp.match(line)
308         if not match:
309             return None, line
310         return int(match.group('revision')), match.group('line')
311
312     @classmethod
313     def parse_entries_from_file(cls, changelog_file):
314         """changelog_file must be a file-like object which returns
315         unicode strings, e.g. from StringIO(unicode()) or
316         fs.open_text_file_for_reading()"""
317         date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
318         rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
319
320         # The first line should be a date line.
321         revision, first_line = cls._separate_revision_and_line(changelog_file.readline())
322         assert(isinstance(first_line, unicode))
323         if not date_line_regexp.match(cls.svn_blame_regexp.sub('', first_line)):
324             raise StopIteration
325
326         entry_lines = [first_line]
327         revisions_in_entry = {revision: 1} if revision != None else None
328         for line in changelog_file:
329             if revisions_in_entry:
330                 revision, line = cls._separate_revision_and_line(line)
331
332             if rolled_over_regexp.match(line):
333                 break
334
335             if date_line_regexp.match(line):
336                 most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
337                 # Remove the extra newline at the end
338                 yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)
339                 entry_lines = []
340                 revisions_in_entry = {revision: 0}
341
342             entry_lines.append(line)
343             if revisions_in_entry:
344                 revisions_in_entry[revision] = revisions_in_entry.get(revision, 0) + 1
345
346         most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
347         yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)
348
349     def latest_entry(self):
350         # ChangeLog files are always UTF-8, we read them in as such to support Reviewers with unicode in their names.
351         changelog_file = self._filesystem.open_text_file_for_reading(self.path)
352         try:
353             return self.parse_latest_entry_from_file(changelog_file)
354         finally:
355             changelog_file.close()
356
357     # _wrap_line and _wrap_lines exist to work around
358     # http://bugs.python.org/issue1859
359
360     def _wrap_line(self, line):
361         return textwrap.fill(line,
362                              width=70,
363                              initial_indent=self._changelog_indent,
364                              # Don't break urls which may be longer than width.
365                              break_long_words=False,
366                              subsequent_indent=self._changelog_indent)
367
368     # Workaround as suggested by guido in
369     # http://bugs.python.org/issue1859#msg60040
370
371     def _wrap_lines(self, message):
372         lines = [self._wrap_line(line) for line in message.splitlines()]
373         return "\n".join(lines)
374
375     def update_with_unreviewed_message(self, message):
376         first_boilerplate_line_regexp = re.compile(
377                 "%sNeed a short description \(OOPS!\)\." % self._changelog_indent)
378         removing_boilerplate = False
379         result = StringIO()
380         with self._filesystem.open_text_file_for_reading(self.path) as file:
381             for line in file:
382                 if first_boilerplate_line_regexp.search(line):
383                     message_lines = self._wrap_lines(message)
384                     result.write(first_boilerplate_line_regexp.sub(message_lines, line))
385                     # Remove all the ChangeLog boilerplate before the first changed
386                     # file.
387                     removing_boilerplate = True
388                 elif removing_boilerplate:
389                     if line.find('*') >= 0:  # each changed file is preceded by a *
390                         removing_boilerplate = False
391
392                 if not removing_boilerplate:
393                     result.write(line)
394         self._filesystem.write_text_file(self.path, result.getvalue())
395
396     def set_reviewer(self, reviewer):
397         latest_entry = self.latest_entry()
398         latest_entry_contents = latest_entry.contents()
399         reviewer_text = latest_entry.reviewer()
400         found_nobody = re.search("NOBODY\s*\(OOPS!\)", latest_entry_contents, re.MULTILINE)
401
402         if not found_nobody and not reviewer_text:
403             bug_url_number_of_items = len(re.findall(config_urls.bug_url_long, latest_entry_contents, re.MULTILINE))
404             bug_url_number_of_items += len(re.findall(config_urls.bug_url_short, latest_entry_contents, re.MULTILINE))
405             result = StringIO()
406             with self._filesystem.open_text_file_for_reading(self.path) as file:
407                 for line in file:
408                     found_bug_url = re.search(config_urls.bug_url_long, line)
409                     if not found_bug_url:
410                         found_bug_url = re.search(config_urls.bug_url_short, line)
411                     result.write(line)
412                     if found_bug_url:
413                         if bug_url_number_of_items == 1:
414                             result.write("\n        Reviewed by %s.\n" % reviewer)
415                         bug_url_number_of_items -= 1
416             self._filesystem.write_text_file(self.path, result.getvalue())
417         else:
418             data = self._filesystem.read_text_file(self.path)
419             newdata = data.replace("NOBODY (OOPS!)", reviewer)
420             self._filesystem.write_text_file(self.path, newdata)
421
422     def set_short_description_and_bug_url(self, short_description, bug_url):
423         message = "%s\n%s%s" % (short_description, self._changelog_indent, bug_url)
424         bug_boilerplate = "%sNeed the bug URL (OOPS!).\n" % self._changelog_indent
425         result = StringIO()
426         with self._filesystem.open_text_file_for_reading(self.path) as file:
427             for line in file:
428                 line = line.replace("Need a short description (OOPS!).", message)
429                 if line != bug_boilerplate:
430                     result.write(line)
431         self._filesystem.write_text_file(self.path, result.getvalue())
432
433     def delete_entries(self, num_entries):
434         date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
435         rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
436         entries = 0
437         result = StringIO()
438         with self._filesystem.open_text_file_for_reading(self.path) as file:
439             for line in file:
440                 if date_line_regexp.match(line):
441                     entries += 1
442                 elif rolled_over_regexp.match(line):
443                     entries = num_entries + 1
444                 if entries > num_entries:
445                     result.write(line)
446         self._filesystem.write_text_file(self.path, result.getvalue())
447
448     def prepend_text(self, text):
449         data = self._filesystem.read_text_file(self.path)
450         self._filesystem.write_text_file(self.path, text + data)