[ macOS ] imported/w3c/web-platform-tests/media-source/mediasource-changetype-play...
[WebKit-https.git] / Tools / Scripts / webkitpy / common / checkout / changelog.py
1 # Copyright (C) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for parsing and modifying ChangeLog files
30
31 import logging
32 import re
33 import textwrap
34
35 from webkitpy.common.config.committers import CommitterList
36 from webkitpy.common.system.filesystem import FileSystem
37 from webkitpy.common.unicode_compatibility import StringIO, unicode
38 import webkitpy.common.config.urls as config_urls
39
40 _log = logging.getLogger(__name__)
41
42
43 # FIXME: parse_bug_id_from_changelog should not be a free function.
44 # Parse the bug ID out of a Changelog message based on the format that is
45 # used by prepare-ChangeLog
def parse_bug_id_from_changelog(message):
    """Return the bug ID referenced by a ChangeLog message, or None.

    A bug URL standing alone on a line (optionally indented) is preferred:
    first in the short form, then in the long form, as described by the
    patterns in ``config_urls``.  If neither is found, the search falls back
    to ``config_urls.parse_bug_id`` applied to the whole message.

    Args:
        message: The ChangeLog text to search.  May be None or empty.

    Returns:
        The bug ID as an int when one of the line-anchored patterns matches,
        whatever ``config_urls.parse_bug_id`` returns otherwise, or None for
        a None/empty message.
    """
    if not message:
        return None
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    line_prefix = r"^\s*"
    for bug_url_regexp in (config_urls.bug_url_short, config_urls.bug_url_long):
        match = re.search(line_prefix + bug_url_regexp + "$", message, re.MULTILINE)
        if match:
            return int(match.group('bug_id'))
    # We weren't able to find a bug URL in the format used by prepare-ChangeLog. Fall back to the
    # first bug URL found anywhere in the message.
    return config_urls.parse_bug_id(message)
58
59
class ChangeLogEntry(object):
    """One ChangeLog entry: a date/author line followed by the entry body.

    The raw text is parsed once, at construction time, into the date line,
    the author(s), the reviewer(s), the bug description and the touched
    files/functions.  The parsed values are exposed through accessor methods.

    Text whose first line is not a valid date line still produces an object
    (a warning is logged); its date, date line, author text and author are
    None and its author list is empty.
    """

    # e.g. 2009-06-03  Eric Seidel  <eric@webkit.org>
    date_line_regexp = r'^(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<authors>(?P<name>[^<]+?)\s+<(?P<email>[^<>]+)>.*?)$'

    # e.g. * Source/WebCore/page/EventHandler.cpp: Implement FooBarQuux.
    touched_files_regexp = r'^\s*\*\s*(?P<file>[A-Za-z0-9_\-\./\\]+)\s*\:'
    # e.g. (ChangeLogEntry.touched_functions): Added.
    touched_functions_regexp = r'^\s*\((?P<function>[^)]*)\):'

    # e.g. <rdar://problem/12345678>; a leading minus sign is captured so that
    # negative numbers can be rejected by _parse_radar_id.
    radar_id_regexp = r'^\s*(<?rdar://problems?/)?(?P<radar_id>-?\d{7,})>?'

    # e.g. Reviewed by Darin Adler.
    # (Discard everything after the first period to match more invalid lines.)
    reviewed_by_regexp = r'^\s*((\w+\s+)+and\s+)?(Review|Rubber(\s*|-)stamp)(s|ed)?\s+([a-z]+\s+)*?by\s+(?P<reviewer>.*?)[\.,]?\s*$'

    # Same as above but for lines where "by" was omitted, e.g. "Reviewed Darin Adler."
    reviewed_byless_regexp = r'^\s*((Review|Rubber(\s*|-)stamp)(s|ed)?|RS)(\s+|\s*=\s*)(?P<reviewer>([A-Z]\w+\s*)+)[\.,]?\s*$'

    reviewer_name_noise_regexp = re.compile(r"""
    (\s+((tweaked\s+)?and\s+)?(landed|committed|okayed)\s+by.+) # "landed by", "commented by", etc...
    |(^(Reviewed\s+)?by\s+) # extra "Reviewed by" or "by"
    |([(<]\s*[\w_\-\.]+@[\w_\-\.]+[>)]) # email addresses
    |([(<](https?://?bugs.)webkit.org[^>)]+[>)]) # bug url
    |("[^"]+") # wresler names like 'Sean/Shawn/Shaun' in 'Geoffrey "Sean/Shawn/Shaun" Garen'
    |('[^']+') # wresler names like "The Belly" in "Sam 'The Belly' Weinig"
    |((Mr|Ms|Dr|Mrs|Prof)\.(\s+|$))
    """, re.IGNORECASE | re.VERBOSE)

    reviewer_name_casesensitive_noise_regexp = re.compile(r"""
    ((\s+|^)(and\s+)?([a-z-]+\s+){5,}by\s+) # e.g. "and given a good once-over by"
    |(\(\s*(?!(and|[A-Z])).+\)) # any parenthesis that doesn't start with "and" or a capital letter
    |(with(\s+[a-z-]+)+) # phrases with "with no hesitation" in "Sam Weinig with no hesitation"
    """, re.VERBOSE)

    reviewer_name_noise_needing_a_backreference_regexp = re.compile(r"""
    (\S\S)\.(?:(\s.+|$)) # Text after the two word characters (don't match initials) and a period followed by a space.
    """, re.IGNORECASE | re.VERBOSE)

    nobody_regexp = re.compile(r"""(\s+|^)nobody(
    ((,|\s+-)?\s+(\w+\s+)+fix.*) # e.g. nobody, build fix...
    |(\s*\([^)]+\).*) # NOBODY (..)...
    |$)""", re.IGNORECASE | re.VERBOSE)

    # e.g. == Rolled over to ChangeLog-2011-02-16 ==
    rolled_over_regexp = r'^== Rolled over to ChangeLog-\d{4}-\d{2}-\d{2} ==$'

    # e.g. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@96161 268f45cc-cd09-0410-ab3c-d52691b4dbfc
    svn_id_regexp = r'git-svn-id: http://svn.webkit.org/repository/webkit/trunk@(?P<svnid>\d+) '

    # Separators between several names: ",", ", and", "and", "&", "&&", "/", "+".
    split_names_regexp = r'\s*(?:,(?:\s+and\s+|&)?|(?:^|\s+)and\s+|&&|[/+&])\s*'

    def __init__(self, contents, committer_list=None, revision=None):
        """Parse ``contents`` into a ChangeLog entry.

        Args:
            contents: The full text of the entry, starting with its date line.
            committer_list: Object used to look up contributors and reviewers;
                a new CommitterList is created when omitted (or falsy).
            revision: Optional revision associated with the entry; stored as-is.
        """
        self._contents = contents
        self._committer_list = committer_list or CommitterList()
        self._revision = revision
        self._parse_entry()

    @classmethod
    def _parse_radar_id(cls, text):
        """Return the first radar ID found at the start of a line in ``text``.

        Returns None for empty text, when nothing matches, or when the matched
        number is negative.
        """
        if not text:
            return None
        match = re.search(ChangeLogEntry.radar_id_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None
        radar_id = int(match.group('radar_id'))
        if radar_id < 0:
            return None

        return radar_id

    @classmethod
    def _parse_reviewer_text(cls, text):
        """Extract the reviewer(s) named in ``text``.

        Returns:
            A ``(reviewer_text, reviewer_list)`` tuple: the cleaned-up reviewer
            string and the individual names split out of it, or
            ``(None, None)`` when no reviewer line is found or nothing is left
            after the noise has been removed.
        """
        match = re.search(ChangeLogEntry.reviewed_by_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            # There are cases where people omit "by". We match it only if reviewer part looked nice
            # in order to avoid matching random lines that start with Reviewed
            match = re.search(ChangeLogEntry.reviewed_byless_regexp, text, re.MULTILINE | re.IGNORECASE)
        if not match:
            return None, None

        reviewer_text = match.group("reviewer")

        # Strip everything that is not part of a name; the order of these substitutions matters.
        reviewer_text = ChangeLogEntry.nobody_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_casesensitive_noise_regexp.sub('', reviewer_text)
        reviewer_text = ChangeLogEntry.reviewer_name_noise_needing_a_backreference_regexp.sub(r'\1', reviewer_text)
        reviewer_text = reviewer_text.replace('(', '').replace(')', '')
        reviewer_text = re.sub(r'\s\s+|[,.]\s*$', ' ', reviewer_text).strip()
        if not len(reviewer_text):
            return None, None

        reviewer_list = ChangeLogEntry._split_reviewer_names(reviewer_text)

        # Get rid of "reviewers" like "even though this is just a..." in "Reviewed by Sam Weinig, even though this is just a..."
        # and "who wrote the original code" in "Noam Rosenthal, who wrote the original code"
        reviewer_list = [reviewer for reviewer in reviewer_list if not re.match(r'^who\s|^([a-z]+(\s+|\.|$)){6,}$', reviewer)]

        return reviewer_text, reviewer_list

    @classmethod
    def _split_reviewer_names(cls, text):
        """Split a reviewer string such as "A, B and C" into individual names."""
        return re.split(ChangeLogEntry.split_names_regexp, text)

    @classmethod
    def _split_author_names_with_emails(cls, text):
        """Split "Name <email>, Name <email>" style text into one string per author.

        The split happens on a closing ">" followed by a name separator; the
        ">" consumed by the split is put back on every piece but the last.
        """
        regex = '>' + ChangeLogEntry.split_names_regexp
        names = re.split(regex, text)
        if len(names) > 1:
            names = [name + ">" for name in names[:-1]] + [names[-1]]
        return names

    def _fuzz_match_reviewers(self, reviewers_text_list):
        """Map reviewer name strings to contributor objects.

        Only names for which the committer list's fuzzy match yields exactly
        one candidate are kept; ambiguous or unknown names are dropped.
        """
        if not reviewers_text_list:
            return []
        # Element 0 of the fuzzy-match result is used as the list of candidates.
        list_of_reviewers = [self._committer_list.contributors_by_fuzzy_match(reviewer)[0] for reviewer in reviewers_text_list]
        # Flatten lists and get rid of any reviewers with more than one candidate.
        return [reviewers[0] for reviewers in list_of_reviewers if len(reviewers) == 1]

    @classmethod
    def _parse_author_name_and_email(cls, author_name_and_email):
        """Parse "Name <email>" into ``{'name': ..., 'email': ...}``.

        Returns None when the text does not have that shape (for example
        trailing free text that followed the last e-mail on the date line).
        """
        match = re.match(r'(?P<name>.+?)\s+<(?P<email>[^>]+)>', author_name_and_email)
        if not match:
            return None
        return {'name': match.group("name"), 'email': match.group("email")}

    @classmethod
    def _parse_author_text(cls, text):
        """Return a list of ``{'name', 'email'}`` dicts for the authors in ``text``.

        Returns an empty list for empty text.  Fragments that cannot be parsed
        as "Name <email>" are skipped.
        """
        if not text:
            return []
        authors = cls._split_author_names_with_emails(text)
        assert(authors and len(authors) >= 1)
        parsed_authors = [cls._parse_author_name_and_email(author) for author in authors]
        return [author for author in parsed_authors if author is not None]

    @classmethod
    def _parse_touched_functions(cls, text):
        """Return a dict mapping each touched file to its listed function names.

        A function line is attributed to the most recent file line above it;
        function lines that appear before any file line are ignored.
        """
        result = {}
        cur_file = None
        for line in text.splitlines():
            file_match = re.match(cls.touched_files_regexp, line)
            if file_match:
                cur_file = file_match.group("file")
                result[cur_file] = []
            func_match = re.match(cls.touched_functions_regexp, line)
            if func_match and cur_file:
                result[cur_file].append(func_match.group("function"))
        return result

    @classmethod
    def _parse_bug_description(cls, text):
        """Return the stripped third line of ``text`` as the bug description.

        Returns None when there are fewer than three lines, or when the third
        line is a reviewer line or contains a bug URL instead.
        """
        # Line 3 is the bug description in most cases.
        lines = text.splitlines()
        if len(lines) < 3:
            return None
        found_reviewed = re.search(ChangeLogEntry.reviewed_by_regexp, lines[2], re.IGNORECASE)
        found_reviewed_byless = re.search(ChangeLogEntry.reviewed_byless_regexp, lines[2], re.IGNORECASE)
        found_url = parse_bug_id_from_changelog(lines[2])
        if found_reviewed or found_reviewed_byless or found_url:
            return None
        return lines[2].strip()

    def _parse_entry(self):
        """Populate all parsed fields from ``self._contents``."""
        match = re.match(self.date_line_regexp, self._contents, re.MULTILINE)
        if not match:
            _log.warning("Creating invalid ChangeLogEntry:\n%s" % self._contents)

        # An invalid entry is still created (see the warning above), so every
        # use of the match object has to cope with it being None.
        self._date_line = match.group() if match else None
        self._date = match.group("date") if match else None
        self._bug_description = self._parse_bug_description(self._contents)

        # FIXME: group("name") does not seem to be Unicode?  Probably due to self._contents not being unicode.
        self._author_text = match.group("authors") if match else None
        self._authors = ChangeLogEntry._parse_author_text(self._author_text)

        self._reviewer_text, self._reviewers_text_list = ChangeLogEntry._parse_reviewer_text(self._contents)
        self._reviewers = self._fuzz_match_reviewers(self._reviewers_text_list)

        # Look the author up by e-mail first, then by name; skip the lookups
        # entirely when the entry has no author.
        author_email = self.author_email()
        author_name = self.author_name()
        author = self._committer_list.contributor_by_email(author_email) if author_email else None
        if not author:
            author = self._committer_list.contributor_by_name(author_name) if author_name else None
        self._author = author

        self._touched_files = re.findall(self.touched_files_regexp, self._contents, re.MULTILINE)
        self._touched_functions = self._parse_touched_functions(self._contents)

    def date_line(self):
        """Return the matched date line, or None for an invalid entry."""
        return self._date_line

    def date(self):
        """Return the date string (YYYY-MM-DD), or None for an invalid entry."""
        return self._date

    def author_text(self):
        """Return the raw author portion of the date line, or None."""
        return self._author_text

    def revision(self):
        """Return the revision passed to the constructor (may be None)."""
        return self._revision

    def author_name(self):
        """Return the first author's name, or None when there is no author."""
        return self._authors[0]['name'] if self._authors else None

    def author_email(self):
        """Return the first author's e-mail, or None when there is no author."""
        return self._authors[0]['email'] if self._authors else None

    def author(self):
        """Return the contributor object found for the first author."""
        return self._author  # Might be None

    def authors(self):
        """Return the list of ``{'name', 'email'}`` dicts for all authors."""
        return self._authors

    # FIXME: Eventually we would like to map reviewer names to reviewer objects.
    # See https://bugs.webkit.org/show_bug.cgi?id=26533
    def reviewer_text(self):
        """Return the cleaned-up reviewer string, or None."""
        return self._reviewer_text

    # Might be None, might also not be a Reviewer!
    def reviewer(self):
        """Return the first fuzzy-matched reviewer, or None."""
        return self._reviewers[0] if len(self._reviewers) > 0 else None

    def reviewers(self):
        """Return the list of fuzzy-matched reviewers."""
        return self._reviewers

    def has_valid_reviewer(self):
        """Return True if the entry names a known reviewer or is marked unreviewed.

        A reviewer counts as known when ``reviewer_by_name`` on the committer
        list returns a truthy value for one of the parsed reviewer names.
        Otherwise the entry is accepted if its text contains "unreviewed"
        (case-insensitive).
        """
        if self._reviewers_text_list:
            for reviewer in self._reviewers_text_list:
                reviewer = self._committer_list.reviewer_by_name(reviewer)
                if reviewer:
                    return True
        return bool(re.search("unreviewed", self._contents, re.IGNORECASE))

    def contents(self):
        """Return the raw entry text."""
        return self._contents

    def bug_id(self):
        """Return the bug ID parsed from the entry text, or None."""
        return parse_bug_id_from_changelog(self._contents)

    def bug_description(self):
        """Return the bug description line, or None."""
        return self._bug_description

    def touched_files(self):
        """Return the list of file names mentioned on "* file:" lines."""
        return self._touched_files

    # Returns a dict from file name to lists of function names.
    def touched_functions(self):
        return self._touched_functions

    def touched_files_text(self):
        """Return the entry text from the first touched-file line onwards.

        Leading newlines are stripped; returns "" when no file is listed.
        """
        match = re.search(self.touched_files_regexp, self._contents, re.MULTILINE)
        return self._contents[match.start():].lstrip("\n\r") if match else ""

    # Determine if any text has been added to the section on touched files
    def is_touched_files_text_clean(self):
        """Return True if the touched-files section holds only generated lines.

        Every line must be either a file line (optionally followed by
        "Added.", "Removed.", "Copied from X." or "Renamed from X.") or a
        bare function line; any other line makes the result False.
        """
        file_line_end = r"( (Added|Removed|(Copied|Renamed) from [A-Za-z0-9_\-./\\]+).)?$"
        for line in self.touched_files_text().splitlines():
            if re.match(self.touched_files_regexp + file_line_end, line):
                continue
            if re.match(self.touched_functions_regexp + "$", line):
                continue
            return False
        return True
311
312
313 # FIXME: Various methods on ChangeLog should move into ChangeLogEntry instead.
class ChangeLog(object):
    """Reads and rewrites a ChangeLog file through a filesystem abstraction."""

    def __init__(self, path, filesystem=None):
        """Remember the ChangeLog ``path`` and the filesystem used to access it.

        A new FileSystem is created when ``filesystem`` is omitted (or falsy).
        """
        self.path = path
        self._filesystem = filesystem or FileSystem()

    # Entry bodies are indented by eight spaces.
    _changelog_indent = " " * 8

    @classmethod
    def parse_latest_entry_from_file(cls, changelog_file):
        """Return the first entry of ``changelog_file``, or None if it has none."""
        return next(cls.parse_entries_from_file(changelog_file), None)

    # Matches a line prefixed with blame information: "<revision> <author> <line>".
    svn_blame_regexp = re.compile(r'^(\s*(?P<revision>\d+) [^ ]+)\s*(?P<line>.*?\n)')

    @classmethod
    def _separate_revision_and_line(cls, line):
        """Split a blame-prefixed line into ``(revision, line)``.

        Returns ``(None, line)`` unchanged when the line carries no blame prefix.
        """
        match = cls.svn_blame_regexp.match(line)
        if not match:
            return None, line
        return int(match.group('revision')), match.group('line')

    @classmethod
    def parse_entries_from_file(cls, changelog_file):
        """Yield a ChangeLogEntry for each entry in ``changelog_file``.

        changelog_file must be a file-like object which returns
        unicode strings, e.g. from StringIO(unicode()) or
        fs.open_text_file_for_reading()

        Nothing is yielded when the first line is not a date line.  Parsing
        stops at a "Rolled over to ChangeLog-..." marker.  When the first line
        carries a blame prefix, every line is expected to carry one and each
        entry is given the revision that occurs on most of its lines;
        otherwise entries get a revision of None.
        """
        date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
        rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)

        # The first line should be a date line.
        revision, first_line = cls._separate_revision_and_line(changelog_file.readline())
        assert(isinstance(first_line, unicode))
        if not date_line_regexp.match(cls.svn_blame_regexp.sub('', first_line)):
            # A plain return ends the generator; "raise StopIteration" inside a
            # generator is turned into a RuntimeError by PEP 479.
            return

        entry_lines = [first_line]
        # Maps revision -> number of lines of the current entry blamed on it.
        # Stays None for the whole file when the input has no blame prefixes.
        revisions_in_entry = {revision: 1} if revision is not None else None
        for line in changelog_file:
            if revisions_in_entry:
                revision, line = cls._separate_revision_and_line(line)

            if rolled_over_regexp.match(line):
                break

            if date_line_regexp.match(line):
                most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
                # Remove the extra newline at the end
                yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)
                entry_lines = []
                # Only restart the per-entry counts in blame mode; otherwise
                # ordinary lines that happen to start with a number would be
                # mistaken for blame prefixes and truncated.
                if revisions_in_entry is not None:
                    revisions_in_entry = {revision: 0}

            entry_lines.append(line)
            if revisions_in_entry:
                revisions_in_entry[revision] = revisions_in_entry.get(revision, 0) + 1

        most_probable_revision = max(revisions_in_entry, key=revisions_in_entry.__getitem__) if revisions_in_entry else None
        yield ChangeLogEntry(''.join(entry_lines[:-1]), revision=most_probable_revision)

    def latest_entry(self):
        """Return the first entry of this ChangeLog file, or None."""
        # ChangeLog files are always UTF-8, we read them in as such to support Reviewers with unicode in their names.
        with self._filesystem.open_text_file_for_reading(self.path) as changelog_file:
            return self.parse_latest_entry_from_file(changelog_file)

    # _wrap_line and _wrap_lines exist to work around
    # http://bugs.python.org/issue1859

    def _wrap_line(self, line):
        """Wrap one line to 70 columns, indenting every output line."""
        return textwrap.fill(line,
                             width=70,
                             initial_indent=self._changelog_indent,
                             # Don't break urls which may be longer than width.
                             break_long_words=False,
                             subsequent_indent=self._changelog_indent)

    # Workaround as suggested by guido in
    # http://bugs.python.org/issue1859#msg60040

    def _wrap_lines(self, message):
        """Wrap each line of ``message`` separately and rejoin with newlines."""
        lines = [self._wrap_line(line) for line in message.splitlines()]
        return "\n".join(lines)

    def update_with_unreviewed_message(self, message):
        """Replace the boilerplate of the newest entry with ``message``.

        The "Need a short description (OOPS!)." line is replaced by the wrapped
        message, and every following line is dropped until the next line that
        starts with a digit 1-9 (the date line of the next entry).
        """
        first_boilerplate_line_regexp = re.compile(
                r"%sNeed a short description \(OOPS!\)\." % self._changelog_indent)
        removing_boilerplate = False
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as file:
            for line in file:
                if first_boilerplate_line_regexp.search(line):
                    message_lines = self._wrap_lines(message)
                    # Use a function as the replacement so the message is
                    # inserted literally; a plain string would have its
                    # backslashes interpreted as regex escapes/group references.
                    result.write(first_boilerplate_line_regexp.sub(lambda _match: message_lines, line))
                    # Remove all the ChangeLog boilerplate, except the first line (date, name, e-mail).
                    removing_boilerplate = True
                elif removing_boilerplate:
                    if re.search("^[1-9]", line):  # each changelog entry is preceded by a date
                        removing_boilerplate = False

                if not removing_boilerplate:
                    result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def set_reviewer(self, reviewer):
        """Record ``reviewer`` in the ChangeLog file.

        If the newest entry has no "NOBODY (OOPS!)" placeholder, no valid
        reviewer, no "unreviewed" marker and no matched reviewer, a
        "Reviewed by <reviewer>." line is inserted after the last bug URL line
        (counted within the newest entry).  Otherwise every
        "NOBODY (OOPS!)" in the file is replaced with ``reviewer``.

        NOTE(review): assumes the file starts with a parsable entry;
        latest_entry() returning None is not handled here.
        """
        latest_entry = self.latest_entry()
        latest_entry_contents = latest_entry.contents()
        matched_reviewer = latest_entry.reviewer()
        found_nobody = re.search(r"NOBODY\s*\(OOPS!\)", latest_entry_contents, re.MULTILINE)
        found_reviewer_or_unreviewed = latest_entry.has_valid_reviewer()
        if not found_nobody and not found_reviewer_or_unreviewed and not matched_reviewer:
            bug_url_number_of_items = len(re.findall(config_urls.bug_url_long, latest_entry_contents, re.MULTILINE))
            bug_url_number_of_items += len(re.findall(config_urls.bug_url_short, latest_entry_contents, re.MULTILINE))
            result = StringIO()
            with self._filesystem.open_text_file_for_reading(self.path) as file:
                for line in file:
                    found_bug_url = re.search(config_urls.bug_url_long, line)
                    if not found_bug_url:
                        found_bug_url = re.search(config_urls.bug_url_short, line)
                    result.write(line)
                    if found_bug_url:
                        # Count down so the reviewer line lands after the last
                        # bug URL of the newest entry.
                        if bug_url_number_of_items == 1:
                            result.write("\n        Reviewed by %s.\n" % reviewer)
                        bug_url_number_of_items -= 1
            self._filesystem.write_text_file(self.path, result.getvalue())
        else:
            data = self._filesystem.read_text_file(self.path)
            newdata = data.replace("NOBODY (OOPS!)", reviewer)
            self._filesystem.write_text_file(self.path, newdata)

    def set_short_description_and_bug_url(self, short_description, bug_url):
        """Fill in the description and bug URL placeholder lines.

        Every line that consists solely of the respective placeholder is
        replaced by the indented ``short_description`` or ``bug_url``.
        """
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as file:
            short_description_placeholder = "Need a short description (OOPS!)."
            bug_url_placeholder = "Need the bug URL (OOPS!)."
            for line in file:
                stripped = line.strip()
                if stripped == short_description_placeholder:
                    line = self._changelog_indent + short_description + "\n"
                if stripped == bug_url_placeholder:
                    line = self._changelog_indent + bug_url + "\n"
                result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def delete_entries(self, num_entries):
        """Remove the first ``num_entries`` entries from the file.

        Everything from a "Rolled over to ChangeLog-..." marker onwards is
        always kept.
        """
        date_line_regexp = re.compile(ChangeLogEntry.date_line_regexp)
        rolled_over_regexp = re.compile(ChangeLogEntry.rolled_over_regexp)
        entries = 0
        result = StringIO()
        with self._filesystem.open_text_file_for_reading(self.path) as file:
            for line in file:
                if date_line_regexp.match(line):
                    entries += 1
                elif rolled_over_regexp.match(line):
                    # Force the rest of the file to be kept.
                    entries = num_entries + 1
                if entries > num_entries:
                    result.write(line)
        self._filesystem.write_text_file(self.path, result.getvalue())

    def prepend_text(self, text):
        """Insert ``text`` at the very beginning of the file."""
        data = self._filesystem.read_text_file(self.path)
        self._filesystem.write_text_file(self.path, text + data)