Add support for webkitpy tests EWS
[WebKit-https.git] / Tools / Scripts / webkitpy / tool / bot / patchanalysistask.py
1 # Copyright (c) 2010 Google Inc. All rights reserved.
2 # Copyright (C) 2017 Apple Inc. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 from webkitpy.common.system.executive import ScriptError
31 from webkitpy.common.net.layouttestresults import LayoutTestResults
32 from webkitpy.common.net.jsctestresults import JSCTestResults
33
34
class UnableToApplyPatch(Exception):
    """Exception carrying the patch that could not be applied.

    The patch is available as the ``patch`` attribute; no message is passed
    to the base ``Exception``.
    """

    def __init__(self, patch):
        super(UnableToApplyPatch, self).__init__()
        self.patch = patch
39
40
class PatchIsNotValid(Exception):
    """Exception raised when a patch fails validation.

    Exposes the offending ``patch`` and the ``failure_message`` explaining
    why it was rejected; no message is passed to the base ``Exception``.
    """

    def __init__(self, patch, failure_message):
        super(PatchIsNotValid, self).__init__()
        self.failure_message = failure_message
        self.patch = patch
46
47
class PatchIsNotApplicable(Exception):
    """Exception carrying a patch that is not applicable.

    The patch is available as the ``patch`` attribute; no message is passed
    to the base ``Exception``.
    """

    def __init__(self, patch):
        super(PatchIsNotApplicable, self).__init__()
        self.patch = patch
52
53
class PatchAnalysisTaskDelegate(object):
    """Interface a PatchAnalysisTask uses to talk to its host.

    Every method here raises NotImplementedError; concrete delegates are
    expected to override all of them.
    """

    def parent_command(self):
        """Return the value passed as ``--parent-command=`` when landing."""
        raise NotImplementedError("subclasses must implement")

    def run_command(self, command):
        """Execute ``command`` (an argument list built by the task)."""
        raise NotImplementedError("subclasses must implement")

    def command_passed(self, message, patch):
        """Record that a command succeeded for ``patch``."""
        raise NotImplementedError("subclasses must implement")

    def command_failed(self, message, script_error, patch):
        """Record that a command failed for ``patch``.

        The task stores the return value as its ``failure_status_id``.
        """
        raise NotImplementedError("subclasses must implement")

    def refetch_patch(self, patch):
        """Return a freshly fetched copy of ``patch`` (presumably; not called in this file)."""
        raise NotImplementedError("subclasses must implement")

    def expected_failures(self):
        """Return the expected failures (shape not visible in this file)."""
        raise NotImplementedError("subclasses must implement")

    def test_results(self):
        """Return the results of the most recent test run.

        Callers in this file treat ``None`` as "results unavailable".
        """
        raise NotImplementedError("subclasses must implement")

    def archive_last_test_results(self, patch):
        """Archive the results of the most recent test run and return the archive."""
        raise NotImplementedError("subclasses must implement")

    def build_style(self):
        """Return the value used for the ``--build-style=`` argument."""
        raise NotImplementedError("subclasses must implement")

    def report_flaky_tests(self, patch, flaky_tests, results_archive):
        """Report ``flaky_tests`` observed while testing ``patch``.

        We could make results_archive optional, but for now it's required.
        """
        raise NotImplementedError("subclasses must implement")
85
86
class PatchAnalysisTask(object):
    """Base class that drives one patch through a series of delegate commands.

    Each step (clean, update, apply, build, test, land) is expressed as an
    argument list handed to ``delegate.run_command``.  When the delegate
    defines a ``group()`` method, its value is forwarded as ``--group=<name>``
    to the relevance, build and test commands.

    A step returns True on success and False when the command raised
    ScriptError.  In the retry helpers, returning False means "defer the
    patch"; a failure attributed to the patch is signalled by raising the
    recorded script error from ``report_failure``.

    Subclasses must implement ``validate()`` and ``run()``.
    """

    def __init__(self, delegate, patch):
        self._delegate = delegate
        self._patch = patch
        self._script_error = None
        self._results_archive_from_patch_test_run = None
        self._results_from_patch_test_run = None
        self.error = None
        # Assigned by _run_command() when a command fails.  Initialised here so
        # that the retry helpers can read it even if no command has failed yet.
        self.failure_status_id = None

    def _run_command(self, command, success_message, failure_message):
        """Run ``command`` through the delegate and report the outcome to it.

        Returns True if the command succeeded, False if it raised ScriptError
        (the error and the delegate's failure status id are then recorded on
        this task).  Raises PatchIsNotValid if ``validate()`` fails first.
        """
        if not self.validate():
            raise PatchIsNotValid(self._patch, self.error)
        try:
            self._delegate.run_command(command)
            self._delegate.command_passed(success_message, patch=self._patch)
            return True
        except ScriptError as e:
            self._script_error = e
            self.failure_status_id = self._delegate.command_failed(failure_message, script_error=self._script_error, patch=self._patch)
            return False

    def _group_args(self):
        """Return ``["--group=<name>"]`` if the delegate defines ``group()``, else ``[]``."""
        if hasattr(self._delegate, 'group'):
            return ["--group=%s" % self._delegate.group()]
        return []

    def _clean(self):
        """Run the "clean" command."""
        return self._run_command([
            "clean",
        ],
        "Cleaned working directory",
        "Unable to clean working directory")

    def _update(self):
        """Run the "update" command."""
        # FIXME: Ideally the status server log message should include which revision we updated to.
        return self._run_command([
            "update",
        ],
        "Updated working directory",
        "Unable to update working directory")

    def _apply(self):
        """Apply this task's patch with "apply-attachment"."""
        return self._run_command([
            "apply-attachment",
            "--no-update",
            "--non-interactive",
            self._patch.id(),
        ],
        "Applied patch",
        "Patch does not apply")

    def _check_patch_relevance(self):
        """Run "check-patch-relevance" for the delegate's group (if any)."""
        args = [
            "check-patch-relevance",
            "--quiet",
        ]
        args.extend(self._group_args())

        return self._run_command(args, "Checked relevance of patch", "Patch was not relevant")

    def _build(self):
        """Build with the patch applied, without cleaning or updating."""
        args = [
            "build",
            "--no-clean",
            "--no-update",
            "--build-style=%s" % self._delegate.build_style(),
        ]
        args.extend(self._group_args())

        return self._run_command(args, "Built patch", "Patch does not build")

    def _build_without_patch(self):
        """Build after a forced clean (i.e. without the patch)."""
        args = [
            "build",
            "--force-clean",
            "--no-update",
            "--build-style=%s" % self._delegate.build_style(),
        ]
        args.extend(self._group_args())

        return self._run_command(args, "Able to build without patch", "Unable to build without patch")

    def _test(self):
        """Run the tests against the current tree without rebuilding."""
        args = [
            "build-and-test",
            "--no-clean",
            "--no-update",
            # Notice that we don't pass --build, which means we won't build!
            "--test",
            "--non-interactive",
            "--build-style=%s" % self._delegate.build_style(),
        ]
        args.extend(self._group_args())

        return self._run_command(args, "Passed tests", "Patch does not pass tests")

    def _build_and_test_without_patch(self):
        """Force-clean the tree and run the tests without the patch.

        ``--build`` is passed unless the delegate has a falsy ``should_build``
        attribute.
        """
        args = [
            "build-and-test",
            "--force-clean",
            "--no-update",
            "--test",
            "--non-interactive",
            "--build-style=%s" % self._delegate.build_style(),
        ]

        if getattr(self._delegate, 'should_build', True):
            args.append("--build")

        args.extend(self._group_args())

        return self._run_command(args, "Able to pass tests without patch", "Unable to pass tests without patch (tree is red?)")

    def _land(self):
        """Land this task's patch with "land-attachment"."""
        # Unclear if this should pass --quiet or not.  If --parent-command always does the reporting, then it should.
        return self._run_command([
            "land-attachment",
            "--force-clean",
            "--non-interactive",
            "--parent-command=" + self._delegate.parent_command(),
            self._patch.id(),
        ],
        "Landed patch",
        "Unable to land patch")

    def _report_flaky_tests(self, flaky_test_results, results_archive):
        """Forward flaky test results for this patch to the delegate."""
        self._delegate.report_flaky_tests(self._patch, flaky_test_results, results_archive)

    def _results_failed_different_tests(self, first, second):
        """Return True if the two result objects list different failing tests.

        A falsy result object is treated as having no failing tests.
        """
        first_failing_tests = first.failing_tests() if first else []
        second_failing_tests = second.failing_tests() if second else []
        return first_failing_tests != second_failing_tests

    def _should_defer_patch_or_throw(self, failures_with_patch, results_archive_for_failures_with_patch, script_error, failure_id):
        """Compare failures seen with the patch against a clean-tree run.

        Returns True when the patch should be deferred, and False when every
        failure seen with the patch also occurs without it.  When the patch
        introduces failures and is still valid, the script error is raised via
        ``report_failure``.
        """
        self._build_and_test_without_patch()
        clean_tree_results = self._delegate.test_results()

        if clean_tree_results is None:
            # Without clean-tree results there is nothing to compare against, so defer.
            return True

        if clean_tree_results.did_exceed_test_failure_limit():
            # We cannot know whether the failures we saw in the test runs with the patch are expected.
            return True

        failures_introduced_by_patch = frozenset(failures_with_patch) - frozenset(clean_tree_results.failing_test_results())
        if failures_introduced_by_patch:
            self.failure_status_id = failure_id
            # report_failure will either throw or return false.
            return not self.report_failure(results_archive_for_failures_with_patch, LayoutTestResults(failures_introduced_by_patch, did_exceed_test_failure_limit=False), script_error)

        # In this case, we know that all of the failures that we saw with the patch were
        # also present without the patch, so we don't need to defer.
        return False

    def _retry_bindings_tests(self):
        """Decide the outcome of a failed bindings test run.

        Returns True if the failures also occur on a clean tree, False if
        results are unavailable, and otherwise reports the failure.
        """
        first_results = self._delegate.test_results()
        first_script_error = self._script_error
        first_failure_status_id = self.failure_status_id
        if first_results is None:
            return False

        # Some errors are not correctly reported by the run-bindings-tests script
        # https://bugs.webkit.org/show_bug.cgi?id=169449
        # In affected cases, add a message requesting to look at test output instead.
        if not first_results._failures:
            first_results._failures = ["Please see test output for results"]

        self._build_and_test_without_patch()
        clean_tree_results = self._delegate.test_results()
        if clean_tree_results is None:
            return False

        if first_results.is_subset(clean_tree_results):
            return True

        self.failure_status_id = first_failure_status_id
        return self.report_failure(None, first_results, first_script_error)

    # FIXME: Abstract out common parts of the retry logic.
    def _retry_jsc_tests(self):
        """Decide the outcome of a failed JSC test run.

        The tests are run a second time; only failures common to both runs are
        compared against a clean-tree run.  Returns True if the retry passes or
        the consistent failures also occur on a clean tree, False if any
        results are unavailable, and otherwise reports the failure.
        """
        first_results = self._delegate.test_results()
        first_script_error = self._script_error
        first_failure_status_id = self.failure_status_id
        if first_results is None:
            return False

        if self._test():
            return True
        second_results = self._delegate.test_results()
        if second_results is None:
            return False

        consistently_failing_test_results = JSCTestResults.intersection(first_results, second_results)

        self._build_and_test_without_patch()
        clean_tree_results = self._delegate.test_results()
        if clean_tree_results is None:
            return False

        if consistently_failing_test_results.is_subset(clean_tree_results):
            return True

        self.failure_status_id = first_failure_status_id
        return self.report_failure(None, consistently_failing_test_results, first_script_error)

    def _retry_layout_tests(self):
        """Decide the outcome of a failed layout test run.

        The tests are run a second time and, where needed, on a clean tree.
        Returns True when the patch is considered good, False when it should be
        deferred (including when any results are unavailable), and raises the
        recorded script error via ``report_failure`` when the failures are
        attributed to the patch.
        """
        # Note: archive_last_test_results deletes the results directory, making these calls order-sensitive.
        # We could remove this dependency by building the test_results from the archive.
        first_results = self._delegate.test_results()
        first_results_archive = self._delegate.archive_last_test_results(self._patch)
        first_script_error = self._script_error
        first_failure_status_id = self.failure_status_id

        # first_results may be None when the results could not be parsed; in that case
        # a passing retry is still accepted, there is just nothing to report as flaky.
        if self._test() and not (first_results is not None and first_results.did_exceed_test_failure_limit()):
            # Only report flaky tests if we were successful at parsing results.json and archiving results.
            if first_results and first_results_archive:
                self._report_flaky_tests(first_results.failing_test_results(), first_results_archive)
            return True

        second_results = self._delegate.test_results()
        second_results_archive = self._delegate.archive_last_test_results(self._patch)
        second_script_error = self._script_error
        second_failure_status_id = self.failure_status_id

        if first_results is None or second_results is None:
            # The analysis below needs parsed results from both runs; without them
            # we cannot attribute anything to the patch.
            return False  # Defer patch

        if second_results.did_exceed_test_failure_limit() and first_results.did_exceed_test_failure_limit():
            self._build_and_test_without_patch()
            clean_tree_results = self._delegate.test_results()
            if clean_tree_results is None:
                return False  # Defer patch

            # Defer unless the patched run failed more than 5 tests beyond the clean tree.
            if (len(first_results.failing_tests()) - len(clean_tree_results.failing_tests())) <= 5:
                return False

            self.failure_status_id = first_failure_status_id

            return self.report_failure(first_results_archive, first_results, first_script_error)

        if second_results.did_exceed_test_failure_limit():
            self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive,
                                              first_script_error, first_failure_status_id)
            return False

        if first_results.did_exceed_test_failure_limit():
            self._should_defer_patch_or_throw(second_results.failing_test_results(), second_results_archive,
                                              second_script_error, second_failure_status_id)
            return False

        if self._results_failed_different_tests(first_results, second_results):
            first_failing_results_set = frozenset(first_results.failing_test_results())
            second_failing_results_set = frozenset(second_results.failing_test_results())

            tests_that_only_failed_first = first_failing_results_set.difference(second_failing_results_set)
            self._report_flaky_tests(tests_that_only_failed_first, first_results_archive)

            tests_that_only_failed_second = second_failing_results_set.difference(first_failing_results_set)
            self._report_flaky_tests(tests_that_only_failed_second, second_results_archive)

            tests_that_consistently_failed = first_failing_results_set.intersection(second_failing_results_set)
            if tests_that_consistently_failed:
                if self._should_defer_patch_or_throw(tests_that_consistently_failed, first_results_archive,
                                                     first_script_error, first_failure_status_id):
                    return False  # Defer patch

            # At this point we know that at least one test flaked, but no consistent failures
            # were introduced. This is a bit of a grey-zone.
            return False  # Defer patch

        if self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive,
                                             first_script_error, first_failure_status_id):
            return False  # Defer patch

        # At this point, we know that the first and second runs had the exact same failures,
        # and that those failures are all present on the clean tree, so we can say with certainty
        # that the patch is good.
        return True

    def _test_patch(self):
        """Run the tests with the patch and, on failure, apply the group's retry policy.

        "jsc" and "bindings" groups use their own retry helpers, "webkitpy" is
        not retried, and anything else (including a delegate without a
        ``group()`` method) uses the layout-test retry logic.
        """
        if self._test():
            return True

        group = self._delegate.group() if hasattr(self._delegate, 'group') else None
        if group == "jsc":
            return self._retry_jsc_tests()
        if group == "bindings":
            return self._retry_bindings_tests()
        if group == "webkitpy":
            return False
        return self._retry_layout_tests()

    def results_archive_from_patch_test_run(self, patch):
        """Return the results archive recorded by ``report_failure`` for ``patch``."""
        assert self._patch.id() == patch.id()  # PatchAnalysisTask is not currently re-useable.
        return self._results_archive_from_patch_test_run

    def results_from_patch_test_run(self, patch):
        """Return the test results recorded by ``report_failure`` for ``patch``."""
        assert self._patch.id() == patch.id()  # PatchAnalysisTask is not currently re-useable.
        return self._results_from_patch_test_run

    def report_failure(self, results_archive=None, results=None, script_error=None):
        """Record the failing results and raise the associated script error.

        Returns False without recording anything if the patch no longer
        validates.  Otherwise stores ``results_archive`` and ``results`` and
        raises ``script_error``, falling back to the last script error recorded
        by ``_run_command``.
        """
        if not self.validate():
            return False
        self._results_archive_from_patch_test_run = results_archive
        self._results_from_patch_test_run = results
        raise script_error or self._script_error

    def validate(self):
        """Return whether the patch is still valid; subclasses must implement."""
        raise NotImplementedError("subclasses must implement")

    def run(self):
        """Run the task; subclasses must implement."""
        raise NotImplementedError("subclasses must implement")