More debug queue build fixing.
[WebKit-https.git] / Tools / Scripts / webkitpy / tool / bot / patchanalysistask.py
1 # Copyright (c) 2010 Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 from webkitpy.common.system.executive import ScriptError
30 from webkitpy.common.net.layouttestresults import LayoutTestResults
31
32
class UnableToApplyPatch(Exception):
    """Exception that carries the patch which could not be applied.

    The exception is constructed without a message; the offending patch is
    exposed through the ``patch`` attribute.
    """

    def __init__(self, patch):
        super(UnableToApplyPatch, self).__init__()
        # Keep the patch on the exception so handlers can inspect it.
        self.patch = patch
37
38
class PatchIsNotValid(Exception):
    """Exception that carries the patch which was found to be invalid.

    The exception is constructed without a message; the offending patch is
    exposed through the ``patch`` attribute.
    """

    def __init__(self, patch):
        super(PatchIsNotValid, self).__init__()
        # Keep the patch on the exception so handlers can inspect it.
        self.patch = patch
43
44
class PatchAnalysisTaskDelegate(object):
    """Interface that PatchAnalysisTask uses to talk to its owner.

    Every method here raises NotImplementedError; concrete delegates are
    expected to override all of them.
    """

    def parent_command(self):
        """Return the value appended to "--parent-command=" when landing a patch."""
        raise NotImplementedError("subclasses must implement")

    def run_command(self, command):
        """Execute `command`, a list of arguments built by PatchAnalysisTask.

        PatchAnalysisTask treats a ScriptError raised from here as a failed step.
        """
        raise NotImplementedError("subclasses must implement")

    def command_passed(self, message, patch):
        """Called with a success message after run_command() returned normally."""
        raise NotImplementedError("subclasses must implement")

    def command_failed(self, message, script_error, patch):
        """Called with a failure message and the ScriptError that was caught.

        The return value is stored by PatchAnalysisTask as failure_status_id.
        """
        raise NotImplementedError("subclasses must implement")

    def refetch_patch(self, patch):
        """Not called within this module.

        NOTE(review): presumably returns an up-to-date copy of `patch` --
        confirm against the subclasses that use it.
        """
        raise NotImplementedError("subclasses must implement")

    def expected_failures(self):
        """Not called within this module.

        NOTE(review): presumably describes failures known on the clean tree --
        confirm against the subclasses that use it.
        """
        raise NotImplementedError("subclasses must implement")

    def test_results(self):
        """Return the results of the most recent test run.

        PatchAnalysisTask calls did_exceed_test_failure_limit(), failing_tests()
        and failing_test_results() on the returned object.
        """
        raise NotImplementedError("subclasses must implement")

    def archive_last_test_results(self, patch):
        """Archive the results of the most recent test run for `patch` and return the archive.

        PatchAnalysisTask reads test_results() before calling this, because
        archiving is documented there as deleting the results directory.
        """
        raise NotImplementedError("subclasses must implement")

    def build_style(self):
        """Return the value interpolated into the "--build-style=%s" argument."""
        raise NotImplementedError("subclasses must implement")

    # We could make results_archive optional, but for now it's required.
    def report_flaky_tests(self, patch, flaky_tests, results_archive):
        """Report `flaky_tests` observed for `patch`, along with the matching results archive."""
        raise NotImplementedError("subclasses must implement")
76
77
class PatchAnalysisTask(object):
    """Base class that drives one patch through clean/update/apply/build/test/land steps.

    Every step is issued through the delegate's run_command(); successes and
    failures are reported back through command_passed()/command_failed().
    Subclasses must implement validate() and run().
    """

    def __init__(self, delegate, patch):
        """Remember the delegate and the patch this task analyzes."""
        self._delegate = delegate
        self._patch = patch
        # Last ScriptError caught by _run_command(), if any.
        self._script_error = None
        # Value returned by the delegate's command_failed() for the most recent
        # failing step. Initialized here so it can be read before any failure.
        self.failure_status_id = None
        self._results_archive_from_patch_test_run = None
        self._results_from_patch_test_run = None

    def _run_command(self, command, success_message, failure_message):
        """Run `command` through the delegate and report the outcome.

        Returns True when the command (and the success report) completed
        without raising ScriptError. On ScriptError the error is remembered in
        self._script_error, the delegate's command_failed() result is stored in
        self.failure_status_id, and False is returned.
        """
        try:
            self._delegate.run_command(command)
            self._delegate.command_passed(success_message, patch=self._patch)
            return True
        except ScriptError as e:
            self._script_error = e
            self.failure_status_id = self._delegate.command_failed(failure_message, script_error=self._script_error, patch=self._patch)
            return False

    def _clean(self):
        """Run the "clean" command; returns True on success."""
        return self._run_command([
            "clean",
        ],
        "Cleaned working directory",
        "Unable to clean working directory")

    def _update(self):
        """Run the "update" command; returns True on success."""
        # FIXME: Ideally the status server log message should include which revision we updated to.
        return self._run_command([
            "update",
        ],
        "Updated working directory",
        "Unable to update working directory")

    def _apply(self):
        """Run "apply-attachment" for this task's patch; returns True on success."""
        return self._run_command([
            "apply-attachment",
            "--no-update",
            "--non-interactive",
            self._patch.id(),
        ],
        "Applied patch",
        "Patch does not apply")

    def _build(self):
        """Run "build" with the delegate's build style; returns True on success."""
        return self._run_command([
            "build",
            "--no-clean",
            "--no-update",
            "--build-style=%s" % self._delegate.build_style(),
        ],
        "Built patch",
        "Patch does not build")

    def _build_without_patch(self):
        """Run "build" with --force-clean; returns True on success."""
        return self._run_command([
            "build",
            "--force-clean",
            "--no-update",
            "--build-style=%s" % self._delegate.build_style(),
        ],
        "Able to build without patch",
        "Unable to build without patch")

    def _test(self):
        """Run the tests via "build-and-test" without building; returns True on success."""
        return self._run_command([
            "build-and-test",
            "--no-clean",
            "--no-update",
            # Notice that we don't pass --build, which means we won't build!
            "--test",
            "--non-interactive",
            "--build-style=%s" % self._delegate.build_style(),
        ],
        "Passed tests",
        "Patch does not pass tests")

    def _build_and_test_without_patch(self):
        """Run "build-and-test" with --force-clean, building and testing; returns True on success."""
        return self._run_command([
            "build-and-test",
            "--force-clean",
            "--no-update",
            "--build",
            "--test",
            "--non-interactive",
            "--build-style=%s" % self._delegate.build_style(),
        ],
        "Able to pass tests without patch",
        "Unable to pass tests without patch (tree is red?)")

    def _land(self):
        """Run "land-attachment" for this task's patch; returns True on success."""
        # Unclear if this should pass --quiet or not.  If --parent-command always does the reporting, then it should.
        return self._run_command([
            "land-attachment",
            "--force-clean",
            "--non-interactive",
            "--parent-command=" + self._delegate.parent_command(),
            self._patch.id(),
        ],
        "Landed patch",
        "Unable to land patch")

    def _report_flaky_tests(self, flaky_test_results, results_archive):
        """Forward flaky test results for this task's patch to the delegate."""
        self._delegate.report_flaky_tests(self._patch, flaky_test_results, results_archive)

    def _results_failed_different_tests(self, first, second):
        """Return True when the two results objects list different failing tests.

        A falsy results object is treated as having no failing tests. The
        comparison is a plain != on whatever failing_tests() returns.
        """
        first_failing_tests = [] if not first else first.failing_tests()
        second_failing_tests = [] if not second else second.failing_tests()
        return first_failing_tests != second_failing_tests

    def _should_defer_patch_or_throw(self, failures_with_patch, results_archive_for_failures_with_patch, script_error, failure_id):
        """Compare failures seen with the patch against a clean-tree test run.

        Returns True when the patch should be deferred, False when every
        failure seen with the patch also fails on the clean tree. When the
        patch introduced failures, report_failure() is invoked and may raise.
        """
        # The pass/fail return value is not used; only the test results
        # fetched from the delegate afterwards are.
        self._build_and_test_without_patch()
        clean_tree_results = self._delegate.test_results()

        if clean_tree_results.did_exceed_test_failure_limit():
            # We cannot know whether the failures we saw in the test runs with the patch are expected.
            return True

        failures_introduced_by_patch = frozenset(failures_with_patch) - frozenset(clean_tree_results.failing_test_results())
        if failures_introduced_by_patch:
            self.failure_status_id = failure_id
            # report_failure will either throw or return false.
            return not self.report_failure(results_archive_for_failures_with_patch, LayoutTestResults(failures_introduced_by_patch, did_exceed_test_failure_limit=False), script_error)

        # In this case, we know that all of the failures that we saw with the patch were
        # also present without the patch, so we don't need to defer.
        return False

    def _test_patch(self):
        """Run the tests with the patch applied, retrying once and consulting the clean tree.

        Returns True when the patch is considered good and False when it
        should be deferred. May raise via report_failure() when the patch is
        found to introduce failures.
        """
        if self._test():
            return True

        # Note: archive_last_test_results deletes the results directory, making these calls order-sensitive.
        # We could remove this dependency by building the test_results from the archive.
        first_results = self._delegate.test_results()
        first_results_archive = self._delegate.archive_last_test_results(self._patch)
        first_script_error = self._script_error
        first_failure_status_id = self.failure_status_id

        # NOTE(review): first_results is dereferenced here before the falsy check
        # below; confirm test_results() cannot return None at this point.
        if self._test() and not first_results.did_exceed_test_failure_limit():
            # Only report flaky tests if we were successful at parsing results.json and archiving results.
            if first_results and first_results_archive:
                self._report_flaky_tests(first_results.failing_test_results(), first_results_archive)
            return True

        second_results = self._delegate.test_results()
        second_results_archive = self._delegate.archive_last_test_results(self._patch)
        second_script_error = self._script_error
        second_failure_status_id = self.failure_status_id

        if second_results.did_exceed_test_failure_limit() and first_results.did_exceed_test_failure_limit():
            # Both runs hit the failure limit: compare the failure count against the clean tree.
            self._build_and_test_without_patch()
            clean_tree_results = self._delegate.test_results()

            # A difference of at most 5 failing tests is not reported as a failure; defer instead.
            if (len(first_results.failing_tests()) - len(clean_tree_results.failing_tests())) <= 5:
                return False

            self.failure_status_id = first_failure_status_id

            return self.report_failure(first_results_archive, first_results, first_script_error)

        if second_results.did_exceed_test_failure_limit():
            # Only the second run hit the limit: judge the patch on the first run's failures.
            # The call may raise; otherwise the patch is deferred regardless of its result.
            self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive, first_script_error, first_failure_status_id)
            return False

        if first_results.did_exceed_test_failure_limit():
            # Only the first run hit the limit: judge the patch on the second run's failures.
            self._should_defer_patch_or_throw(second_results.failing_test_results(), second_results_archive, second_script_error, second_failure_status_id)
            return False

        if self._results_failed_different_tests(first_results, second_results):
            first_failing_results_set = frozenset(first_results.failing_test_results())
            second_failing_results_set = frozenset(second_results.failing_test_results())

            tests_that_only_failed_first = first_failing_results_set.difference(second_failing_results_set)
            self._report_flaky_tests(tests_that_only_failed_first, first_results_archive)

            tests_that_only_failed_second = second_failing_results_set.difference(first_failing_results_set)
            self._report_flaky_tests(tests_that_only_failed_second, second_results_archive)

            tests_that_consistently_failed = first_failing_results_set.intersection(second_failing_results_set)
            if tests_that_consistently_failed:
                if self._should_defer_patch_or_throw(tests_that_consistently_failed, first_results_archive, first_script_error, first_failure_status_id):
                    return False  # Defer patch

            # At this point we know that at least one test flaked, but no consistent failures
            # were introduced. This is a bit of a grey-zone.
            return False  # Defer patch

        if self._should_defer_patch_or_throw(first_results.failing_test_results(), first_results_archive, first_script_error, first_failure_status_id):
            return False  # Defer patch

        # At this point, we know that the first and second runs had the exact same failures,
        # and that those failures are all present on the clean tree, so we can say with certainty
        # that the patch is good.
        return True

    def results_archive_from_patch_test_run(self, patch):
        """Return the results archive recorded by report_failure(), or None if none was recorded."""
        assert(self._patch.id() == patch.id())  # PatchAnalysisTask is not currently re-useable.
        return self._results_archive_from_patch_test_run

    def results_from_patch_test_run(self, patch):
        """Return the test results recorded by report_failure(), or None if none were recorded."""
        assert(self._patch.id() == patch.id())  # PatchAnalysisTask is not currently re-useable.
        return self._results_from_patch_test_run

    def report_failure(self, results_archive=None, results=None, script_error=None):
        """Record the failing results and raise the associated script error.

        Returns False without recording anything when validate() is falsy.
        Otherwise stores `results_archive` and `results` on the task and raises
        `script_error`, falling back to the last error seen by _run_command().
        """
        if not self.validate():
            return False
        self._results_archive_from_patch_test_run = results_archive
        self._results_from_patch_test_run = results
        # NOTE(review): if both errors are None this raise itself fails with a TypeError.
        raise script_error or self._script_error

    def validate(self):
        """Subclass hook; report_failure() only records and raises when this returns a truthy value."""
        raise NotImplementedError("subclasses must implement")

    def run(self):
        """Subclass hook; not called within this class."""
        raise NotImplementedError("subclasses must implement")