53f25ce3be431dbe11307ff379e67ec3b257f27e
[WebKit-https.git] / Tools / Scripts / webkitpy / layout_tests / controllers / single_test_runner.py
1 # Copyright (C) 2011 Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29
30 import logging
31 import re
32 import time
33
34 from webkitpy.layout_tests.controllers import test_result_writer
35 from webkitpy.port.driver import DriverInput, DriverOutput
36 from webkitpy.layout_tests.models import test_expectations
37 from webkitpy.layout_tests.models import test_failures
38 from webkitpy.layout_tests.models.test_results import TestResult
39
40
# Module-level logger for this file.
_log = logging.getLogger(__name__)
42
43
def run_single_test(port, options, results_directory, worker_name, driver, test_input, stop_when_done):
    """Convenience wrapper: build a SingleTestRunner for one test and return its TestResult."""
    return SingleTestRunner(port, options, results_directory, worker_name,
                            driver, test_input, stop_when_done).run()
47
48
class SingleTestRunner(object):
    """Runs one layout test (or reftest) through a driver and evaluates the result."""

    # Symbolic names for where a newly-written baseline file should be placed;
    # see _save_baseline_data() for how each maps to a directory.
    (ALONGSIDE_TEST, PLATFORM_DIR, VERSION_DIR, UPDATE) = ('alongside', 'platform', 'version', 'update')

    def __init__(self, port, options, results_directory, worker_name, driver, test_input, stop_when_done):
        """Capture the per-test state that run() needs.

        Args:
          port: platform port object (supplies filesystem and expectation lookups).
          options: parsed command-line options.
          results_directory: directory where result artifacts are written.
          worker_name: name of the worker running this test (used in log messages).
          driver: the driver used to execute the test.
          test_input: describes the test (name, timeout, pixel-test flag, references).
          stop_when_done: whether the driver should be stopped after this test.
        """
        self._port = port
        self._filesystem = port.host.filesystem
        self._options = options
        self._results_directory = results_directory
        self._driver = driver
        self._timeout = test_input.timeout
        self._worker_name = worker_name
        self._test_name = test_input.test_name
        self._should_run_pixel_test = test_input.should_run_pixel_test
        self._reference_files = test_input.reference_files
        self._stop_when_done = stop_when_done

        if self._reference_files:
            # Detect and report a test which has a wrong combination of expectation files.
            # For example, if 'foo.html' has two expectation files, 'foo-expected.html' and
            # 'foo-expected.txt', we should warn users. One test file must be used exclusively
            # in either layout tests or reftests, but not in both.
            for suffix in ('.txt', '.png', '.wav'):
                expected_filename = self._port.expected_filename(self._test_name, suffix)
                if self._filesystem.exists(expected_filename):
                    _log.error('%s is a reftest, but has an unused expectation file. Please remove %s.',
                        self._test_name, expected_filename)
75
76     def _expected_driver_output(self):
77         return DriverOutput(self._port.expected_text(self._test_name),
78                                  self._port.expected_image(self._test_name),
79                                  self._port.expected_checksum(self._test_name),
80                                  self._port.expected_audio(self._test_name))
81
82     def _should_fetch_expected_checksum(self):
83         return self._should_run_pixel_test and not (self._options.new_baseline or self._options.reset_results)
84
85     def _driver_input(self):
86         # The image hash is used to avoid doing an image dump if the
87         # checksums match, so it should be set to a blank value if we
88         # are generating a new baseline.  (Otherwise, an image from a
89         # previous run will be copied into the baseline."""
90         image_hash = None
91         if self._should_fetch_expected_checksum():
92             image_hash = self._port.expected_checksum(self._test_name)
93         return DriverInput(self._test_name, self._timeout, image_hash, self._should_run_pixel_test)
94
95     def run(self):
96         if self._reference_files:
97             if self._port.get_option('no_ref_tests') or self._options.reset_results:
98                 reftest_type = set([reference_file[0] for reference_file in self._reference_files])
99                 result = TestResult(self._test_name, reftest_type=reftest_type)
100                 result.type = test_expectations.SKIP
101                 return result
102             return self._run_reftest()
103         if self._options.reset_results:
104             return self._run_rebaseline()
105         return self._run_compare_test()
106
    def _run_compare_test(self):
        """Run the test in the driver and diff its output against the stored expectations."""
        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        expected_driver_output = self._expected_driver_output()

        if self._options.ignore_metrics:
            # Strip layout metrics from both sides so purely numeric
            # differences do not register as failures.
            expected_driver_output.strip_metrics()
            driver_output.strip_metrics()

        test_result = self._compare_output(expected_driver_output, driver_output)
        if self._options.new_test_results:
            # Write baselines for any expectations that were missing on disk.
            self._add_missing_baselines(test_result, driver_output)
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, expected_driver_output, test_result.failures)
        return test_result
120
    def _run_rebaseline(self):
        """Run the test and unconditionally overwrite the existing baselines with its output."""
        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
        failures = self._handle_error(driver_output)
        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, None, failures)
        # FIXME: If the test crashed or timed out, it might be better to avoid
        # writing new baselines.
        self._overwrite_baselines(driver_output)
        return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(), pid=driver_output.pid)
129
130     _render_tree_dump_pattern = re.compile(r"^layer at \(\d+,\d+\) size \d+x\d+\n")
131
132     def _add_missing_baselines(self, test_result, driver_output):
133         missingImage = test_result.has_failure_matching_types(test_failures.FailureMissingImage, test_failures.FailureMissingImageHash)
134         if test_result.has_failure_matching_types(test_failures.FailureMissingResult):
135             self._save_baseline_data(driver_output.text, '.txt', self._location_for_new_baseline(driver_output.text, '.txt'))
136         if test_result.has_failure_matching_types(test_failures.FailureMissingAudio):
137             self._save_baseline_data(driver_output.audio, '.wav', self._location_for_new_baseline(driver_output.audio, '.wav'))
138         if missingImage:
139             self._save_baseline_data(driver_output.image, '.png', self._location_for_new_baseline(driver_output.image, '.png'))
140
141     def _location_for_new_baseline(self, data, extension):
142         if self._options.add_platform_exceptions:
143             return self.VERSION_DIR
144         if extension == '.png':
145             return self.PLATFORM_DIR
146         if extension == '.wav':
147             return self.ALONGSIDE_TEST
148         if extension == '.txt' and self._render_tree_dump_pattern.match(data):
149             return self.PLATFORM_DIR
150         return self.ALONGSIDE_TEST
151
152     def _overwrite_baselines(self, driver_output):
153         location = self.VERSION_DIR if self._options.add_platform_exceptions else self.UPDATE
154         self._save_baseline_data(driver_output.text, '.txt', location)
155         self._save_baseline_data(driver_output.audio, '.wav', location)
156         if self._should_run_pixel_test:
157             self._save_baseline_data(driver_output.image, '.png', location)
158
159     def _save_baseline_data(self, data, extension, location):
160         if data is None:
161             return
162         port = self._port
163         fs = self._filesystem
164         if location == self.ALONGSIDE_TEST:
165             output_dir = fs.dirname(port.abspath_for_test(self._test_name))
166         elif location == self.VERSION_DIR:
167             output_dir = fs.join(port.baseline_version_dir(), fs.dirname(self._test_name))
168         elif location == self.PLATFORM_DIR:
169             output_dir = fs.join(port.baseline_platform_dir(), fs.dirname(self._test_name))
170         elif location == self.UPDATE:
171             output_dir = fs.dirname(port.expected_filename(self._test_name, extension))
172         else:
173             raise AssertionError('unrecognized baseline location: %s' % location)
174
175         fs.maybe_make_directory(output_dir)
176         output_basename = fs.basename(fs.splitext(self._test_name)[0] + "-expected" + extension)
177         output_path = fs.join(output_dir, output_basename)
178         _log.info('Writing new expected result "%s"' % port.relative_test_filename(output_path))
179         port.update_baseline(output_path, data)
180
181     def _handle_error(self, driver_output, reference_filename=None):
182         """Returns test failures if some unusual errors happen in driver's run.
183
184         Args:
185           driver_output: The output from the driver.
186           reference_filename: The full path to the reference file which produced the driver_output.
187               This arg is optional and should be used only in reftests until we have a better way to know
188               which html file is used for producing the driver_output.
189         """
190         failures = []
191         fs = self._filesystem
192         if driver_output.timeout:
193             failures.append(test_failures.FailureTimeout(bool(reference_filename)))
194
195         if reference_filename:
196             testname = self._port.relative_test_filename(reference_filename)
197         else:
198             testname = self._test_name
199
200         if driver_output.crash:
201             failures.append(test_failures.FailureCrash(bool(reference_filename),
202                                                        driver_output.crashed_process_name,
203                                                        driver_output.crashed_pid))
204             if driver_output.error:
205                 _log.debug("%s %s crashed, (stderr lines):" % (self._worker_name, testname))
206             else:
207                 _log.debug("%s %s crashed, (no stderr)" % (self._worker_name, testname))
208         elif driver_output.error:
209             _log.debug("%s %s output stderr lines:" % (self._worker_name, testname))
210         for line in driver_output.error.splitlines():
211             _log.debug("  %s" % line)
212         return failures
213
214     def _compare_output(self, expected_driver_output, driver_output):
215         failures = []
216         failures.extend(self._handle_error(driver_output))
217
218         if driver_output.crash:
219             # Don't continue any more if we already have a crash.
220             # In case of timeouts, we continue since we still want to see the text and image output.
221             return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(), pid=driver_output.pid)
222
223         failures.extend(self._compare_text(expected_driver_output.text, driver_output.text))
224         failures.extend(self._compare_audio(expected_driver_output.audio, driver_output.audio))
225         if self._should_run_pixel_test:
226             failures.extend(self._compare_image(expected_driver_output, driver_output))
227         return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(), pid=driver_output.pid)
228
229     def _compare_text(self, expected_text, actual_text):
230         failures = []
231         if (expected_text and actual_text and
232             # Assuming expected_text is already normalized.
233             self._port.do_text_results_differ(expected_text, self._get_normalized_output_text(actual_text))):
234             failures.append(test_failures.FailureTextMismatch())
235         elif actual_text and not expected_text:
236             failures.append(test_failures.FailureMissingResult())
237         return failures
238
239     def _compare_audio(self, expected_audio, actual_audio):
240         failures = []
241         if (expected_audio and actual_audio and
242             self._port.do_audio_results_differ(expected_audio, actual_audio)):
243             failures.append(test_failures.FailureAudioMismatch())
244         elif actual_audio and not expected_audio:
245             failures.append(test_failures.FailureMissingAudio())
246         return failures
247
248     def _get_normalized_output_text(self, output):
249         """Returns the normalized text output, i.e. the output in which
250         the end-of-line characters are normalized to "\n"."""
251         # Running tests on Windows produces "\r\n".  The "\n" part is helpfully
252         # changed to "\r\n" by our system (Python/Cygwin), resulting in
253         # "\r\r\n", when, in fact, we wanted to compare the text output with
254         # the normalized text expectation files.
255         return output.replace("\r\r\n", "\r\n").replace("\r\n", "\n")
256
    # FIXME: This function also creates the image diff. Maybe that work should
    # be handled elsewhere?
    def _compare_image(self, expected_driver_output, driver_output):
        """Return image-comparison failures; may also compute and attach the image diff.

        Side effects: on a diff-tool error the error text is appended to
        driver_output.error; on a confirmed hash mismatch the diff image is
        stored on driver_output.image_diff.
        """
        failures = []
        # If we didn't produce a hash file, this test must be text-only.
        if driver_output.image_hash is None:
            return failures
        if not expected_driver_output.image:
            failures.append(test_failures.FailureMissingImage())
        elif not expected_driver_output.image_hash:
            failures.append(test_failures.FailureMissingImageHash())
        elif driver_output.image_hash != expected_driver_output.image_hash:
            # Hashes differ: run the port's pixel diff to confirm and to
            # produce the diff image for the results page.
            diff_result = self._port.diff_image(expected_driver_output.image, driver_output.image)
            err_str = diff_result[2]
            if err_str:
                # The diff tool itself failed; record the mismatch and surface
                # the tool's error text alongside the test's stderr.
                _log.warning('  %s : %s' % (self._test_name, err_str))
                failures.append(test_failures.FailureImageHashMismatch())
                driver_output.error = (driver_output.error or '') + err_str
            else:
                driver_output.image_diff = diff_result[0]
                if driver_output.image_diff:
                    failures.append(test_failures.FailureImageHashMismatch(diff_result[1]))
                else:
                    # See https://bugs.webkit.org/show_bug.cgi?id=69444 for why this isn't a full failure.
                    _log.warning('  %s -> pixel hash failed (but diff passed)' % self._test_name)
        return failures
283
284     def _run_reftest(self):
285         test_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
286         total_test_time = 0
287         reference_output = None
288         test_result = None
289
290         # A reftest can have multiple match references and multiple mismatch references;
291         # the test fails if any mismatch matches and all of the matches don't match.
292         # To minimize the number of references we have to check, we run all of the mismatches first,
293         # then the matches, and short-circuit out as soon as we can.
294         # Note that sorting by the expectation sorts "!=" before "==" so this is easy to do.
295
296         putAllMismatchBeforeMatch = sorted
297         reference_test_names = []
298         for expectation, reference_filename in putAllMismatchBeforeMatch(self._reference_files):
299             reference_test_name = self._port.relative_test_filename(reference_filename)
300             reference_test_names.append(reference_test_name)
301             reference_output = self._driver.run_test(DriverInput(reference_test_name, self._timeout, None, should_run_pixel_test=True), self._stop_when_done)
302             test_result = self._compare_output_with_reference(reference_output, test_output, reference_filename, expectation == '!=')
303
304             if (expectation == '!=' and test_result.failures) or (expectation == '==' and not test_result.failures):
305                 break
306             total_test_time += test_result.test_run_time
307
308         assert(reference_output)
309         test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, test_output, reference_output, test_result.failures)
310         reftest_type = set([reference_file[0] for reference_file in self._reference_files])
311         return TestResult(self._test_name, test_result.failures, total_test_time + test_result.test_run_time, test_result.has_stderr, reftest_type=reftest_type, pid=test_result.pid, references=reference_test_names)
312
313     def _compare_output_with_reference(self, reference_driver_output, actual_driver_output, reference_filename, mismatch):
314         total_test_time = reference_driver_output.test_time + actual_driver_output.test_time
315         has_stderr = reference_driver_output.has_stderr() or actual_driver_output.has_stderr()
316         failures = []
317         failures.extend(self._handle_error(actual_driver_output))
318         if failures:
319             # Don't continue any more if we already have crash or timeout.
320             return TestResult(self._test_name, failures, total_test_time, has_stderr)
321         failures.extend(self._handle_error(reference_driver_output, reference_filename=reference_filename))
322         if failures:
323             return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)
324
325         if not reference_driver_output.image_hash and not actual_driver_output.image_hash:
326             failures.append(test_failures.FailureReftestNoImagesGenerated(reference_filename))
327         elif mismatch:
328             if reference_driver_output.image_hash == actual_driver_output.image_hash:
329                 diff_result = self._port.diff_image(reference_driver_output.image, actual_driver_output.image, tolerance=0)
330                 if not diff_result[0]:
331                     failures.append(test_failures.FailureReftestMismatchDidNotOccur(reference_filename))
332                 else:
333                     _log.warning("  %s -> ref test hashes matched but diff failed" % self._test_name)
334
335         elif reference_driver_output.image_hash != actual_driver_output.image_hash:
336             diff_result = self._port.diff_image(reference_driver_output.image, actual_driver_output.image, tolerance=0)
337             if diff_result[0]:
338                 failures.append(test_failures.FailureReftestMismatch(reference_filename))
339             else:
340                 _log.warning("  %s -> ref test hashes didn't match but diff passed" % self._test_name)
341
342         return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)