2011-02-08 Hayato Ito <hayato@chromium.org>
[WebKit-https.git] / Tools / Scripts / webkitpy / layout_tests / layout_package / single_test_runner.py
1 # Copyright (C) 2011 Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29
30 import logging
31 import threading
32 import time
33
34 from webkitpy.layout_tests.port import base
35
36 from webkitpy.layout_tests.test_types import text_diff
37 from webkitpy.layout_tests.test_types import image_diff
38
39 from webkitpy.layout_tests.layout_package import test_failures
40 from webkitpy.layout_tests.layout_package.test_results import TestResult
41
42
# Module-level logger shared by everything in this file; named after the
# module so its output can be filtered per module by the logging config.
_log = logging.getLogger(__name__)
44
45
class ExpectedDriverOutput:
    """Bundle of the expected results for one test.

    Attributes are stored exactly as given:
      text: the expected text output
      image: the expected image data
      image_hash: the expected image checksum
    """
    def __init__(self, text, image, image_hash):
        self.text, self.image, self.image_hash = text, image, image_hash
52
53
54 class SingleTestRunner:
55
56     def __init__(self, options, port, worker_name, worker_number):
57         self._options = options
58         self._port = port
59         self._worker_name = worker_name
60         self._worker_number = worker_number
61         self._driver = None
62         self._test_types = []
63         self.has_http_lock = False
64         for cls in self._get_test_type_classes():
65             self._test_types.append(cls(self._port,
66                                         self._options.results_directory))
67
68     def cleanup(self):
69         self.kill_dump_render_tree()
70         if self.has_http_lock:
71             self.stop_servers_with_lock()
72
73     def _get_test_type_classes(self):
74         classes = [text_diff.TestTextDiff]
75         if self._options.pixel_tests:
76             classes.append(image_diff.ImageDiff)
77         return classes
78
79     def timeout(self, test_input):
80         # We calculate how long we expect the test to take.
81         #
82         # The DumpRenderTree watchdog uses 2.5x the timeout; we want to be
83         # larger than that. We also add a little more padding if we're
84         # running tests in a separate thread.
85         #
86         # Note that we need to convert the test timeout from a
87         # string value in milliseconds to a float for Python.
88         driver_timeout_sec = 3.0 * float(test_input.timeout) / 1000.0
89         if not self._options.run_singly:
90             return driver_timeout_sec
91
92         thread_padding_sec = 1.0
93         thread_timeout_sec = driver_timeout_sec + thread_padding_sec
94         return thread_timeout_sec
95
96     def run_test(self, test_input, timeout):
97         if self._options.run_singly:
98             return self._run_test_in_another_thread(test_input, timeout)
99         else:
100             return self._run_test_in_this_thread(test_input)
101         return result
102
103     def _run_test_in_another_thread(self, test_input, thread_timeout_sec):
104         """Run a test in a separate thread, enforcing a hard time limit.
105
106         Since we can only detect the termination of a thread, not any internal
107         state or progress, we can only run per-test timeouts when running test
108         files singly.
109
110         Args:
111           test_input: Object containing the test filename and timeout
112           thread_timeout_sec: time to wait before killing the driver process.
113         Returns:
114           A TestResult
115         """
116         worker = self
117         result = None
118
119         driver = worker._port.create_driver(worker._worker_number)
120         driver.start()
121
122         class SingleTestThread(threading.Thread):
123             def run(self):
124                 result = worker.run(test_input, driver)
125
126         thread = SingleTestThread()
127         thread.start()
128         thread.join(thread_timeout_sec)
129         if thread.isAlive():
130             # If join() returned with the thread still running, the
131             # DumpRenderTree is completely hung and there's nothing
132             # more we can do with it.  We have to kill all the
133             # DumpRenderTrees to free it up. If we're running more than
134             # one DumpRenderTree thread, we'll end up killing the other
135             # DumpRenderTrees too, introducing spurious crashes. We accept
136             # that tradeoff in order to avoid losing the rest of this
137             # thread's results.
138             _log.error('Test thread hung: killing all DumpRenderTrees')
139
140         driver.stop()
141
142         if not result:
143             result = TestResult(test_input.filename, failures=[],
144                 test_run_time=0, total_time_for_all_diffs=0, time_for_diffs={})
145         return result
146
147     def _run_test_in_this_thread(self, test_input):
148         """Run a single test file using a shared DumpRenderTree process.
149
150         Args:
151           test_input: Object containing the test filename, uri and timeout
152
153         Returns: a TestResult object.
154         """
155         # poll() is not threadsafe and can throw OSError due to:
156         # http://bugs.python.org/issue1731717
157         if not self._driver or self._driver.poll() is not None:
158             self._driver = self._port.create_driver(self._worker_number)
159             self._driver.start()
160         return self._run(self._driver, test_input)
161
162     def _expected_driver_output(self):
163         return ExpectedDriverOutput(self._port.expected_text(self._filename),
164                                     self._port.expected_image(self._filename),
165                                     self._port.expected_checksum(self._filename))
166
167     def _should_fetch_expected_checksum(self):
168         return (self._options.pixel_tests and
169                 not (self._options.new_baseline or self._options.reset_results))
170
171     def _driver_input(self, test_input):
172         self._filename = test_input.filename
173         self._timeout = test_input.timeout
174         self._testname = self._port.relative_test_filename(test_input.filename)
175
176         # The image hash is used to avoid doing an image dump if the
177         # checksums match, so it should be set to a blank value if we
178         # are generating a new baseline.  (Otherwise, an image from a
179         # previous run will be copied into the baseline."""
180         image_hash = None
181         if self._should_fetch_expected_checksum():
182             image_hash = self._port.expected_checksum(self._filename)
183         return base.DriverInput(self._filename, self._timeout, image_hash)
184
185     def _run(self, driver, test_input):
186         if self._options.new_baseline or self._options.reset_results:
187             return self._run_rebaseline(driver, test_input)
188         return self._run_compare_test(driver, test_input)
189
190     def _run_compare_test(self, driver, test_input):
191         driver_output = self._driver.run_test(self._driver_input(test_input))
192         return self._process_output(driver_output)
193
194     def _run_rebaseline(self, driver, test_input):
195         driver_output = self._driver.run_test(self._driver_input(test_input))
196         failures = self._handle_error(driver_output)
197         # FIXME: It the test crashed or timed out, it might be bettter to avoid
198         # to write new baselines.
199         self._save_baselines(driver_output)
200         return TestResult(self._filename, failures, driver_output.test_time)
201
202     def _save_baselines(self, driver_output):
203         # Although all test_shell/DumpRenderTree output should be utf-8,
204         # we do not ever decode it inside run-webkit-tests.  For some tests
205         # DumpRenderTree may not output utf-8 text (e.g. webarchives).
206         self._save_baseline_data(driver_output.text, ".txt", encoding=None,
207                                  generate_new_baseline=self._options.new_baseline)
208         if self._options.pixel_tests and driver_output.image_hash:
209             self._save_baseline_data(driver_output.image, ".png", encoding=None,
210                                      generate_new_baseline=self._options.new_baseline)
211             self._save_baseline_data(driver_output.image_hash, ".checksum",
212                                      encoding="ascii",
213                                      generate_new_baseline=self._options.new_baseline)
214
215     def _save_baseline_data(self, data, modifier, encoding,
216                             generate_new_baseline=True):
217         """Saves a new baseline file into the port's baseline directory.
218
219         The file will be named simply "<test>-expected<modifier>", suitable for
220         use as the expected results in a later run.
221
222         Args:
223           data: result to be saved as the new baseline
224           modifier: type of the result file, e.g. ".txt" or ".png"
225           encoding: file encoding (none, "utf-8", etc.)
226           generate_new_baseline: whether to enerate a new, platform-specific
227             baseline, or update the existing one
228         """
229
230         port = self._port
231         fs = port._filesystem
232         if generate_new_baseline:
233             relative_dir = fs.dirname(self._testname)
234             baseline_path = port.baseline_path()
235             output_dir = fs.join(baseline_path, relative_dir)
236             output_file = fs.basename(fs.splitext(self._filename)[0] +
237                 "-expected" + modifier)
238             fs.maybe_make_directory(output_dir)
239             output_path = fs.join(output_dir, output_file)
240             _log.debug('writing new baseline result "%s"' % (output_path))
241         else:
242             output_path = port.expected_filename(self._filename, modifier)
243             _log.debug('resetting baseline result "%s"' % output_path)
244
245         port.update_baseline(output_path, data, encoding)
246
247     def _handle_error(self, driver_output):
248         failures = []
249         fs = self._port._filesystem
250         if driver_output.timeout:
251             failures.append(test_failures.FailureTimeout())
252         if driver_output.crash:
253             failures.append(test_failures.FailureCrash())
254             _log.debug("%s Stacktrace for %s:\n%s" % (self._worker_name, self._testname,
255                                                       driver_output.error))
256             stack_filename = fs.join(self._options.results_directory, self._testname)
257             stack_filename = fs.splitext(stack_filename)[0] + "-stack.txt"
258             fs.maybe_make_directory(fs.dirname(stack_filename))
259             fs.write_text_file(stack_filename, driver_output.error)
260         elif driver_output.error:
261             _log.debug("%s %s output stderr lines:\n%s" % (self._worker_name, self._testname,
262                                                            driver_output.error))
263         return failures
264
265     def _run_test(self):
266         driver_output = self._driver.run_test(self._driver_input())
267         return self._process_output(driver_output)
268
269     def _process_output(self, driver_output):
270         """Receives the output from a DumpRenderTree process, subjects it to a
271         number of tests, and returns a list of failure types the test produced.
272         Args:
273           driver_output: a DriverOutput object containing the output from the driver
274
275         Returns: a TestResult object
276         """
277         fs = self._port._filesystem
278         failures = self._handle_error(driver_output)
279         expected_driver_output = self._expected_driver_output()
280
281         # Check the output and save the results.
282         start_time = time.time()
283         time_for_diffs = {}
284         for test_type in self._test_types:
285             start_diff_time = time.time()
286             new_failures = test_type.compare_output(
287                 self._port, self._filename, self._options, driver_output,
288                 expected_driver_output)
289             # Don't add any more failures if we already have a crash, so we don't
290             # double-report those tests. We do double-report for timeouts since
291             # we still want to see the text and image output.
292             if not driver_output.crash:
293                 failures.extend(new_failures)
294             time_for_diffs[test_type.__class__.__name__] = (
295                 time.time() - start_diff_time)
296
297         total_time_for_all_diffs = time.time() - start_diff_time
298         return TestResult(self._filename, failures, driver_output.test_time,
299                           total_time_for_all_diffs, time_for_diffs)
300
301     def start_servers_with_lock(self):
302         _log.debug('Acquiring http lock ...')
303         self._port.acquire_http_lock()
304         _log.debug('Starting HTTP server ...')
305         self._port.start_http_server()
306         _log.debug('Starting WebSocket server ...')
307         self._port.start_websocket_server()
308         self.has_http_lock = True
309
310     def stop_servers_with_lock(self):
311         """Stop the servers and release http lock."""
312         if self.has_http_lock:
313             _log.debug('Stopping HTTP server ...')
314             self._port.stop_http_server()
315             _log.debug('Stopping WebSocket server ...')
316             self._port.stop_websocket_server()
317             _log.debug('Releasing server lock ...')
318             self._port.release_http_lock()
319             self.has_http_lock = False
320
321     def kill_dump_render_tree(self):
322         """Kill the DumpRenderTree process if it's running."""
323         if self._driver:
324             self._driver.stop()
325             self._driver = None