2011-02-03 Hayato Ito <hayato@chromium.org>
[WebKit-https.git] / Tools / Scripts / webkitpy / layout_tests / layout_package / dump_render_tree_thread.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """A Thread object for running DumpRenderTree and processing URLs from a
32 shared queue.
33
34 Each thread runs a separate instance of the DumpRenderTree binary and validates
35 the output.  When there are no more URLs to process in the shared queue, the
36 thread exits.
37 """
38
39 import copy
40 import logging
41 import os
42 import Queue
43 import signal
44 import sys
45 import thread
46 import threading
47 import time
48
49
50 from webkitpy.layout_tests.test_types import image_diff
51 from webkitpy.layout_tests.test_types import test_type_base
52 from webkitpy.layout_tests.test_types import text_diff
53
54 import single_test_runner
55 import test_failures
56 import test_results
57
# Module-level logger used by every class and helper in this file.
_log = logging.getLogger(
    "webkitpy.layout_tests.layout_package.dump_render_tree_thread")
60
61
def _pad_timeout(timeout):
    """Scale a per-test timeout up to a value that is safe to use for
    detecting hung test threads.

    Args:
      timeout: the per-test timeout value.

    Returns:
      The given timeout multiplied by three.
    """
    # The DumpRenderTree watchdog fires at 2.5x the per-test timeout.
    # When one test runs per DumpRenderTree process a hard timeout can
    # be enforced, and it has to be larger than the watchdog's.
    hang_multiplier = 3
    return timeout * hang_multiplier
71
72
def _milliseconds_to_seconds(msecs):
    """Convert a duration in milliseconds to seconds.

    Args:
      msecs: the duration in milliseconds; anything float() accepts.

    Returns:
      The duration in seconds, as a float.
    """
    seconds = float(msecs)
    seconds /= 1000.0
    return seconds
75
76
def _should_fetch_expected_checksum(options):
    """Tell whether the expected image checksum is needed for a test.

    Args:
      options: object with pixel_tests, new_baseline and reset_results
          attributes.

    Returns:
      A true value only when pixel tests are enabled and neither
      new_baseline nor reset_results is set.  When pixel_tests is falsy
      that falsy value itself is returned.
    """
    pixel_tests = options.pixel_tests
    if not pixel_tests:
        return pixel_tests
    regenerating_baselines = options.new_baseline or options.reset_results
    return not regenerating_baselines
79
80
class SingleTestThread(threading.Thread):
    """Thread wrapper for running a single test file.

    The thread creates its own driver, runs exactly one test through
    single_test_runner.run_single_test() and keeps the result so that
    it can be fetched with get_test_result() afterwards.
    """

    def __init__(self, port, options, worker_number, worker_name,
                 test_input, test_types):
        """
        Args:
          port: object implementing port-specific hooks
          options: command line argument object from optparse
          worker_number: worker number for tests
          worker_name: for logging
          test_input: Object containing the test filename and timeout
          test_types: A list of TestType objects to run the test output
              against.
        """

        threading.Thread.__init__(self)
        self._port = port
        self._options = options
        self._test_input = test_input
        self._test_types = test_types
        # Created in run(); stays None until the thread has started.
        self._driver = None
        self._worker_number = worker_number
        self._name = worker_name
        # self._test_result is deliberately NOT initialized here:
        # get_test_result() raises AttributeError until a result has
        # been stored, and TestShellThread relies on that to detect a
        # worker that hung or died.

    def run(self):
        """Thread entry point; runs the single test."""
        self._covered_run()

    def _covered_run(self):
        """Start a driver, run the test and always stop the driver."""
        # FIXME: this is a separate routine to work around a bug
        # in coverage: see http://bitbucket.org/ned/coveragepy/issue/85.
        self._driver = self._port.create_driver(self._worker_number)
        self._driver.start()
        try:
            self._test_result = single_test_runner.run_single_test(
                self._port, self._options, self._test_input, self._driver,
                self._name, self._test_types)
        finally:
            # Stop the driver even when the test run raised, so that a
            # failing test does not leak the driver.
            self._driver.stop()

    def get_test_result(self):
        """Return the result stored by run().

        Raises:
          AttributeError: if no result has been stored, i.e. the test
              has not finished or the thread died before finishing.
        """
        return self._test_result
121
122
class WatchableThread(threading.Thread):
    """This class abstracts an interface used by
    run_webkit_tests.TestRunner._wait_for_threads_to_finish for thread
    management.

    It only holds state (cancel flag, saved exception, deadline and
    thread id); subclasses are responsible for updating that state from
    their run() method.
    """
    def __init__(self):
        threading.Thread.__init__(self)
        # Set by cancel(); the running thread is expected to poll it.
        self._canceled = False
        # sys.exc_info() tuple saved by run() when it dies on an exception.
        self._exception_info = None
        # Time by which the current test is supposed to finish, or None.
        self._next_timeout = None
        # Identifier of the running thread, filled in once it has started.
        self._thread_id = None

    def cancel(self):
        """Set a flag telling this thread to quit."""
        self._canceled = True

    def clear_next_timeout(self):
        """Forget the current deadline, so that next_timeout() reports
        none until the thread sets a new one."""
        # This used to assign self._timeout, which nothing reads, so the
        # deadline returned by next_timeout() was never actually cleared.
        self._next_timeout = None

    def exception_info(self):
        """If run() terminated on an uncaught exception, return it here
        ((type, value, traceback) tuple).
        Returns None if run() terminated normally. Meant to be called after
        joining this thread."""
        return self._exception_info

    def id(self):
        """Return a thread identifier (None until the thread has set it)."""
        return self._thread_id

    def next_timeout(self):
        """Return the time the test is supposed to finish by, or None if
        no deadline is currently set."""
        return self._next_timeout
156
157
class TestShellThread(WatchableThread):
    """Worker thread that pulls groups of tests from a shared queue and
    runs them, putting one serialized result per test on a result queue.

    Tests normally share a single DumpRenderTree driver owned by this
    thread; with options.run_singly each test gets its own
    SingleTestThread (and driver) so that a hard timeout can be enforced.
    """

    def __init__(self, port, options, worker_number, worker_name,
                 filename_list_queue, result_queue):
        """Initialize all the local state for this DumpRenderTree thread.

        Args:
          port: interface to port-specific hooks
          options: command line options argument from optparse
          worker_number: identifier for a particular worker thread.
          worker_name: for logging.
          filename_list_queue: A thread safe Queue class that contains lists
              of tuples of (filename, uri) pairs.
          result_queue: A thread safe Queue class that will contain
              serialized TestResult objects.
        """
        WatchableThread.__init__(self)
        self._port = port
        self._options = options
        self._worker_number = worker_number
        self._name = worker_name
        self._filename_list_queue = filename_list_queue
        self._result_queue = result_queue
        self._filename_list = []
        self._driver = None
        self._test_group_timing_stats = {}
        self._test_results = []
        self._num_tests = 0
        self._start_time = 0
        self._stop_time = 0
        self._have_http_lock = False
        # Both stay 0 until the corresponding step of
        # _start_servers_with_lock() has happened.
        self._http_lock_wait_begin = 0
        self._http_lock_wait_end = 0

        self._test_types = [
            cls(self._port, self._options.results_directory)
            for cls in self._get_test_type_classes()]

        # Current group of tests we're running.
        self._current_group = None
        # Number of tests in self._current_group.
        self._num_tests_in_current_group = None
        # Time at which we started running tests from self._current_group.
        self._current_group_start_time = None

    def _get_test_type_classes(self):
        """Return the TestType classes to check output against: text
        diffs always, image diffs only when pixel tests are enabled."""
        classes = [text_diff.TestTextDiff]
        if self._options.pixel_tests:
            classes.append(image_diff.ImageDiff)
        return classes

    def get_test_group_timing_stats(self):
        """Returns a dictionary mapping test group to a tuple of
        (number of tests in that group, time to run the tests)"""
        return self._test_group_timing_stats

    def get_test_results(self):
        """Return the list of results recorded by this thread.

        This is used to calculate per-thread statistics.

        """
        return self._test_results

    def get_total_time(self):
        """Return the run time of this thread in seconds, excluding the
        time spent waiting for the http lock; never negative."""
        return max(self._stop_time - self._start_time -
                   self._http_lock_wait_time(), 0.0)

    def get_num_tests(self):
        """Return the number of tests this thread has started so far."""
        return self._num_tests

    def run(self):
        """Delegate main work to a helper method and watch for uncaught
        exceptions."""
        self._covered_run()

    def _covered_run(self):
        """Run the test loop, recording start/stop times and saving any
        exception in self._exception_info instead of propagating it."""
        # FIXME: this is a separate routine to work around a bug
        # in coverage: see http://bitbucket.org/ned/coveragepy/issue/85.
        self._thread_id = thread.get_ident()
        self._start_time = time.time()
        self._num_tests = 0
        try:
            _log.debug('%s starting' % (self.getName()))
            self._run(test_runner=None, result_summary=None)
            _log.debug('%s done (%d tests)' % (self.getName(),
                       self.get_num_tests()))
        except KeyboardInterrupt:
            self._exception_info = sys.exc_info()
            _log.debug("%s interrupted" % self.getName())
        except:
            # Deliberately catch everything: this is the top of the
            # thread, and the exception is saved for our caller to see
            # through exception_info().
            self._exception_info = sys.exc_info()
            _log.error('%s dying, exception raised' % self.getName())

        self._stop_time = time.time()

    def run_in_main_thread(self, test_runner, result_summary):
        """This hook allows us to run the tests from the main thread if
        --num-test-shells==1, instead of having to always run two or more
        threads. This allows us to debug the test harness without having to
        do multi-threaded debugging."""
        self._run(test_runner, result_summary)

    def cancel(self):
        """Clean up http lock and set a flag telling this thread to quit."""
        self._stop_servers_with_lock()
        WatchableThread.cancel(self)

    def next_timeout(self):
        """Return the time the test is supposed to finish by, pushed back
        by the time spent waiting for the http lock.  Returns the unset
        (falsy) value unchanged when no deadline is set."""
        if self._next_timeout:
            return self._next_timeout + self._http_lock_wait_time()
        return self._next_timeout

    def _http_lock_wait_time(self):
        """Return the time spent acquiring the http lock and starting the
        servers: 0 if that never started, the time elapsed so far if it
        is still in progress, otherwise its total duration."""
        if self._http_lock_wait_begin == 0:
            return 0
        if self._http_lock_wait_end == 0:
            return time.time() - self._http_lock_wait_begin
        return self._http_lock_wait_end - self._http_lock_wait_begin

    def _run(self, test_runner, result_summary):
        """Main work entry point of the thread. Basically we pull urls from the
        filename queue and run the tests until we run out of urls.

        If test_runner is not None, then we call
        test_runner.update_summary() with result_summary after each test.

        Args:
          test_runner: object with an update_summary() method, or None.
          result_summary: value passed to test_runner.update_summary().
        """
        batch_size = self._options.batch_size
        batch_count = 0

        # Append tests we're running to the existing tests_run.txt file.
        # This is created in run_webkit_tests.py:_PrepareListsAndPrintOutput.
        # The file has to be opened for appending: it is shared, and
        # truncating it here would throw away what was already written.
        filesystem = self._port._filesystem
        tests_run_filename = filesystem.join(self._options.results_directory,
                                             "tests_run.txt")
        tests_run_file = filesystem.open_text_file_for_writing(
            tests_run_filename, append=True)
        try:
            while True:
                if self._canceled:
                    _log.debug('Testing cancelled')
                    return

                # Loop rather than test once, so that an empty group coming
                # off the queue is skipped instead of breaking pop() below.
                while not self._filename_list:
                    if not self._load_next_test_group():
                        return

                test_input = self._filename_list.pop()

                # We have a url, run tests.
                batch_count += 1
                self._num_tests += 1
                if self._options.run_singly:
                    result = self._run_test_in_another_thread(test_input)
                else:
                    result = self._run_test_in_this_thread(test_input)

                filename = test_input.filename
                tests_run_file.write(filename + "\n")
                if result.failures:
                    # Check and kill DumpRenderTree if we need to.
                    if any(f.should_kill_dump_render_tree()
                           for f in result.failures):
                        self._kill_dump_render_tree()
                        # Reset the batch count since the shell just bounced.
                        batch_count = 0
                    # Print the error message(s).
                    error_str = '\n'.join(['  ' + f.message() for
                                           f in result.failures])
                    _log.debug("%s %s failed:\n%s" % (self.getName(),
                               self._port.relative_test_filename(filename),
                               error_str))
                else:
                    _log.debug("%s %s passed" % (self.getName(),
                               self._port.relative_test_filename(filename)))
                self._result_queue.put(result.dumps())

                if batch_size > 0 and batch_count >= batch_size:
                    # Bounce the shell and reset count.
                    self._kill_dump_render_tree()
                    batch_count = 0

                if test_runner:
                    test_runner.update_summary(result_summary)
        finally:
            # Close the file on every way out, including exceptions.
            tests_run_file.close()

    def _load_next_test_group(self):
        """Record timing for the finished group and fetch the next group
        of tests from the queue.

        Returns:
          True if a new group was loaded.  False if the queue was empty,
          in which case the servers have been stopped and the shared
          DumpRenderTree has been killed.
        """
        if self._current_group is not None:
            self._test_group_timing_stats[self._current_group] = (
                self._num_tests_in_current_group,
                time.time() - self._current_group_start_time)

        try:
            self._current_group, self._filename_list = \
                self._filename_list_queue.get_nowait()
        except Queue.Empty:
            self._stop_servers_with_lock()
            self._kill_dump_render_tree()
            return False

        # Only the "tests_to_http_lock" group runs with the servers up;
        # release them as soon as we move on to any other group.
        if self._current_group == "tests_to_http_lock":
            self._start_servers_with_lock()
        elif self._have_http_lock:
            self._stop_servers_with_lock()

        self._num_tests_in_current_group = len(self._filename_list)
        self._current_group_start_time = time.time()
        return True

    def _run_test_in_another_thread(self, test_input):
        """Run a test in a separate thread, enforcing a hard time limit.

        Since we can only detect the termination of a thread, not any internal
        state or progress, we can only run per-test timeouts when running test
        files singly.

        Args:
          test_input: Object containing the test filename and timeout

        Returns:
          A TestResult
        """
        worker = SingleTestThread(self._port,
                                  self._options,
                                  self._worker_number,
                                  self._name,
                                  test_input,
                                  self._test_types)

        worker.start()

        thread_timeout = _milliseconds_to_seconds(
            _pad_timeout(int(test_input.timeout)))
        # The deadline belongs on this object, where next_timeout() reads
        # it -- not on the 'thread' module.
        self._next_timeout = time.time() + thread_timeout
        worker.join(thread_timeout)
        if worker.isAlive():
            # If join() returned with the thread still running, the
            # DumpRenderTree is completely hung and there's nothing
            # more we can do with it.  We have to kill all the
            # DumpRenderTrees to free it up. If we're running more than
            # one DumpRenderTree thread, we'll end up killing the other
            # DumpRenderTrees too, introducing spurious crashes. We accept
            # that tradeoff in order to avoid losing the rest of this
            # thread's results.
            _log.error('Test thread hung: killing all DumpRenderTrees')
            if worker._driver:
                worker._driver.stop()

        try:
            result = worker.get_test_result()
        except AttributeError:
            # This gets raised when the worker never stored a result,
            # i.e. it hung or died before the test finished.
            _log.error('Cannot get results of test: %s' % test_input.filename)
            # FIXME: Seems we want a unique failure type here.
            result = test_results.TestResult(test_input.filename)

        return result

    def _run_test_in_this_thread(self, test_input):
        """Run a single test file using a shared DumpRenderTree process.

        Args:
          test_input: Object containing the test filename, uri and timeout

        Returns: a TestResult object.
        """
        self._ensure_dump_render_tree_is_running()
        thread_timeout = _milliseconds_to_seconds(
            _pad_timeout(int(test_input.timeout)))
        self._next_timeout = time.time() + thread_timeout
        test_result = single_test_runner.run_single_test(
            self._port, self._options, test_input, self._driver, self._name,
            self._test_types)
        self._test_results.append(test_result)
        return test_result

    def _ensure_dump_render_tree_is_running(self):
        """Start the shared DumpRenderTree, if it's not running.

        This is not for use when running tests singly, since those each start
        a separate DumpRenderTree in their own thread.

        """
        # poll() is not threadsafe and can throw OSError due to:
        # http://bugs.python.org/issue1731717
        if not self._driver or self._driver.poll() is not None:
            self._driver = self._port.create_driver(self._worker_number)
            self._driver.start()

    def _start_servers_with_lock(self):
        """Acquire http lock and start the servers."""
        # The begin/end stamps bracket the whole acquisition so that
        # _http_lock_wait_time() can discount it.
        self._http_lock_wait_begin = time.time()
        _log.debug('Acquire http lock ...')
        self._port.acquire_http_lock()
        _log.debug('Starting HTTP server ...')
        self._port.start_http_server()
        _log.debug('Starting WebSocket server ...')
        self._port.start_websocket_server()
        self._http_lock_wait_end = time.time()
        self._have_http_lock = True

    def _stop_servers_with_lock(self):
        """Stop the servers and release http lock.  Does nothing when
        this thread does not hold the lock."""
        if self._have_http_lock:
            _log.debug('Stopping HTTP server ...')
            self._port.stop_http_server()
            _log.debug('Stopping WebSocket server ...')
            self._port.stop_websocket_server()
            _log.debug('Release http lock ...')
            self._port.release_http_lock()
            self._have_http_lock = False

    def _kill_dump_render_tree(self):
        """Kill the DumpRenderTree process if it's running."""
        if self._driver:
            self._driver.stop()
            self._driver = None